Commit ddd2fa1

Main: Commit 2
1 parent 6198f07 commit ddd2fa1

6 files changed: +171 -146 lines changed


README.md

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ Git: https://github.com/raboonik
 
     ./install.sh path/to/directory
 
-or to install under the default directory simply execute
+    or to install under the default directory simply execute
 
     ./install.sh
 
Lines changed: 82 additions & 10 deletions

@@ -1,14 +1,35 @@
-import AutoParallelizePy
+"""
+Summary: A simple single-core example program
+emulating a hypothetical MPI environment consisting
+of 32 processes to test and demonstrate the
+AutoParallelizePy class:
+    class domainDecomposeND
+
+Aims: Create a simple 4D array of arbitrary
+shape and auto-domain decompose using the
+built-in domainDecomposeND class and recover
+the original array after decomposition.
+"""
+
 import numpy as np
+import AutoParallelizePy as APP
+
+# Even though this example isn't MPI-parallelized, we may emulate a
+# hypothetical MPI environment assuming there are a total of 32 procs
+size = 32
 
-size = 48
-axes_limits = [47,37,1,98] # aka how many data points are in each dimension
+# Create a simple 4D array of random numbers
+arrShape = [47,37,19,98]
+origArr = np.random.uniform(low=-20, high=20, size=(arrShape))
+
+# Configure the domain decomposition scheme such that only the first,
+# second, and fourth dimensions of this array are parallelized
 parallel_axes = [0,1,3]
-user_nblocks = [3,4,1,4]
-user_nblocks = None
-domDecompND = AutoParallelizePy.domainDecomposeND(size,axes_limits,parallel_axes,user_nblocks,False,True,False)
-a = np.arange(np.prod(axes_limits)).reshape(axes_limits)
-a1 = np.zeros(axes_limits)
+domDecompND = APP.domainDecomposeND(size,arrShape,parallel_axes)
+
+# Now let's have our hypothetical procs (ranks) each take a slice of
+# the data and dump it in a new array to retrieve the original data
+testArr = np.zeros(arrShape)
 for rank in range(size):
     slq0 = domDecompND.slq[0][rank]
     elq0 = domDecompND.elq[0][rank]
@@ -18,7 +39,58 @@
     elq2 = domDecompND.elq[2][rank]
     slq3 = domDecompND.slq[3][rank]
     elq3 = domDecompND.elq[3][rank]
-    a1[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3] = a[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3]
+    mySubArr = origArr[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3]
+    # Dump the subarray into the test array in the correct block
+    testArr[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3] = mySubArr
 
+# If the domain decomposition scheme is correctly done, testArr
+# recovers the original array. Let's check that
+print("*********************************")
+if np.all(origArr == testArr):
+    print("The original data was successfully recovered!")
+else:
+    print("Failed!")
+print("*********************************")
+print("")
 
-print(np.all(a == a1))
+# Now let's print the different attributes of the object domDecompND
+print("Assuming the data is N-dimensional and MPI-parallelized across a total number of np cores:")
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.nblock"+'\033[0m'+" gives an N-D array of the number of MPI-blocks in each dimension.")
+print("The product of all the blocks must recover the total number of cores in an MPI scheme.")
+print("In this example we have domDecompND.nblock = ", domDecompND.nblock)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.coordinates"+'\033[0m'+" gives an Nxnp array of all the Cartesian coordinates of the MPI-blocks.")
+print("In this example we have domDecompND.coordinates = ", domDecompND.coordinates)
+print("and hence the coordinates of the MPI-block handled by the 27th (rank = 27) processor are:",domDecompND.coordinates[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.slq"+'\033[0m'+" gives an Nxnp array of all the datapoint indices that mark the beginning of each MPI-block dimension.")
+print("In this example we have domDecompND.slq = ", domDecompND.slq)
+print("and hence the MPI-block belonging to the 27th processor starts at:",domDecompND.slq[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.elq"+'\033[0m'+" gives an Nxnp array of all the datapoint indices that mark the end of each MPI-block dimension.")
+print("In this example we have domDecompND.elq = ", domDecompND.elq)
+print("and hence the MPI-block belonging to the 27th processor ends at:",domDecompND.elq[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.mynq"+'\033[0m'+" gives an Nxnp array of the total number of gridpoints of each MPI-block dimension.")
+print("Note that we have domDecompND.mynq = domDecompND.elq - domDecompND.slq")
+print("In this example we have domDecompND.mynq = ", domDecompND.mynq)
+print("and hence the MPI-block belonging to the 27th processor has the following number of gridpoints in each dimension:",domDecompND.mynq[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.split_sizes"+'\033[0m'+" gives an array of length np of the total number of gridpoints handled by each processor.")
+print("Note that we have domDecompND.split_sizes == np.prod(domDecompND.mynq, axis=0)")
+print("In this example we have domDecompND.split_sizes = ", domDecompND.split_sizes)
+print("and hence the 27th processor has the following total number of gridpoints:",domDecompND.split_sizes[27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.arrShape"+'\033[0m'+" gives the shape of the array to be MPI-parallelized.")
+print("In this example we have domDecompND.arrShape = ", domDecompND.arrShape)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.parallel_axes"+'\033[0m'+" gives the user's input list of axes/dimensions to be parallelized.")
+print("In this example we have domDecompND.parallel_axes = ", domDecompND.parallel_axes)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.n_dim"+'\033[0m'+" gives the number of dimensions of the raw array.")
+print("In this example we have domDecompND.n_dim = ", domDecompND.n_dim)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.n_par_dim"+'\033[0m'+" gives the number of parallelized dimensions.")
+print("In this example we have domDecompND.n_par_dim = ", domDecompND.n_par_dim)
+print("-------------------------------------------------------------------")
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-gather_vector_1D(comm, rank, mainrank, domDecompND, myinput, dtype=dtype)
+gather_array_1D(comm, rank, mainrank, domDecompND, myinput, dtype=dtype)

examples/example_hybrid1.py

Lines changed: 48 additions & 24 deletions

@@ -1,7 +1,21 @@
+"""
+Summary: A hybrid example program to test and demonstrate
+the use of the following AutoParallelizePy methods:
+    class domainDecomposeND
+    function get_subarray
+    function scatter_vector_ND
+    function gather_vector_ND
+
+Aims: Create a 4D array of random real numbers, domain
+decompose and parallelize it, and then de-parallelize
+to recover the same array.
+"""
+
 import numpy as np
 from mpi4py import *
-import AutoParallelizePy
+import AutoParallelizePy as APP
 
+# Initialize the MPI environment
 #◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈
 # Init Parallel               #◈
 comm = MPI.COMM_WORLD         #◈
@@ -10,41 +24,51 @@
 mainrank = 0                  #◈
 #◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈
 
-# Create a 4D array of random floats
-axes_limits = [27,56,34,86]
-parallel_axes = [0,1,3]
-new_parallel_axes = [1,2]
+# Using create_randoms_acorss_cores, create a 4D array of random
+# floats to be parallelized in the first, second, and fourth
+# dimensions and broadcast to all procs
+dataShape = [27,56,34,86]
+parallel_axes = [0,1,3]
+origArr = APP.create_randoms_acorss_cores(comm, rank, mainrank, dataShape)
+
+# Using domainDecomposeND, configure the automatic domain decomposition
+# scheme based on the prescribed data shape and parallelized axes
+domDecompND = APP.domainDecomposeND(size,dataShape,parallel_axes)
 
-testArr = AutoParallelizePy.create_randoms_acorss_cores(comm, rank, mainrank, axes_limits)
+# Use get_subarray to have each proc take a slice of the original array
+myarrV1 = APP.get_subarray(rank,domDecompND,origArr)
 
+# Now let's slice the original array, this time using scatter_vector_ND.
+# To do so, first store the original array on the main rank
 if rank == mainrank:
-    lowHigh=[-13.54,13.3]
-    mainArr = testArr.copy()
+    copyArr = origArr.copy()
 else:
-    mainArr = None
-
-# Domain decompose
-domDecompND = AutoParallelizePy.domainDecomposeND(size,axes_limits,parallel_axes)
+    copyArr = None
 
-myarr0 = AutoParallelizePy.get_subarray(rank,domDecompND,testArr)
-myarr = AutoParallelizePy.scatter_vector_ND(comm,rank,mainrank,domDecompND,mainArr,dtype='float')
+# Use scatter_vector_ND to scatter chunks/slices of the original array
+# across all procs as prescribed in domDecompND
+myarrV2 = APP.scatter_vector_ND(comm,rank,mainrank,domDecompND,copyArr,dtype='float')
 
-if np.all(myarr0 == myarr):
-    print("rank = {} -- Success!".format(rank))
+# Check that the two versions are indeed the same
+if np.all(myarrV1 == myarrV2):
+    print("rank = {} -- Success! The two versions of local sub-arrays sliced using get_subarray and scatter_vector_ND yielded the same results!".format(rank))
 else:
     print("rank = {} -- Failed!".format(rank))
 
-gathered_new_myTestArr = AutoParallelizePy.gather_vector_ND(comm, rank, mainrank, domDecompND, myarr0, 'float')
-gathered_new_myTestArr1 = AutoParallelizePy.gather_vector_ND(comm, rank, mainrank, domDecompND, myarr, 'float')
+# Use gather_vector_ND to gather all the local data chunks of both versions on the main rank
+# and retrieve the original data
+gathered_myArrV1 = APP.gather_vector_ND(comm, rank, mainrank, domDecompND, myarrV1, 'float')
+gathered_myArrV2 = APP.gather_vector_ND(comm, rank, mainrank, domDecompND, myarrV2, 'float')
 
+# Check if the gathered data recovers the original array
 if rank == mainrank:
     print("")
-    if np.all(gathered_new_myTestArr == testArr):
-        print("Success! gathered_new_myTestArr = testArr")
+    if np.all(gathered_myArrV1 == origArr):
+        print("Success! gathered_myArrV1 = origArr")
     else:
-        print("Failed! gathered_new_myTestArr != testArr")
+        print("Failed! gathered_myArrV1 != origArr")
    print("")
-    if np.all(gathered_new_myTestArr1 == testArr):
-        print("Success! gathered_new_myTestArr1 = testArr")
+    if np.all(gathered_myArrV2 == origArr):
+        print("Success! gathered_myArrV2 = origArr")
     else:
-        print("Failed! gathered_new_myTestArr1 != testArr")
+        print("Failed! gathered_myArrV2 != origArr")

libs/domainDecomposeND.py

Lines changed: 14 additions & 13 deletions

@@ -10,10 +10,10 @@


 class domainDecomposeND:
-    def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None, suggest_alternative=False, set_last_to1=False, decompose=True):
+    def __init__(self, n_processors, arrShape, parallel_axes, user_nblocks=None, suggest_alternative=False, set_last_to1=False, decompose=True):
         """
         Domain decomposition class designed to optimally break
-        down an array of shape axes_limits into n_processors
+        down an array of shape arrShape into n_processors
         blocks for easy implementation in parallelization schemes.


@@ -25,7 +25,7 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,
         Inputs:
             Mandatory:
                 n_processors: Total number of processors available
-                axes_limits: Shape of the N-dimensional array to
+                arrShape: Shape of the N-dimensional array to
                              be domain decomposed
                 parallel_axes: List of m <= N integers prescribing the axes
                                of the N-Dimensional array to be parallelized
@@ -57,15 +57,15 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,
             n_dim: Total number of dimensions of the data
             n_par_dim: Total number of axes parallelized
                        direction by each processor.
-            axes_limits: The input axes_limits
+            arrShape: The input arrShape
             parallel_axes: Final parallel_axes
             user_nblocks: The input user_nblocks
             n_processors: The input n_processors
             suggest_alternative: The input suggest_alternative
             set_last_to1: The input set_last_to1
             decompose: The input decompose
         """
-        n_dim    , axes_limits   = len(axes_limits)  ,np.array(axes_limits)
+        n_dim    , arrShape      = len(arrShape)     ,np.array(arrShape)
         n_par_dim, parallel_axes = len(parallel_axes),np.array(parallel_axes)

         slq = np.zeros([n_dim,n_processors], int)
@@ -83,7 +83,7 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,

         parallel_axes = np.sort(parallel_axes)

-        parallel_axes_limits = np.array([axes_limits[i] if i in parallel_axes else 1 for i in range(n_dim)],int)
+        parallel_arrShape = np.array([arrShape[i] if i in parallel_axes else 1 for i in range(n_dim)],int)

         # Parallel scheme
         if user_nblocks == None:
@@ -102,7 +102,7 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,

             if n_dim - n_par_dim != __ones[0]: raise ValueError("There is a mismatch between the axes to be parallelized {} and the prescribed \
                                                                  block list {}! The position of the `1's in the prescribed block list must not \
-                                                                 exist in list of the axes to be parallelized!".format(axes_limits, user_nblocks))
+                                                                 exist in list of the axes to be parallelized!".format(arrShape, user_nblocks))

             if np.any(temp_parallel_axes != parallel_axes): raise ValueError("The prescribed block list and the list of axes to be parallelized do not match!")

@@ -117,27 +117,28 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,
             indices = get_nested_for_loops_indices(nblocks)

             for idim in range(n_dim):
-                nblockq = nblocks[idim]
+                nblockq = nblocks[idim]
                 nlq = np.array([ii for ii in range(0,int(nblockq)+1)])
                 if nblockq == 1:
                     # No parallelization here
-                    elq[idim,:] = axes_limits[idim]
+                    elq[idim,:] = arrShape[idim]
                 else:
-                    myslq,myelq = get_slq_elq(parallel_axes_limits[idim],nblockq)
+                    myslq,myelq = get_slq_elq(parallel_arrShape[idim],nblockq)
                     for rank in range(n_processors):
                         slq[idim,rank] = myslq[indices[idim][rank]]
                         elq[idim,rank] = myelq[indices[idim][rank]]
-                        blcq[idim,rank] = nlq[indices[idim][rank]]
+                        blcq[idim,rank] = nlq[indices[idim][rank]]
         else:
-            elq = np.array([[axes_limits[i] for rank in range(n_processors)] for i in range(n_dim)],dtype=int)
+            elq = np.array([[arrShape[i] for rank in range(n_processors)] for i in range(n_dim)],dtype=int)

         # Object attributes
         self.nblock = nblocks
         self.coordinates = blcq
         self.slq = slq
         self.elq = elq
         self.mynq = elq - slq
-        self.axes_limits = axes_limits
+        self.split_sizes = [np.prod(self.mynq[:,rank]) for rank in range(n_processors)]
+        self.arrShape = arrShape
         self.parallel_axes = parallel_axes
         self.n_dim = n_dim
         self.n_par_dim = n_par_dim
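Apart from the axes_limits → arrShape rename, the one behavioural addition here is the new split_sizes attribute, the per-rank product of mynq. Below is a stand-alone sketch of what it encodes, using hypothetical slq/elq values for a 6x7 array split into a 2x3 block grid (the numbers are made up for illustration):

    import numpy as np

    # Hypothetical (n_dim, n_processors) start/end index arrays, in the
    # same layout the class stores them, for a 6x7 array on 6 "ranks"
    slq = np.array([[0, 0, 0, 3, 3, 3],
                    [0, 2, 4, 0, 2, 4]])
    elq = np.array([[3, 3, 3, 6, 6, 6],
                    [2, 4, 7, 2, 4, 7]])

    mynq = elq - slq   # gridpoints per dimension per rank
    split_sizes = [int(np.prod(mynq[:, rank])) for rank in range(mynq.shape[1])]

    print(split_sizes)                 # [6, 6, 9, 6, 6, 9]
    assert sum(split_sizes) == 6 * 7   # the blocks tile the whole array

These per-rank counts are exactly the block sizes a flattened scatter/gather would need, which matches the example's note that split_sizes == np.prod(mynq, axis=0).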
