Commit ddd2fa1

Main: Commit 2
1 parent 6198f07 commit ddd2fa1

6 files changed: +171 -146 lines changed


README.md

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ Git: https://github.com/raboonik
 
     ./install.sh path/to/directory
 
-or to install under the default directory simply execute
+    or to install under the default directory simply execute
 
     ./install.sh
 
Lines changed: 82 additions & 10 deletions

@@ -1,14 +1,35 @@
-import AutoParallelizePy
+"""
+Summary: A simple single-core example program
+emulating a hypothetical MPI environment consisting
+of 32 processes to test and demonstrate the
+AutoParallelizePy class:
+    class domainDecomposeND
+
+Aims: Create a simple 4D array of arbitrary
+shape and auto-domain decompose using the
+built-in domainDecomposeND class and recover
+the original array after decomposition.
+"""
+
 import numpy as np
+import AutoParallelizePy as APP
+
+# Even though this example isn't MPI-parallelized, we may emulate a
+# hypothetical MPI environment assuming there are a total of 32 procs
+size = 32
 
-size = 48
-axes_limits = [47,37,1,98] # aka how many data points are in each dimension
+# Create a simple 4D array of random numbers
+arrShape = [47,37,19,98]
+origArr = np.random.uniform(low=-20, high=20, size=(arrShape))
+
+# Configure the domain decomposition scheme such that only the first,
+# second, and fourth dimensions of this array are parallelized
 parallel_axes = [0,1,3]
-user_nblocks = [3,4,1,4]
-user_nblocks = None
-domDecompND = AutoParallelizePy.domainDecomposeND(size,axes_limits,parallel_axes,user_nblocks,False,True,False)
-a = np.arange(np.prod(axes_limits)).reshape(axes_limits)
-a1 = np.zeros(axes_limits)
+domDecompND = APP.domainDecomposeND(size,arrShape,parallel_axes)
+
+# Now let's have our hypothetical procs (ranks) each take a slice of
+# the data and dump it in a new array to retrieve the original data
+testArr = np.zeros(arrShape)
 for rank in range(size):
     slq0 = domDecompND.slq[0][rank]
     elq0 = domDecompND.elq[0][rank]
@@ -18,7 +39,58 @@
     elq2 = domDecompND.elq[2][rank]
     slq3 = domDecompND.slq[3][rank]
     elq3 = domDecompND.elq[3][rank]
-    a1[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3] = a[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3]
+    mySubArr = origArr[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3]
+    # Dump the subarray into the test array in the correct block
+    testArr[slq0:elq0,slq1:elq1,slq2:elq2,slq3:elq3] = mySubArr
 
+# If the domain decomposition scheme is correctly done, testArr
+# recovers the original array. Let's check that
+print("*********************************")
+if np.all(origArr == testArr):
+    print("The original data was successfully recovered!")
+else:
+    print("Failed!")
+print("*********************************")
+print("")
 
-print(np.all(a == a1))
+# Now let's print the different attributes of the object domDecompND
+print("Assuming the data is N-dimensional and MPI-parallelized across a total number of np cores:")
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.nblock"+'\033[0m'+" gives an N-D array of the number of MPI-blocks in each dimension.")
+print("The product of all the blocks must recover the total number of cores in an MPI scheme.")
+print("In this example we have domDecompND.nblock = ", domDecompND.nblock)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.coordinates"+'\033[0m'+" gives an Nxnp array of all the Cartesian coordinates of the MPI-blocks.")
+print("In this example we have domDecompND.coordinates = ", domDecompND.coordinates)
+print("and hence the coordinates of the MPI-block handled by the 27th (rank = 27) processor are:",domDecompND.coordinates[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.slq"+'\033[0m'+" gives an Nxnp array of all the datapoint indices that mark the beginning of each MPI-block dimension.")
+print("In this example we have domDecompND.slq = ", domDecompND.slq)
+print("and hence the MPI-block belonging to the 27th processor starts at:",domDecompND.slq[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.elq"+'\033[0m'+" gives an Nxnp array of all the datapoint indices that mark the end of each MPI-block dimension.")
+print("In this example we have domDecompND.elq = ", domDecompND.elq)
+print("and hence the MPI-block belonging to the 27th processor ends at:",domDecompND.elq[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.mynq"+'\033[0m'+" gives an Nxnp array of the total number of gridpoints of each MPI-block dimension.")
+print("Note that we have domDecompND.mynq = domDecompND.elq - domDecompND.slq")
+print("In this example we have domDecompND.mynq = ", domDecompND.mynq)
+print("and hence the MPI-block belonging to the 27th processor has the following number of gridpoints in each dimension:",domDecompND.mynq[:,27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.split_sizes"+'\033[0m'+" gives an array of length np of the total number of gridpoints handled by each processor.")
+print("Note that we have domDecompND.split_sizes == np.prod(domDecompND.mynq, axis=0)")
+print("In this example we have domDecompND.split_sizes = ", domDecompND.split_sizes)
+print("and hence the 27th processor has the following total number of gridpoints:",domDecompND.split_sizes[27])
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.arrShape"+'\033[0m'+" gives the shape of the array to be MPI-parallelized.")
+print("In this example we have domDecompND.arrShape = ", domDecompND.arrShape)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.parallel_axes"+'\033[0m'+" gives the user's input list of axes/dimensions to be parallelized.")
+print("In this example we have domDecompND.parallel_axes = ", domDecompND.parallel_axes)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.n_dim"+'\033[0m'+" gives the number of dimensions of the raw array.")
+print("In this example we have domDecompND.n_dim = ", domDecompND.n_dim)
+print("-------------------------------------------------------------------")
+print('\033[1m'+"domDecompND.n_par_dim"+'\033[0m'+" gives the number of parallelized dimensions.")
+print("In this example we have domDecompND.n_par_dim = ", domDecompND.n_par_dim)
+print("-------------------------------------------------------------------")
Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-gather_vector_1D(comm, rank, mainrank, domDecompND, myinput, dtype=dtype)
+gather_array_1D(comm, rank, mainrank, domDecompND, myinput, dtype=dtype)

examples/example_hybrid1.py

Lines changed: 48 additions & 24 deletions

@@ -1,7 +1,21 @@
+"""
+Summary: A hybrid example program to test and demonstrate
+the use of the following AutoParallelizePy methods:
+    class domainDecomposeND
+    function get_subarray
+    function scatter_vector_ND
+    function gather_vector_ND
+
+Aims: Create a 4D array of random real numbers, domain
+decompose and parallelize it, and then de-parallelize
+to recover the same array.
+"""
+
 import numpy as np
 from mpi4py import *
-import AutoParallelizePy
+import AutoParallelizePy as APP
 
+# Initialize the MPI environment
 #◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈
 # Init Parallel               #◈
 comm = MPI.COMM_WORLD         #◈
@@ -10,41 +24,51 @@
 mainrank = 0                  #◈
 #◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈◈
 
-# Create a 4D array of random floats
-axes_limits = [27,56,34,86]
-parallel_axes = [0,1,3]
-new_parallel_axes = [1,2]
+# Using create_randoms_acorss_cores, create a 4D array of random
+# floats to be parallelized in the first, second, and fourth
+# dimensions and broadcast to all procs
+dataShape = [27,56,34,86]
+parallel_axes = [0,1,3]
+origArr = APP.create_randoms_acorss_cores(comm, rank, mainrank, dataShape)
+
+# Using domainDecomposeND, configure the automatic domain decomposition
+# scheme based on the prescribed data shape and parallelized axes
+domDecompND = APP.domainDecomposeND(size,dataShape,parallel_axes)
 
-testArr = AutoParallelizePy.create_randoms_acorss_cores(comm, rank, mainrank, axes_limits)
+# Use get_subarray to have each proc take a slice of the original array
+myarrV1 = APP.get_subarray(rank,domDecompND,origArr)
 
+# Now let's slice the original array, this time using scatter_vector_ND.
+# To do so, first store the original array on the main rank
 if rank == mainrank:
-    lowHigh=[-13.54,13.3]
-    mainArr = testArr.copy()
+    copyArr = origArr.copy()
 else:
-    mainArr = None
-
-# Domain decompose
-domDecompND = AutoParallelizePy.domainDecomposeND(size,axes_limits,parallel_axes)
+    copyArr = None
 
-myarr0 = AutoParallelizePy.get_subarray(rank,domDecompND,testArr)
-myarr = AutoParallelizePy.scatter_vector_ND(comm,rank,mainrank,domDecompND,mainArr,dtype='float')
+# Use scatter_vector_ND to scatter chunks/slices of the original array
+# across all procs as prescribed in domDecompND
+myarrV2 = APP.scatter_vector_ND(comm,rank,mainrank,domDecompND,copyArr,dtype='float')
 
-if np.all(myarr0 == myarr):
-    print("rank = {} -- Success!".format(rank))
+# Check that the two versions are indeed the same
+if np.all(myarrV1 == myarrV2):
+    print("rank = {} -- Success! The two versions of local sub-arrays sliced using get_subarray and scatter_vector_ND yielded the same results!".format(rank))
 else:
     print("rank = {} -- Failed!".format(rank))
 
-gathered_new_myTestArr = AutoParallelizePy.gather_vector_ND(comm, rank, mainrank, domDecompND, myarr0, 'float')
-gathered_new_myTestArr1 = AutoParallelizePy.gather_vector_ND(comm, rank, mainrank, domDecompND, myarr, 'float')
+# Use gather_vector_ND to gather all the local data chunks of both versions on the main rank
+# and retrieve the original data
+gathered_myArrV1 = APP.gather_vector_ND(comm, rank, mainrank, domDecompND, myarrV1, 'float')
+gathered_myArrV2 = APP.gather_vector_ND(comm, rank, mainrank, domDecompND, myarrV2, 'float')
 
+# Check if the gathered data recovers the original array
 if rank == mainrank:
     print("")
-    if np.all(gathered_new_myTestArr == testArr):
-        print("Success! gathered_new_myTestArr = testArr")
+    if np.all(gathered_myArrV1 == origArr):
+        print("Success! gathered_myArrV1 = origArr")
     else:
-        print("Failed! gathered_new_myTestArr != testArr")
+        print("Failed! gathered_myArrV1 != origArr")
    print("")
-    if np.all(gathered_new_myTestArr1 == testArr):
-        print("Success! gathered_new_myTestArr1 = testArr")
+    if np.all(gathered_myArrV2 == origArr):
+        print("Success! gathered_myArrV2 = origArr")
     else:
-        print("Failed! gathered_new_myTestArr1 != testArr")
+        print("Failed! gathered_myArrV2 != origArr")

libs/domainDecomposeND.py

Lines changed: 14 additions & 13 deletions

@@ -10,10 +10,10 @@


 class domainDecomposeND:
-    def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None, suggest_alternative=False, set_last_to1=False, decompose=True):
+    def __init__(self, n_processors, arrShape, parallel_axes, user_nblocks=None, suggest_alternative=False, set_last_to1=False, decompose=True):
         """
         Domain decomposition class designed to optimally break
-        down an array of shape axes_limits into n_processors
+        down an array of shape arrShape into n_processors
         blocks for easy implementation in parallelization schemes.


@@ -25,7 +25,7 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,
         Inputs:
             Mandatory:
                 n_processors: Total number of processors available
-                axes_limits: Shape of the N-dimensional array to
+                arrShape: Shape of the N-dimensional array to
                              be domain decomposed
                 parallel_axes: List of m <= N integers prescribing the axes
                                of the N-Dimensional array to be parallelized
@@ -57,15 +57,15 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,
             n_dim: Total number of dimensions of the data
             n_par_dim: Total number of axes parallelized
                        direction by each processor.
-            axes_limits: The input axes_limits
+            arrShape: The input arrShape
             parallel_axes: Final parallel_axes
             user_nblocks: The input user_nblocks
             n_processors: The input n_processors
             suggest_alternative: The input suggest_alternative
             set_last_to1: The input set_last_to1
             decompose: The input decompose
         """
-        n_dim    , axes_limits   = len(axes_limits)  ,np.array(axes_limits)
+        n_dim    , arrShape      = len(arrShape)     ,np.array(arrShape)
         n_par_dim, parallel_axes = len(parallel_axes),np.array(parallel_axes)

         slq = np.zeros([n_dim,n_processors], int)
@@ -83,7 +83,7 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,

         parallel_axes = np.sort(parallel_axes)

-        parallel_axes_limits = np.array([axes_limits[i] if i in parallel_axes else 1 for i in range(n_dim)],int)
+        parallel_arrShape = np.array([arrShape[i] if i in parallel_axes else 1 for i in range(n_dim)],int)

         # Parallel scheme
         if user_nblocks == None:
@@ -102,7 +102,7 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,

             if n_dim - n_par_dim != __ones[0]: raise ValueError("There is a mismatch between the axes to be parallelized {} and the prescribed \
                                                                  block list {}! The position of the `1's in the prescribed block list must not \
-                                                                 exist in list of the axes to be parallelized!".format(axes_limits, user_nblocks))
+                                                                 exist in list of the axes to be parallelized!".format(arrShape, user_nblocks))

             if np.any(temp_parallel_axes != parallel_axes): raise ValueError("The prescribed block list and the list of axes to be parallelized do not match!")

@@ -117,27 +117,28 @@ def __init__(self, n_processors, axes_limits, parallel_axes, user_nblocks=None,
             indices = get_nested_for_loops_indices(nblocks)

             for idim in range(n_dim):
-                nblockq = nblocks[idim]
+                nblockq = nblocks[idim]
                 nlq = np.array([ii for ii in range(0,int(nblockq)+1)])
                 if nblockq == 1:
                     # No parallelization here
-                    elq[idim,:] = axes_limits[idim]
+                    elq[idim,:] = arrShape[idim]
                 else:
-                    myslq,myelq = get_slq_elq(parallel_axes_limits[idim],nblockq)
+                    myslq,myelq = get_slq_elq(parallel_arrShape[idim],nblockq)
                     for rank in range(n_processors):
                         slq[idim,rank] = myslq[indices[idim][rank]]
                         elq[idim,rank] = myelq[indices[idim][rank]]
-                        blcq[idim,rank] = nlq[indices[idim][rank]]
+                        blcq[idim,rank] = nlq[indices[idim][rank]]
         else:
-            elq = np.array([[axes_limits[i] for rank in range(n_processors)] for i in range(n_dim)],dtype=int)
+            elq = np.array([[arrShape[i] for rank in range(n_processors)] for i in range(n_dim)],dtype=int)

         # Object attributes
         self.nblock = nblocks
         self.coordinates = blcq
         self.slq = slq
         self.elq = elq
         self.mynq = elq - slq
-        self.axes_limits = axes_limits
+        self.split_sizes = [np.prod(self.mynq[:,rank]) for rank in range(n_processors)]
+        self.arrShape = arrShape
         self.parallel_axes = parallel_axes
         self.n_dim = n_dim
         self.n_par_dim = n_par_dim
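Apart from the axes_limits → arrShape rename, the one behavioural addition here is the new split_sizes attribute, the per-rank product of mynq. Below is a stand-alone sketch of what it encodes, using hypothetical slq/elq values for a 6x7 array split into a 2x3 block grid (the numbers are made up for illustration):

    import numpy as np

    # Hypothetical (n_dim, n_processors) start/end index arrays, in the
    # same layout the class stores them, for a 6x7 array on 6 "ranks"
    slq = np.array([[0, 0, 0, 3, 3, 3],
                    [0, 2, 4, 0, 2, 4]])
    elq = np.array([[3, 3, 3, 6, 6, 6],
                    [2, 4, 7, 2, 4, 7]])

    mynq = elq - slq   # gridpoints per dimension per rank
    split_sizes = [int(np.prod(mynq[:, rank])) for rank in range(mynq.shape[1])]

    print(split_sizes)                 # [6, 6, 9, 6, 6, 9]
    assert sum(split_sizes) == 6 * 7   # the blocks tile the whole array

These per-rank counts are exactly the block sizes a flattened scatter/gather would need, which matches the example's note that split_sizes == np.prod(mynq, axis=0).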
