|
24 | 24 | reduce_to_device, copy_array, transpose_sum) |
25 | 25 | from gpu4pyscf.lib import logger |
26 | 26 | from gpu4pyscf.gto.mole import basis_seg_contraction |
27 | | -from gpu4pyscf.__config__ import num_devices, _streams |
| 27 | +from gpu4pyscf.__config__ import num_devices |
28 | 28 |
|
29 | 29 | LMAX_ON_GPU = 8 |
30 | 30 | FREE_CUPY_CACHE = True |
@@ -253,7 +253,7 @@ def build(self, cutoff=1e-14, group_size=None, group_size_aux=None, |
253 | 253 | def bpcache(self): |
254 | 254 | device_id = cupy.cuda.Device().id |
255 | 255 | if device_id not in self._bpcache: |
256 | | - with cupy.cuda.Device(device_id), _streams[device_id]: |
| 256 | + with cupy.cuda.Device(device_id): |
257 | 257 | log = logger.new_logger(self.mol, self.mol.verbose) |
258 | 258 | cput0 = log.init_timer() |
259 | 259 | bpcache = ctypes.POINTER(BasisProdCache)() |
@@ -777,7 +777,7 @@ def get_j_int3c2e_pass2(intopt, rhoj, stream=None): |
777 | 777 | return vj |
778 | 778 |
|
779 | 779 | def _int3c2e_jk_task(intopt, task_k_list, dm0, mocc, device_id=0, omega=None): |
780 | | - with cupy.cuda.Device(device_id), _streams[device_id]: |
| 780 | + with cupy.cuda.Device(device_id): |
781 | 781 | log = logger.new_logger(intopt.mol, intopt.mol.verbose) |
782 | 782 | t0 = log.init_timer() |
783 | 783 | mocc = cupy.asarray(mocc) |
@@ -874,7 +874,7 @@ def _int3c2e_ip1_vjk_task(intopt, task_k_list, rhoj, rhok, dm0, orbo, device_id= |
874 | 874 | aoslices = intopt.mol.aoslice_by_atom() |
875 | 875 | vj1_buf = vk1_buf = vj1 = vk1 = None |
876 | 876 |
|
877 | | - with cupy.cuda.Device(device_id), _streams[device_id]: |
| 877 | + with cupy.cuda.Device(device_id): |
878 | 878 | log = logger.new_logger(intopt.mol, intopt.mol.verbose) |
879 | 879 | t0 = log.init_timer() |
880 | 880 | ao2atom = get_ao2atom(intopt, aoslices) |
@@ -978,7 +978,7 @@ def _int3c2e_ip2_vjk_task(intopt, task_k_list, rhoj, rhok, dm0, orbo, |
978 | 978 | nao = intopt.mol.nao |
979 | 979 | auxslices = intopt.auxmol.aoslice_by_atom() |
980 | 980 | vj1 = vk1 = None |
981 | | - with cupy.cuda.Device(device_id), _streams[device_id]: |
| 981 | + with cupy.cuda.Device(device_id): |
982 | 982 | log = logger.new_logger(intopt.mol, intopt.mol.verbose) |
983 | 983 | t0 = log.init_timer() |
984 | 984 | aux2atom = get_aux2atom(intopt, auxslices) |
@@ -1067,7 +1067,7 @@ def _int3c2e_ip1_wjk_task(intopt, task_k_list, dm0, orbo, wk, device_id=0, with_ |
1067 | 1067 | nao = intopt.mol.nao |
1068 | 1068 | naux = intopt.auxmol.nao |
1069 | 1069 | aux_ao_loc = intopt.aux_ao_loc |
1070 | | - with cupy.cuda.Device(device_id), _streams[device_id]: |
| 1070 | + with cupy.cuda.Device(device_id): |
1071 | 1071 | log = logger.new_logger(intopt.mol, intopt.mol.verbose) |
1072 | 1072 | t0 = log.init_timer() |
1073 | 1073 | ncp_ij = len(intopt.log_qs) |
@@ -1127,7 +1127,7 @@ def get_int3c2e_ip1_wjk(intopt, dm0_tag, with_k=True, omega=None): |
1127 | 1127 |
|
1128 | 1128 | def _int3c2e_ip2_wjk(intopt, task_list, dm0, orbo, with_k=True, omega=None, device_id=0): |
1129 | 1129 | aux_ao_loc = intopt.aux_ao_loc |
1130 | | - with cupy.cuda.Device(device_id), _streams[device_id]: |
| 1130 | + with cupy.cuda.Device(device_id): |
1131 | 1131 | cupy.get_default_memory_pool().free_all_blocks() |
1132 | 1132 | log = logger.new_logger(intopt.mol, intopt.mol.verbose) |
1133 | 1133 | t0 = log.init_timer() |
|
0 commit comments