diff --git a/gpu4pyscf/dft/numint.py b/gpu4pyscf/dft/numint.py
index 2e5a8da47..53d1a4a70 100644
--- a/gpu4pyscf/dft/numint.py
+++ b/gpu4pyscf/dft/numint.py
@@ -974,7 +974,7 @@ def get_rho(ni, mol, dm, grids, max_memory=2000, verbose=None):
 
     mem_avail = get_avail_mem()
     blksize = mem_avail*.2/8/nao//ALIGNED * ALIGNED
-    blksize = min(blksize, MIN_BLK_SIZE)
+    blksize = max(blksize, MIN_BLK_SIZE)
     GB = 1024*1024*1024
     log.debug(f'GPU Memory {mem_avail/GB:.1f} GB available, block size {blksize}')
 
@@ -1584,7 +1584,7 @@ def _block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
         #cupy.get_default_memory_pool().free_all_blocks()
         mem_avail = get_avail_mem()
         blksize = int((mem_avail*.2/8/((comp+1)*nao + extra))/ ALIGNED) * ALIGNED
-        blksize = min(blksize, MIN_BLK_SIZE)
+        blksize = max(blksize, MIN_BLK_SIZE)
         log.debug(f'{mem_avail/1e6} MB memory is available on Device {device_id}, block_size {blksize}')
         if blksize < ALIGNED:
             raise RuntimeError('Not enough GPU memory')
@@ -1645,7 +1645,7 @@ def _grouped_block_loop(ni, mol, grids, nao=None, deriv=0, max_memory=2000,
         #cupy.get_default_memory_pool().free_all_blocks()
         mem_avail = get_avail_mem()
         blksize = int((mem_avail*.2/8/((comp+1)*nao + extra))/ ALIGNED) * ALIGNED
-        blksize = min(blksize, MIN_BLK_SIZE)
+        blksize = max(blksize, MIN_BLK_SIZE)
         log.debug1('Available GPU mem %f Mb, block_size %d', mem_avail/1e6, blksize)
         if blksize < ALIGNED:
             raise RuntimeError('Not enough GPU memory')
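For reference (not part of the patch), a minimal sketch of what the min-to-max change does to the block-size clamp: with max(), MIN_BLK_SIZE becomes a lower bound on the memory-derived block size rather than an upper cap. The ALIGNED and MIN_BLK_SIZE values and the pick_blksize helper below are hypothetical stand-ins, not the module's actual configuration.

```python
# Minimal sketch (not part of the patch): how the clamp direction affects the
# block size chosen in get_rho.  ALIGNED, MIN_BLK_SIZE and pick_blksize are
# hypothetical stand-ins for the module-level values.
ALIGNED = 256
MIN_BLK_SIZE = 64 * 64

def pick_blksize(mem_avail, nao):
    # 20% of free GPU memory, 8 bytes per double, roughly nao values per grid
    # point, rounded down to a multiple of ALIGNED (same formula as the diff).
    blksize = mem_avail * .2 / 8 / nao // ALIGNED * ALIGNED
    return max(blksize, MIN_BLK_SIZE)   # was min(...): a cap; now a floor

print(pick_blksize(mem_avail=8e9, nao=400))  # ample memory: memory-derived size (499968.0)
print(pick_blksize(mem_avail=1e7, nao=400))  # tight memory: raised to MIN_BLK_SIZE (4096)
```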