Added L2 cache size property to CUDA backends

fjwillemsen · fjwillemsen · commit a020791a5559 · 2024-02-28T10:13:25.000+01:00
diff --git a/kernel_tuner/backends/cupy.py b/kernel_tuner/backends/cupy.py
@@ -46,6 +46,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
         self.devprops = dev.attributes
         self.cc = dev.compute_capability
         self.max_threads = self.devprops["MaxThreadsPerBlock"]
+        self.cache_size_L2 = self.devprops["L2CacheSize"]
 
         self.iterations = iterations
         self.current_module = None
diff --git a/kernel_tuner/backends/nvcuda.py b/kernel_tuner/backends/nvcuda.py
@@ -66,6 +66,10 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
             cudart.cudaDeviceAttr.cudaDevAttrMaxThreadsPerBlock, device
         )
         cuda_error_check(err)
+        err, self.cache_size_L2 = cudart.cudaDeviceGetAttribute(
+            cudart.cudaDeviceAttr.cudaDevAttrL2CacheSize, device
+        )
+        cuda_error_check(err)
         self.cc = f"{major}{minor}"
         self.iterations = iterations
         self.current_module = None
diff --git a/kernel_tuner/backends/pycuda.py b/kernel_tuner/backends/pycuda.py
@@ -101,6 +101,7 @@ def _finish_up():
             str(k): v for (k, v) in self.context.get_device().get_attributes().items()
         }
         self.max_threads = devprops["MAX_THREADS_PER_BLOCK"]
+        self.cache_size_L2 = devprops["L2_CACHE_SIZE"]
         cc = str(devprops.get("COMPUTE_CAPABILITY_MAJOR", "0")) + str(
             devprops.get("COMPUTE_CAPABILITY_MINOR", "0")
         )

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -101,6 +101,7 @@ def _finish_up():` |
| `101` | `101` | `str(k): v for (k, v) in self.context.get_device().get_attributes().items()` |
| `102` | `102` | `}` |
| `103` | `103` | `self.max_threads = devprops["MAX_THREADS_PER_BLOCK"]` |
|  | `104` | `+ self.cache_size_L2 = devprops["L2_CACHE_SIZE"]` |
| `104` | `105` | `cc = str(devprops.get("COMPUTE_CAPABILITY_MAJOR", "0")) + str(` |
| `105` | `106` | `devprops.get("COMPUTE_CAPABILITY_MINOR", "0")` |
| `106` | `107` | `)` |