Commit 9c1ee06

More hacking around

committed
1 parent 4ef29b1 commit 9c1ee06

File tree

10 files changed: +107 -95 lines changed

pytensor/link/numba/cache.py
19 additions, 18 deletions

@@ -1,6 +1,6 @@
 from collections.abc import Callable
 from pathlib import Path
-from tempfile import NamedTemporaryFile, TemporaryFile
+from tempfile import NamedTemporaryFile
 from typing import Any

 from numba.core.caching import CacheImpl, _CacheLocator
@@ -9,7 +9,9 @@


 NUMBA_PYTENSOR_CACHE_ENABLED = True
-COMPILED_SRC_FUNCTIONS = {}
+NUMBA_CACHE_PATH = config.base_compiledir / "numba"
+NUMBA_CACHE_PATH.mkdir(exist_ok=True)
+CACHED_SRC_FUNCTIONS = {}


 def compile_and_cache_numba_function_src(
@@ -20,9 +22,7 @@ def compile_and_cache_numba_function_src(
     key: str | None = None,
 ) -> Callable:
     if key is not None:
-        numba_path = config.base_compiledir / "numba"
-        numba_path.mkdir(exist_ok=True)
-        filename = numba_path / key
+        filename = NUMBA_CACHE_PATH / key
         with filename.open("wb") as f:
             f.write(src.encode())
     else:
@@ -43,10 +43,19 @@ def compile_and_cache_numba_function_src(
     res.__source__ = src  # type: ignore

     if key is not None:
-        COMPILED_SRC_FUNCTIONS[res] = key
+        CACHED_SRC_FUNCTIONS[res] = key
     return res


+def cache_numba_function(
+    fn,
+    key: str | None = None,
+) -> Callable:
+    if key is not None:
+        CACHED_SRC_FUNCTIONS[fn] = key
+    return fn
+
+
 class NumbaPyTensorCacheLocator(_CacheLocator):
     def __init__(self, py_func, py_file, hash):
         # print(f"New locator {py_func=}, {py_file=}, {hash=}")
@@ -57,34 +66,26 @@ def __init__(self, py_func, py_file, hash):
         # self._hash = hash((src_hash, py_file, pytensor.__version__))

     def ensure_cache_path(self):
-        # print("ensure_cache_path called")
-        path = self.get_cache_path()
-        path.mkdir(exist_ok=True)
-        # Ensure the directory is writable by trying to write a temporary file
-        TemporaryFile(dir=path).close()
+        pass

     def get_cache_path(self):
         """
         Return the directory the function is cached in.
         """
-        # print("get_cache_path called")
-        return self._py_file
+        return NUMBA_CACHE_PATH

     def get_source_stamp(self):
         """
         Get a timestamp representing the source code's freshness.
         Can return any picklable Python object.
         """
         return 0
-        # print("get_source_stamp called")
-        return self._hash

     def get_disambiguator(self):
         """
         Get a string disambiguator for this locator's function.
         It should allow disambiguating different but similarly-named functions.
         """
-        # print("get_disambiguator called")
         return self._hash

     @classmethod
@@ -94,9 +95,9 @@ def from_function(cls, py_func, py_file):
         """
         # py_file = Path(py_file).parent
         # if py_file == (config.base_compiledir / "numba"):
-        if NUMBA_PYTENSOR_CACHE_ENABLED and py_func in COMPILED_SRC_FUNCTIONS:
+        if NUMBA_PYTENSOR_CACHE_ENABLED and py_func in CACHED_SRC_FUNCTIONS:
            # print(f"Applies to {py_file}")
-            return cls(py_func, Path(py_file).parent, COMPILED_SRC_FUNCTIONS[py_func])
+            return cls(py_func, Path(py_file).parent, CACHED_SRC_FUNCTIONS[py_func])


 CacheImpl._locator_classes.insert(0, NumbaPyTensorCacheLocator)
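Note on the mechanism (not part of the commit): Numba resolves a cache location by walking CacheImpl._locator_classes in order and using the first class whose from_function returns a locator instead of None, which is why the locator above is inserted at position 0. A minimal, self-contained sketch with hypothetical names (MyLocator, MY_CACHE_DIR, REGISTERED_FUNCS):

# Minimal sketch of a custom cache locator; names below are hypothetical
# stand-ins, not PyTensor's actual implementation.
from pathlib import Path

from numba.core.caching import CacheImpl, _CacheLocator

MY_CACHE_DIR = Path("/tmp/my_numba_cache")
REGISTERED_FUNCS: dict = {}  # py_func -> stable key, filled by the compiler


class MyLocator(_CacheLocator):
    def __init__(self, py_func, py_file, key):
        self._py_file = py_file
        self._key = key

    def ensure_cache_path(self):
        MY_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    def get_cache_path(self):
        # Directory where Numba writes its .nbi/.nbc cache files
        return MY_CACHE_DIR

    def get_source_stamp(self):
        # Constant stamp: freshness is already encoded in the key
        return 0

    def get_disambiguator(self):
        # Distinguishes same-named functions sharing one cache directory
        return self._key

    @classmethod
    def from_function(cls, py_func, py_file):
        if py_func in REGISTERED_FUNCS:
            return cls(py_func, py_file, REGISTERED_FUNCS[py_func])
        return None  # fall through to Numba's default locators


CacheImpl._locator_classes.insert(0, MyLocator)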

pytensor/link/numba/dispatch/basic.py
55 additions, 16 deletions

@@ -3,6 +3,7 @@
 import warnings
 from copy import copy
 from functools import singledispatch
+from hashlib import sha256
 from textwrap import dedent

 import numba
@@ -11,11 +12,11 @@
 import scipy
 import scipy.special
 from llvmlite import ir
+from numba import njit as _njit
 from numba import types
 from numba.core.errors import NumbaWarning, TypingError
 from numba.cpython.unsafe.tuple import tuple_setitem  # noqa: F401
-from numba.extending import box, overload
-from numba.extending import register_jitable as _register_jitable
+from numba.extending import box, overload, register_jitable

 from pytensor import In, config
 from pytensor.compile import NUMBA
@@ -26,7 +27,9 @@
 from pytensor.graph.fg import FunctionGraph
 from pytensor.graph.type import Type
 from pytensor.ifelse import IfElse
-from pytensor.link.numba.cache import compile_and_cache_numba_function_src
+from pytensor.link.numba.cache import (
+    compile_and_cache_numba_function_src,
+)
 from pytensor.link.numba.dispatch.sparse import CSCMatrixType, CSRMatrixType
 from pytensor.link.utils import fgraph_to_python
 from pytensor.scalar.basic import ScalarType
@@ -50,11 +53,7 @@ def global_numba_func(func):
     return func


-def numba_njit(*args, fastmath=None, register_jitable: bool = True, **kwargs):
-    kwargs.setdefault("cache", True)
-    kwargs.setdefault("no_cpython_wrapper", False)
-    kwargs.setdefault("no_cfunc_wrapper", False)
-
+def numba_njit(*args, fastmath=None, final_function: bool = False, **kwargs):
     if fastmath is None:
         if config.numba__fastmath:
             # Opinionated default on fastmath flags
@@ -69,6 +68,12 @@ def numba_njit(*args, fastmath=None, register_jitable: bool = True, **kwargs):
     else:
         fastmath = False

+    if final_function:
+        kwargs.setdefault("cache", True)
+    # else:
+    #     kwargs.setdefault("no_cpython_wrapper", True)
+    #     kwargs.setdefault("no_cfunc_wrapper", True)
+
     # Suppress cache warning for internal functions
     # We have to add an ansi escape code for optional bold text by numba
     warnings.filterwarnings(
@@ -82,7 +87,7 @@ def numba_njit(*args, fastmath=None, register_jitable: bool = True, **kwargs):
         category=NumbaWarning,
     )

-    func = _register_jitable if register_jitable else numba.njit
+    func = register_jitable if final_function else _njit
     if len(args) > 0 and callable(args[0]):
         return func(*args[1:], fastmath=fastmath, **kwargs)(args[0])
     else:
@@ -384,8 +389,43 @@ def numba_funcify_FunctionGraph(
     **kwargs,
 ):
     def numba_funcify_njit(op, node, **kwargs):
-        jitable_func = numba_funcify(op, node=node, **kwargs)
-        return numba_njit(lambda *args: jitable_func(*args), register_jitable=False)
+        jitable_func_and_key = numba_funcify(op, node=node, **kwargs)
+        from collections.abc import Callable
+
+        match jitable_func_and_key:
+            case (Callable(), str()):
+                jitable_func, key = jitable_func_and_key
+            case (Callable(), int()):
+                # Default key for Ops that return an integer
+                jitable_func, int_key = jitable_func_and_key
+                key = sha256(
+                    str((type(op), op._props_dict(), int_key)).encode()
+                ).hexdigest()
+            case Callable():
+                jitable_func, key = jitable_func_and_key, None
+                warnings.warn(
+                    f"No cache key returned by numba_funcify of op {op}. This function won't be cached by Numba"
+                )
+            case _:
+                raise TypeError(
+                    f"numpy_funcify should return a callable or a callable, key pair, got {jitable_func_and_key}"
+                )
+
+        if 0 and key is not None:
+            # To force numba to use our cache, we must compile the function so that any closure
+            # becomes a global variable...
+            op_name = op.__class__.__name__
+            cached_func = compile_and_cache_numba_function_src(
+                src=f"def {op_name}(*args): return jitable_func(*args)",
+                function_name=op_name,
+                global_env=globals() | dict(jitable_func=jitable_func),
+                key=key,
+            )
+            return numba_njit(cached_func, final_function=True, cache=True)
+        else:
+            return numba_njit(
+                lambda *args: jitable_func(*args), final_function=True, cache=False
+            )

     return fgraph_to_python(
         fgraph,
@@ -410,7 +450,7 @@ def dispatch_deepcopyop(x):

 @numba_funcify.register(DeepCopyOp)
 def numba_funcify_DeepCopyOp(op, node, **kwargs):
-    return deepcopyop
+    return deepcopyop, 0


 @numba_funcify.register(MakeSlice)
@@ -439,7 +479,7 @@ def numba_funcify_Shape_i(op, **kwargs):
     def shape_i(x):
         return np.asarray(np.shape(x)[i])

-    return shape_i
+    return shape_i, 0


 @numba_funcify.register(SortOp)
@@ -543,7 +583,7 @@ def reshape(x, shape):
             numba_ndarray.to_fixed_tuple(shape, ndim),
         )

-    return reshape
+    return reshape, 0


 @numba_funcify.register(SpecifyShape)
@@ -571,9 +611,8 @@ def specify_shape(x, {create_arg_string(shape_input_names)}):
         func,
         "specify_shape",
         globals(),
-        key=hash_from_code(func),
     )
-    return numba_njit(specify_shape)
+    return numba_njit(specify_shape), hash_from_code(func)


 def int_to_float_fn(inputs, out_dtype):
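The match statement in numba_funcify_njit establishes the convention the remaining files follow: a registration may return a bare callable (no caching, with a warning), a (callable, str) pair whose key is used verbatim, or a (callable, int) pair whose key is derived from the op. A small sketch of the int-key derivation, with a hypothetical DummyOp standing in for a real Op:

from hashlib import sha256


class DummyOp:
    # Hypothetical Op; real PyTensor Ops expose _props_dict() when __props__ is set
    def _props_dict(self):
        return {"axis": 0}


def jitable_func(x):
    return x


op = DummyOp()
int_key = 0  # what registrations like `return shape_i, 0` provide
key = sha256(str((type(op), op._props_dict(), int_key)).encode()).hexdigest()
# `key` is a stable hex digest, usable as the Numba cache disambiguator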

pytensor/link/numba/dispatch/elemwise.py
4 additions, 26 deletions

@@ -288,10 +288,7 @@ def numba_funcify_Elemwise(op, node, **kwargs):
             ),
         )
     )
-    core_op_key = sha256(core_op_key.encode()).hexdigest()
-    core_op_fn = store_core_outputs(
-        scalar_op_fn, nin=nin, nout=nout, core_op_key=core_op_key
-    )
+    core_op_fn = store_core_outputs(scalar_op_fn, nin=nin, nout=nout)

     input_bc_patterns = tuple(inp.type.broadcastable for inp in node.inputs)
     output_bc_patterns = tuple(out.type.broadcastable for out in node.outputs)
@@ -342,27 +339,8 @@ def elemwise(*inputs):
     def ov_elemwise(*inputs):
         return elemwise_wrapper

-    # TODO: Also input dtypes in key
-    elemwise_key = "_".join(
-        map(
-            str,
-            (
-                "Elemwise",
-                core_op_key,
-                input_bc_patterns,
-                inplace_pattern,
-            ),
-        )
-    )
-    elemwise_key = sha256(elemwise_key.encode()).hexdigest()
-    f = compile_and_cache_numba_function_src(
-        "def f(*inputs): return elemwise(*inputs)",
-        "f",
-        {**globals(), **{"elemwise": elemwise}},
-        key=elemwise_key,
-    )
-
-    return numba_njit(f)
+    elemwise_key = sha256(f"Elemwise2{core_op_key}".encode()).hexdigest()
+    return elemwise, elemwise_key


 @numba_funcify.register(Sum)
@@ -470,7 +448,7 @@ def dimshuffle(x):

         return as_strided(x, shape=new_shape, strides=new_strides)

-    return dimshuffle
+    return dimshuffle, 0


 @numba_funcify.register(Softmax)
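The Elemwise key is now a digest of just the core op key behind an "Elemwise2" prefix (which reads like a version tag); the broadcast and inplace patterns the deleted code folded in, and the TODO about input dtypes, no longer contribute to the hash. A sketch with a hypothetical core_op_key value:

from hashlib import sha256

core_op_key = "Add_(float64, float64)"  # hypothetical value built earlier in the function
elemwise_key = sha256(f"Elemwise2{core_op_key}".encode()).hexdigest()
# returned alongside `elemwise`; numba_funcify_njit treats the str as a verbatim key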

pytensor/link/numba/dispatch/extra_ops.py
1 addition, 1 deletion

@@ -367,4 +367,4 @@ def check_and_raise(x, *conditions):
             raise error(msg)
         return x

-    return check_and_raise
+    return check_and_raise, 0

pytensor/link/numba/dispatch/scalar.py
1 addition, 7 deletions

@@ -136,13 +136,7 @@ def {scalar_op_fn_name}({', '.join(input_names)}):

     # signature = create_numba_signature(node, force_scalar=True)

-    return numba_basic.numba_njit(
-        # signature,
-        # Functions that call a function pointer can't be cached
-        no_cfunc_wrapper=True,
-        no_cpython_wrapper=True,
-        register_jitable=False,
-    )(scalar_op_fn)
+    return numba_basic.numba_njit(scalar_op_fn)


 @numba_funcify.register(Switch)

pytensor/link/numba/dispatch/subtensor.py
3 additions, 4 deletions

@@ -101,9 +101,8 @@ def {function_name}({", ".join(input_names)}):
         subtensor_def_src,
         function_name=function_name,
         global_env=globals() | {"np": np},
-        key=hash_from_code(subtensor_def_src),
     )
-    return numba_njit(func, boundscheck=True)
+    return numba_njit(func, boundscheck=True), hash_from_code(subtensor_def_src)


 @numba_funcify.register(AdvancedSubtensor)
@@ -350,7 +349,7 @@ def advancedincsubtensor1_inplace(x, vals, idxs):
         return x

     if inplace:
-        return advancedincsubtensor1_inplace
+        return advancedincsubtensor1_inplace, 0

     else:

@@ -359,4 +358,4 @@ def advancedincsubtensor1(x, vals, idxs):
             x = x.copy()
             return advancedincsubtensor1_inplace(x, vals, idxs)

-        return advancedincsubtensor1
+        return advancedincsubtensor1, 0
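As elsewhere in this commit, the generated-source hash moves from the compile helper's key= argument to the second element of the returned pair. A sketch of the idea, with a stand-in for PyTensor's hash_from_code helper and a hypothetical source string:

from hashlib import sha256


def hash_from_code(src: str) -> str:
    # Stand-in for PyTensor's helper of the same name (assumed behavior:
    # a stable digest of the source text)
    return sha256(src.encode()).hexdigest()


subtensor_def_src = "def subtensor(x, i): return x[i]"  # hypothetical generated source
key = hash_from_code(subtensor_def_src)
# numba_funcify now returns (compiled_fn, key); the FunctionGraph-level
# wrapper decides whether and how to cache using that key.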
