Gemm optimizer spends too much time creating constants of the wrong type and then casting them

ricardoV94 · ricardoV94 · commit 6cf584248fbc · 2025-09-05T23:13:02.000+02:00
diff --git a/pytensor/tensor/rewriting/blas.py b/pytensor/tensor/rewriting/blas.py
@@ -83,7 +83,13 @@
 )
 from pytensor.graph.rewriting.db import SequenceDB
 from pytensor.graph.utils import InconsistencyError
-from pytensor.tensor import basic as ptb
+from pytensor.tensor import as_tensor_variable
+from pytensor.tensor.basic import (
+    AllocEmpty,
+    cast,
+    get_underlying_scalar_constant_value,
+    zeros,
+)
 from pytensor.tensor.blas import (
     Dot22,
     _batched_dot,
@@ -143,7 +149,7 @@ def _as_scalar(res, dtype=None):
             # as the cast of the scalar can be done before or after the dot22
             # and this will give the same result.
             if pytensor.scalar.upcast(res.dtype, dtype) == dtype:
-                return ptb.cast(rval, dtype)
+                return cast(rval, dtype)
             else:
                 return None
 
@@ -358,13 +364,13 @@ def _gemm_from_factored_list(fgraph, lst):
         # sM can be a tuple of 2 elements or an PyTensor variable.
         if isinstance(sM, tuple):
             sm0, sm1 = sM
-            sm0 = ptb.as_tensor_variable(sm0)
-            sm0_dtype = sm0.type.dtype
             sm1_dtype = sm1.type.dtype
+            sm0 = as_tensor_variable(sm0, dtype=sm1_dtype)
+            sm0_dtype = sm0.type.dtype
             if sm0_dtype == sm1_dtype:
                 lst2.append((sm0, sm1))
             elif upcast(sm0_dtype, sm1_dtype) == sm1_dtype:
-                lst2.append((ptb.cast(sm0, sm1_dtype), sm1))
+                lst2.append((cast(sm0, sm1_dtype), sm1))
 
     lst = lst2
 
@@ -654,7 +660,7 @@ def local_gemm_to_ger(fgraph, node):
         xv = x.dimshuffle(0)
         yv = y.dimshuffle(1)
         try:
-            bval = ptb.get_underlying_scalar_constant_value(b)
+            bval = get_underlying_scalar_constant_value(b)
         except NotScalarConstantError:
             # b isn't a constant, GEMM is doing useful pre-scaling
             return
@@ -663,8 +669,7 @@ def local_gemm_to_ger(fgraph, node):
             rval = ger(z, a, xv, yv)
             new_out = [rval]
         elif bval == 0:  # GER on zeros_like should be faster than GEMM
-            zeros = ptb.zeros([x.shape[0], y.shape[1]], x.dtype)
-            rval = ger(zeros, a, xv, yv)
+            rval = ger(zeros([x.shape[0], y.shape[1]], x.dtype), a, xv, yv)
             new_out = [rval]
         else:
             # if bval is another constant, then z is being usefully
@@ -681,32 +686,32 @@ def local_dot22_to_ger_or_gemv(fgraph, node):
     x, y = node.inputs
     xb = x.broadcastable
     yb = y.broadcastable
-    one = ptb.as_tensor_variable(np.asarray(1, dtype=x.dtype))
-    zero = ptb.as_tensor_variable(np.asarray(0, dtype=x.dtype))
+    one = as_tensor_variable(np.asarray(1, dtype=x.dtype))
+    zero = as_tensor_variable(np.asarray(0, dtype=x.dtype))
     if xb[1] and yb[0]:
         # x and y are both vectors so this might qualifies for a GER
         xv = x.dimshuffle(0)
         yv = y.dimshuffle(1)
-        zeros = ptb.zeros([x.shape[0], y.shape[1]], dtype=x.dtype)
+        zeros = zeros([x.shape[0], y.shape[1]], dtype=x.dtype)
         rval = ger(zeros, one, xv, yv)
         new_out = [rval]
     elif xb[0] and yb[1]:
         # x and y are both vectors so this qualifies for a sdot / ddot
         # PyTensor's CGemv will call sdot/ddot at runtime, the Scipy Gemv may not
         xv = x.dimshuffle(1)
-        zeros = ptb.AllocEmpty(x.dtype)(1)
+        zeros = AllocEmpty(x.dtype)(1)
         rval = gemv_no_inplace(zeros, one, y.T, xv, zero)
         new_out = [rval.dimshuffle("x", 0)]
     elif xb[0] and not yb[0] and not yb[1]:
         # x is vector, y is matrix so try gemv
         xv = x.dimshuffle(1)
-        zeros = ptb.AllocEmpty(x.dtype)(y.shape[1])
+        zeros = AllocEmpty(x.dtype)(y.shape[1])
         rval = gemv_no_inplace(zeros, one, y.T, xv, zero)
         new_out = [rval.dimshuffle("x", 0)]
     elif not xb[0] and not xb[1] and yb[1]:
         # x is matrix, y is vector, try gemv
         yv = y.dimshuffle(0)
-        zeros = ptb.AllocEmpty(x.dtype)(x.shape[0])
+        zeros = AllocEmpty(x.dtype)(x.shape[0])
         rval = gemv_no_inplace(zeros, one, x, yv, zero)
         new_out = [rval.dimshuffle(0, "x")]
     else:
@@ -841,9 +846,7 @@ def local_dot22_to_dot22scalar(fgraph, node):
                 " matrix type"
             )
             return False
-        a = ptb.cast(
-            _as_scalar(m.owner.inputs[scalar_idx], dtype=d.dtype), d.type.dtype
-        )
+        a = cast(_as_scalar(m.owner.inputs[scalar_idx], dtype=d.dtype), d.type.dtype)
         assert not a.type.ndim
         dot = _dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)
 
@@ -881,7 +884,7 @@ def local_dot22_to_dot22scalar(fgraph, node):
     o.remove(d)
     o.remove(s)
 
-    a = ptb.cast(i_scalar[scalar_idx], d.type.dtype)
+    a = cast(i_scalar[scalar_idx], d.type.dtype)
     assert not a.type.ndim
     if len(o) == 0:
         return [_dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)]