From bda6c5b4c307fc150773bc56807c5d994f437c73 Mon Sep 17 00:00:00 2001
From: chillenb <chillenbrand15@gmail.com>
Date: Mon, 4 Nov 2024 20:07:13 -0500
Subject: [PATCH 1/3] minimal pyproject.toml to prepare for Pip 25.0

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 pyproject.toml
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..feb5d4d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools >= 64"]
+build-backend = "setuptools.build_meta"
\ No newline at end of file

From 80bbe2be0664f27eaee68588d5779f09ea9db350 Mon Sep 17 00:00:00 2001
From: chillenb <chillenbrand15@gmail.com>
Date: Mon, 4 Nov 2024 20:09:08 -0500
Subject: [PATCH 2/3] Add TBLIS tensor_add and tensor_dot; do not skip RPATH
 when linking external TBLIS

---
 pyscf/tblis_einsum/CMakeLists.txt       |  31 ++---
 pyscf/tblis_einsum/__init__.py          |   2 +-
 pyscf/tblis_einsum/as_einsum.cxx        |  45 ++++++-
 pyscf/tblis_einsum/tblis_einsum.py      | 149 +++++++++++++++++++-----
 pyscf/tblis_einsum/tests/test_einsum.py |  20 +++-
 5 files changed, 201 insertions(+), 46 deletions(-)

diff --git a/pyscf/tblis_einsum/CMakeLists.txt b/pyscf/tblis_einsum/CMakeLists.txt
index 534fc46..7cb42b4 100644
--- a/pyscf/tblis_einsum/CMakeLists.txt
+++ b/pyscf/tblis_einsum/CMakeLists.txt
@@ -30,25 +30,30 @@ if (CMAKE_COMPILER_IS_GNUCC) # Does it skip the link flag on old OsX?
   endif()
 endif()
 
-# See also https://gitlab.kitware.com/cmake/community/wikis/doc/cmake/RPATH-handling
-if (WIN32)
-  #?
-elseif (APPLE)
-  set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-  set(CMAKE_INSTALL_RPATH "@loader_path")
-  set(CMAKE_BUILD_RPATH "@loader_path")
-else ()
-  set(CMAKE_SKIP_BUILD_RPATH  True)
-  set(CMAKE_BUILD_WITH_INSTALL_RPATH True)
-  set(CMAKE_INSTALL_RPATH "\$ORIGIN")
-endif ()
+option(VENDOR_TBLIS "Download and build tblis" on)
+
+if(VENDOR_TBLIS)
+  # The following is needed because TBLIS will be installed in the same folder
+  # as the built CPython extension.
+  # See also https://gitlab.kitware.com/cmake/community/wikis/doc/cmake/RPATH-handling
+  if (WIN32)
+    #?
+  elseif (APPLE)
+    set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
+    set(CMAKE_INSTALL_RPATH "@loader_path")
+    set(CMAKE_BUILD_RPATH "@loader_path")
+  else ()
+    set(CMAKE_SKIP_BUILD_RPATH  True)
+    set(CMAKE_BUILD_WITH_INSTALL_RPATH True)
+    set(CMAKE_INSTALL_RPATH "\$ORIGIN")
+  endif ()
+endif()
 
 set(C_LINK_TEMPLATE "<CMAKE_C_COMPILER> <CMAKE_SHARED_LIBRARY_C_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
 set(CXX_LINK_TEMPLATE "<CMAKE_CXX_COMPILER> <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
 
 add_library(tblis_einsum SHARED as_einsum.cxx)
 
-option(VENDOR_TBLIS "Download and build tblis" on)
 set_target_properties(tblis_einsum PROPERTIES
   LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}
   COMPILE_FLAGS "-std=c++11")
diff --git a/pyscf/tblis_einsum/__init__.py b/pyscf/tblis_einsum/__init__.py
index 14ff671..3342be8 100644
--- a/pyscf/tblis_einsum/__init__.py
+++ b/pyscf/tblis_einsum/__init__.py
@@ -1,3 +1,3 @@
 __version__ = '0.1.5'
 
-from .tblis_einsum import contract
+from .tblis_einsum import contract, tensor_add, tensor_mult, tensor_dot
diff --git a/pyscf/tblis_einsum/as_einsum.cxx b/pyscf/tblis_einsum/as_einsum.cxx
index 3deae64..def3d34 100644
--- a/pyscf/tblis_einsum/as_einsum.cxx
+++ b/pyscf/tblis_einsum/as_einsum.cxx
@@ -1,4 +1,5 @@
 #include <stddef.h>
+#include <stdlib.h>
 #include <tblis/tblis.h>
 using namespace tblis;
 
@@ -42,7 +43,7 @@ static void _as_tensor(tblis_tensor *t, void *data, int dtype, int ndim,
 }
 
 extern "C" {
-void as_einsum(void *data_A, int ndim_A, ptrdiff_t *shape_A, ptrdiff_t *strides_A, char *descr_A,
+void tensor_mult(void *data_A, int ndim_A, ptrdiff_t *shape_A, ptrdiff_t *strides_A, char *descr_A,
                void *data_B, int ndim_B, ptrdiff_t *shape_B, ptrdiff_t *strides_B, char *descr_B,
                void *data_C, int ndim_C, ptrdiff_t *shape_C, ptrdiff_t *strides_C, char *descr_C,
                int dtype, void *alpha, void *beta)
@@ -54,4 +55,46 @@ void as_einsum(void *data_A, int ndim_A, ptrdiff_t *shape_A, ptrdiff_t *strides_
 
     tblis_tensor_mult(NULL, NULL, &A, descr_A, &B, descr_B, &C, descr_C);
 }
+
+
+void tensor_add(void *data_A, int ndim_A, ptrdiff_t *shape_A, ptrdiff_t *strides_A, char *descr_A,
+               void *data_B, int ndim_B, ptrdiff_t *shape_B, ptrdiff_t *strides_B, char *descr_B,
+               int dtype, void *alpha, void *beta)
+{
+    tblis_tensor A, B;
+    _as_tensor(&A, data_A, dtype, ndim_A, shape_A, strides_A, alpha);
+    _as_tensor(&B, data_B, dtype, ndim_B, shape_B, strides_B, beta);
+
+    tblis_tensor_add(NULL, NULL, &A, descr_A, &B, descr_B);
+}
+
+void tensor_dot(void *data_A, int ndim_A, ptrdiff_t *shape_A, ptrdiff_t *strides_A, char *descr_A,
+               void *data_B, int ndim_B, ptrdiff_t *shape_B, ptrdiff_t *strides_B, char *descr_B,
+               int dtype, void *result)
+{
+    tblis_tensor A, B;
+    tblis_scalar s;
+    _as_tensor(&A, data_A, dtype, ndim_A, shape_A, strides_A, NULL);
+    _as_tensor(&B, data_B, dtype, ndim_B, shape_B, strides_B, NULL);
+
+    tblis_tensor_dot(NULL, NULL, &A, descr_A, &B, descr_B, &s);
+
+    ssize_t bytes;
+    switch(dtype) {
+        case TYPE_SINGLE:
+            bytes = sizeof(float);
+            break;
+        case TYPE_DOUBLE:
+            bytes = sizeof(double);
+            break;
+        case TYPE_SCOMPLEX:
+            bytes = sizeof(scomplex);
+            break;
+        case TYPE_DCOMPLEX:
+            bytes = sizeof(dcomplex);
+            break;
+    }
+
+    memcpy(result, &s.data, bytes);
 }
+}
\ No newline at end of file
diff --git a/pyscf/tblis_einsum/tblis_einsum.py b/pyscf/tblis_einsum/tblis_einsum.py
index 2fdfa98..834cd37 100644
--- a/pyscf/tblis_einsum/tblis_einsum.py
+++ b/pyscf/tblis_einsum/tblis_einsum.py
@@ -20,8 +20,8 @@
 
 libtblis = numpy.ctypeslib.load_library('libtblis_einsum', os.path.dirname(__file__))
 
-libtblis.as_einsum.restype = None
-libtblis.as_einsum.argtypes = (
+libtblis.tensor_mult.restype = None
+libtblis.tensor_mult.argtypes = (
     numpy.ctypeslib.ndpointer(), ctypes.c_int,
     ctypes.POINTER(ctypes.c_size_t), ctypes.POINTER(ctypes.c_size_t),
     ctypes.POINTER(ctypes.c_char),
@@ -35,6 +35,30 @@
     numpy.ctypeslib.ndpointer(), numpy.ctypeslib.ndpointer()
 )
 
+libtblis.tensor_add.restype = None
+libtblis.tensor_add.argtypes = (
+    numpy.ctypeslib.ndpointer(), ctypes.c_int,
+    ctypes.POINTER(ctypes.c_size_t), ctypes.POINTER(ctypes.c_size_t),
+    ctypes.POINTER(ctypes.c_char),
+    numpy.ctypeslib.ndpointer(), ctypes.c_int,
+    ctypes.POINTER(ctypes.c_size_t), ctypes.POINTER(ctypes.c_size_t),
+    ctypes.POINTER(ctypes.c_char),
+    ctypes.c_int,
+    numpy.ctypeslib.ndpointer(), numpy.ctypeslib.ndpointer()
+)
+
+libtblis.tensor_dot.restype = None
+libtblis.tensor_dot.argtypes = (
+    numpy.ctypeslib.ndpointer(), ctypes.c_int,
+    ctypes.POINTER(ctypes.c_size_t), ctypes.POINTER(ctypes.c_size_t),
+    ctypes.POINTER(ctypes.c_char),
+    numpy.ctypeslib.ndpointer(), ctypes.c_int,
+    ctypes.POINTER(ctypes.c_size_t), ctypes.POINTER(ctypes.c_size_t),
+    ctypes.POINTER(ctypes.c_char),
+    ctypes.c_int,
+    numpy.ctypeslib.ndpointer()
+)
+
 tblis_dtype = {
     numpy.dtype(numpy.float32)    : 0,
     numpy.dtype(numpy.double)     : 1,
@@ -44,6 +68,29 @@
 
 EINSUM_MAX_SIZE = 2000
 
+def ctype_strides(*arrays):
+    return [ (ctypes.c_size_t*arr.ndim)(*[x//arr.dtype.itemsize for x in arr.strides]) for arr in arrays ]
+
+def ctype_shapes(*arrays):
+    return [ (ctypes.c_size_t*arr.ndim)(*arr.shape) for arr in arrays ]
+
+def check_tblis_shapes(a, a_inds, b, b_inds, subscripts=None, c_inds=None):
+    a_shape_dic = dict(zip(a_inds, a.shape))
+    b_shape_dic = dict(zip(b_inds, b.shape))
+    if subscripts is None:
+        subscripts = a_inds + ',' + b_inds
+    if any(a_shape_dic[x] != b_shape_dic[x]
+           for x in set(a_inds).intersection(b_inds)):
+        raise ValueError('operands dimension error for "%s" : %s %s'
+                         % (subscripts, a.shape, b.shape))
+
+    if c_inds is not None:
+        ab_shape_dic = a_shape_dic
+        ab_shape_dic.update(b_shape_dic)
+        c_shape = tuple([ab_shape_dic[x] for x in c_inds])
+        return c_shape
+    return None
+
 _numpy_einsum = numpy.einsum
 def contract(subscripts, *tensors, **kwargs):
     '''
@@ -87,45 +134,87 @@ def contract(subscripts, *tensors, **kwargs):
     alpha = kwargs.get('alpha', 1)
     beta  = kwargs.get('beta', 0)
     c_dtype = numpy.result_type(c_dtype, alpha, beta)
-    alpha = numpy.asarray(alpha, dtype=c_dtype)
-    beta  = numpy.asarray(beta , dtype=c_dtype)
+
     a = numpy.asarray(a, dtype=c_dtype)
     b = numpy.asarray(b, dtype=c_dtype)
-    assert len(a_descr) == a.ndim
-    assert len(b_descr) == b.ndim
-    a_shape = a.shape
-    b_shape = b.shape
-    a_shape_dic = dict(zip(a_descr, a_shape))
-    b_shape_dic = dict(zip(b_descr, b_shape))
-    if any(a_shape_dic[x] != b_shape_dic[x]
-           for x in set(a_descr).intersection(b_descr)):
-        raise ValueError('operands dimension error for "%s" : %s %s'
-                         % (subscripts, a_shape, b_shape))
 
-    ab_shape_dic = a_shape_dic
-    ab_shape_dic.update(b_shape_dic)
-    c_shape = tuple([ab_shape_dic[x] for x in c_descr])
+    c_shape = check_tblis_shapes(a, a_descr, b, b_descr, subscripts=subscripts, c_inds=c_descr)
 
     out = kwargs.get('out', None)
     if out is None:
         order = kwargs.get('order', 'C')
         c = numpy.empty(c_shape, dtype=c_dtype, order=order)
     else:
-        assert(out.dtype == c_dtype)
-        assert(out.shape == c_shape)
         c = out
+    return tensor_mult(a, a_descr, b, b_descr, c, c_descr, alpha=alpha, beta=beta, dtype=c_dtype)
+
+def tensor_mult(a, a_inds, b, b_inds, c, c_inds, alpha=1, beta=0, dtype=None):
+    ''' Wrapper for tblis_tensor_mult
+
+    Performs the einsum operation
+    c_{c_inds} = alpha * SUM[a_{a_inds} * b_{b_inds}] + beta * c_{c_inds}
+    where the sum is over indices in a_inds and b_inds that are not in c_inds.
+    '''
 
-    a_shape = (ctypes.c_size_t*a.ndim)(*a_shape)
-    b_shape = (ctypes.c_size_t*b.ndim)(*b_shape)
-    c_shape = (ctypes.c_size_t*c.ndim)(*c_shape)
+    if dtype is None:
+        dtype = c.dtype.type
+    assert dtype == c.dtype.type
+    assert dtype == a.dtype.type
+    assert dtype == b.dtype.type
 
-    nbytes = c_dtype.itemsize
-    a_strides = (ctypes.c_size_t*a.ndim)(*[x//nbytes for x in a.strides])
-    b_strides = (ctypes.c_size_t*b.ndim)(*[x//nbytes for x in b.strides])
-    c_strides = (ctypes.c_size_t*c.ndim)(*[x//nbytes for x in c.strides])
+    alpha = numpy.asarray(alpha, dtype=dtype)
+    beta  = numpy.asarray(beta , dtype=dtype)
 
-    libtblis.as_einsum(a, a.ndim, a_shape, a_strides, a_descr.encode('ascii'),
-                       b, b.ndim, b_shape, b_strides, b_descr.encode('ascii'),
-                       c, c.ndim, c_shape, c_strides, c_descr.encode('ascii'),
-                       tblis_dtype[c_dtype], alpha, beta)
+    assert len(a_inds) == a.ndim
+    assert len(b_inds) == b.ndim
+    assert len(c_inds) == c.ndim
+
+    a_shape, b_shape, c_shape = ctype_shapes(a, b, c)
+    a_strides, b_strides, c_strides = ctype_strides(a, b, c)
+    assert c.shape == check_tblis_shapes(a, a_inds, b, b_inds, c_inds=c_inds)
+
+
+    libtblis.tensor_mult(a, a.ndim, a_shape, a_strides, a_inds.encode('ascii'),
+                       b, b.ndim, b_shape, b_strides, b_inds.encode('ascii'),
+                       c, c.ndim, c_shape, c_strides, c_inds.encode('ascii'),
+                       tblis_dtype[c.dtype], alpha, beta)
     return c
+
+
+def tensor_add(a, a_inds, b, b_inds, alpha=1, beta=1):
+    '''Wrapper for tblis_tensor_add
+    '''
+    assert a.dtype.type == b.dtype.type
+
+    alpha = numpy.asarray(alpha, dtype=b.dtype)
+    beta  = numpy.asarray(beta , dtype=b.dtype)
+
+    assert len(a_inds) == a.ndim
+    assert len(b_inds) == b.ndim
+
+    a_shape, b_shape = ctype_shapes(a, b)
+    a_strides, b_strides = ctype_strides(a, b)
+
+    libtblis.tensor_add(a, a.ndim, a_shape, a_strides, a_inds.encode('ascii'),
+                       b, b.ndim, b_shape, b_strides, b_inds.encode('ascii'),
+                       tblis_dtype[b.dtype], alpha, beta)
+
+def tensor_dot(a, a_inds, b, b_inds):
+    '''Wrapper for tblis_tensor_dot
+    '''
+
+    assert a.dtype.type == b.dtype.type
+
+    assert len(a_inds) == a.ndim
+    assert len(b_inds) == b.ndim
+
+    a_shape, b_shape = ctype_shapes(a, b)
+    a_strides, b_strides = ctype_strides(a, b)
+
+    result = numpy.zeros(1, dtype=a.dtype.type)
+
+    libtblis.tensor_dot(a, a.ndim, a_shape, a_strides, a_inds.encode('ascii'),
+                       b, b.ndim, b_shape, b_strides, b_inds.encode('ascii'),
+                       tblis_dtype[b.dtype], result)
+
+    return result[0]
diff --git a/pyscf/tblis_einsum/tests/test_einsum.py b/pyscf/tblis_einsum/tests/test_einsum.py
index 64877d5..0df7789 100644
--- a/pyscf/tblis_einsum/tests/test_einsum.py
+++ b/pyscf/tblis_einsum/tests/test_einsum.py
@@ -1,6 +1,6 @@
 import unittest
 import numpy as np
-from pyscf.tblis_einsum import tblis_einsum
+from pyscf.tblis_einsum import tblis_einsum, tensor_add, tensor_mult, tensor_dot
 
 def setUpModule():
     global bak
@@ -129,6 +129,24 @@ def test_contraction6(self):
         res_tblis = tblis_einsum.contract("ij,jk->k", c, c)
         self.assertTrue (abs(res_np - res_tblis).max() < 1e-13)
 
+    def test_tensor_add(self):
+        a = np.random.random((3,3,3,3))
+        b = np.random.random((3,3,3,3))
+        c0 = np.transpose(a, (1,2,3,0)) + b
+        tensor_add(a, 'lijk', b, 'ijkl')
+        print(abs(c0-b).max())
+        self.assertTrue(abs(c0-b).max() < 1e-14)
+
+    def test_tensor_add_scaled(self):
+        a = np.random.random((7,1,3,4))
+        b = np.random.random((7,1,3,4))
+        alpha = 2.0
+        beta = 3.0
+        c0 = alpha*a + beta*b
+        tensor_add(a, 'ijkl', b, 'ijkl', alpha=alpha, beta=beta)
+        self.assertTrue(abs(c0-b).max() < 1e-14)
+
+
 
 if __name__ == '__main__':
     unittest.main()

From 7bc70f9238ff3e4728deebc4d771fdb5245e3885 Mon Sep 17 00:00:00 2001
From: chillenb <chillenbrand15@gmail.com>
Date: Mon, 4 Nov 2024 20:22:55 -0500
Subject: [PATCH 3/3] add tensor_dot test

---
 pyscf/tblis_einsum/tests/test_einsum.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pyscf/tblis_einsum/tests/test_einsum.py b/pyscf/tblis_einsum/tests/test_einsum.py
index 0df7789..d3145f2 100644
--- a/pyscf/tblis_einsum/tests/test_einsum.py
+++ b/pyscf/tblis_einsum/tests/test_einsum.py
@@ -134,7 +134,6 @@ def test_tensor_add(self):
         b = np.random.random((3,3,3,3))
         c0 = np.transpose(a, (1,2,3,0)) + b
         tensor_add(a, 'lijk', b, 'ijkl')
-        print(abs(c0-b).max())
         self.assertTrue(abs(c0-b).max() < 1e-14)
 
     def test_tensor_add_scaled(self):
@@ -146,6 +145,13 @@ def test_tensor_add_scaled(self):
         tensor_add(a, 'ijkl', b, 'ijkl', alpha=alpha, beta=beta)
         self.assertTrue(abs(c0-b).max() < 1e-14)
 
+    def test_tensor_dot(self):
+        a = np.random.random((3,3,3,3))
+        b = np.random.random((3,3,3,3))
+        ans = np.einsum('lijk,ijkl->', a, b)
+        ans2 = tensor_dot(a, 'lijk', b, 'ijkl')
+        self.assertTrue(abs(ans-ans2) < 1e-14)
+
 
 
 if __name__ == '__main__':