Merge pull request #221 from SwayamInSync/216

ngoldbaum · web-flow · commit 8487df328eec · 2025-11-07T13:50:31.000-07:00
diff --git a/quaddtype/README.md b/quaddtype/README.md
@@ -31,6 +31,7 @@ np.array([1,2,3], dtype=QuadPrecDType("longdouble"))
 - **CMake** (≥3.15)
 - **Python 3.10+**
 - **Git**
+- **NumPy >= 2.4** (build from source)
 
 ### Linux/Unix/macOS
 
@@ -48,11 +49,11 @@ pip install numpy pytest
 # export CFLAGS="-DDISABLE_QUADBLAS"
 # export CXXFLAGS="-DDISABLE_QUADBLAS"
 
-python -m pip install . -v
+python -m pip install . -v --no-build-isolation
 
 # Run the tests
 cd ..
-python -m pytest
+python -m pytest quaddtype/tests/
 ```
 
 ### Windows
@@ -94,14 +95,14 @@ python -m pytest
 
    ```powershell
    # Build and install the package
-   python -m pip install . -v
+   python -m pip install . -v --no-build-isolation
    ```
 
 5. **Test Installation**
 
    ```powershell
    # Run tests
-   pytest -s tests/
+   pytest -s ..\quaddtype\tests\
    ```
 
 6. **QBLAS Disabled**: QuadBLAS optimization is automatically disabled on Windows builds due to MSVC compatibility issues. This is handled by the `-DDISABLE_QUADBLAS` compiler flag.
@@ -112,3 +113,58 @@ python -m pytest
    - VS 2017: `"Visual Studio 15 2017"`
 
 8. **Architecture**: The instructions are for x64. For x86 builds, change `-A x64` to `-A Win32`.
+
+## Building with ThreadSanitizer (TSan)
+
+This is a development feature to help detect threading issues. To build `numpy-quaddtype` with TSan enabled, follow these steps:
+
+> Using clang is recommended on machines that do NOT support `libquadmath` (such as ARM64). Set the compiler to clang/clang++ before proceeding.
+> ```bash
+> export CC=clang
+> export CXX=clang++
+> ```
+
+1. Compile free-threaded CPython with TSan support. Follow the [Python Free-Threading Guide](https://py-free-threading.github.io/thread_sanitizer/#compile-free-threaded-cpython-with-tsan) for detailed instructions.
+2. Create and activate a virtual environment using the TSan-enabled Python build.
+3. Install dependencies:
+
+  ```bash
+  python -m pip install meson meson-python wheel ninja
+  # Need NumPy built with TSan as well
+  python -m pip install "numpy @ git+https://github.com/numpy/numpy" -C'setup-args=-Db_sanitize=thread'
+  ```
+4. Build SLEEF with TSan:
+
+  ```bash
+  # clone the repository
+  git clone -b 3.8 https://github.com/shibatch/sleef.git
+  cd sleef
+  
+  # Build SLEEF with TSan
+  cmake \
+  -DCMAKE_C_COMPILER=clang \
+  -DCMAKE_CXX_COMPILER=clang++ \
+  -DCMAKE_C_FLAGS="-fsanitize=thread -g -O1" \
+  -DCMAKE_CXX_FLAGS="-fsanitize=thread -g -O1" \
+  -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=thread" \
+  -DCMAKE_SHARED_LINKER_FLAGS="-fsanitize=thread" \
+  -DSLEEF_BUILD_QUAD=ON \
+  -DSLEEF_BUILD_TESTS=OFF \
+  -S . -B build
+
+  cmake --build build -j
+
+  # Install the built library and headers into the system path (/usr/local)
+  sudo cmake --install build --prefix=/usr/local
+  ```
+5. Build and install `numpy-quaddtype` with TSan:
+
+  ```bash
+  # SLEEF is already installed with TSan, so we need to pass the proper flags to numpy-quaddtype's meson file
+  # so that it does not build SLEEF again and uses the installed one instead.
+
+  export CFLAGS="-fsanitize=thread -g -O0" 
+  export CXXFLAGS="-fsanitize=thread -g -O0"
+  export LDFLAGS="-fsanitize=thread"
+  python -m pip install . -vv --no-build-isolation -Csetup-args=-Db_sanitize=thread
+  ```
diff --git a/quaddtype/numpy_quaddtype/_quaddtype_main.pyi b/quaddtype/numpy_quaddtype/_quaddtype_main.pyi
@@ -1,5 +1,5 @@
 from typing import Any, Literal, TypeAlias, final, overload
-
+import builtins
 import numpy as np
 from numpy._typing import _128Bit  # pyright: ignore[reportPrivateUsage]
 from typing_extensions import Never, Self, override
@@ -157,9 +157,10 @@ class QuadPrecision(np.floating[_128Bit]):
     # NOTE: is_integer() and as_integer_ratio() are defined on numpy.floating in the
     # stubs, but don't exist at runtime. And because QuadPrecision does not implement
     # them, we use this hacky workaround to emulate their absence.
-    # TODO: Remove after https://github.com/numpy/numpy-user-dtypes/issues/216
-    is_integer: Never  # pyright: ignore[reportIncompatibleMethodOverride]
-    as_integer_ratio: Never  # pyright: ignore[reportIncompatibleMethodOverride]
+    @override  # NOTE(review): the "don't exist at runtime" comment above is stale now that this is implemented
+    def is_integer(self, /) -> builtins.bool: ...  # runtime: QuadPrecision_is_integer in src/scalar.c
+    @override
+    def as_integer_ratio(self, /) -> tuple[int, int]: ...  # runtime: QuadPrecision_as_integer_ratio in src/scalar.c; raises on NaN/inf
 
 #
 def is_longdouble_128() -> bool: ...
diff --git a/quaddtype/numpy_quaddtype/src/scalar.c b/quaddtype/numpy_quaddtype/src/scalar.c
@@ -22,6 +22,18 @@
 // src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
 #define SLEEF_QUAD_DECIMAL_DIG 36
 
+// File-local lock used to serialize SLEEF calls that are not thread-safe;
+// in this file it wraps the Sleef_snprintf call in quad_to_pylong().
+#if PY_VERSION_HEX < 0x30d00b3
+// Below the version cutoff: PyThread lock, allocated in init_quadprecision_scalar().
+static PyThread_type_lock sleef_lock;
+#define LOCK_SLEEF PyThread_acquire_lock(sleef_lock, WAIT_LOCK)
+#define UNLOCK_SLEEF PyThread_release_lock(sleef_lock)
+#else
+static PyMutex sleef_lock = {0};
+#define LOCK_SLEEF PyMutex_Lock(&sleef_lock)
+#define UNLOCK_SLEEF PyMutex_Unlock(&sleef_lock)
+#endif
 
 QuadPrecisionObject *
 QuadPrecision_raw_new(QuadBackendType backend)
@@ -419,6 +431,187 @@ QuadPrecision_get_imag(QuadPrecisionObject *self, void *closure)
     return (PyObject *)QuadPrecision_raw_new(self->backend);
 }
 
+// Method implementations for float compatibility
+/*
+ * QuadPrecision.is_integer(): return True when the scalar is finite and has
+ * no fractional part, False otherwise (including NaN and +/-inf).
+ *
+ * Both backends are evaluated with SLEEF; a long double payload is first
+ * narrowed to double and then widened to Sleef_quad.
+ * NOTE(review): where long double is wider than double, that narrowing may
+ * round away a fractional part before the check runs -- confirm this is
+ * acceptable for the long double backend.
+ */
+static PyObject *
+QuadPrecision_is_integer(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored))
+{
+    // Bring the payload into SLEEF quad form regardless of backend.
+    Sleef_quad q = (self->backend == BACKEND_SLEEF)
+                           ? self->value.sleef_value
+                           : Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
+
+    // NaN is unordered with itself and is never an integer.
+    if (Sleef_iunordq1(q, q)) {
+        Py_RETURN_FALSE;
+    }
+
+    // Reject +/-inf: the magnitude must compare strictly below the
+    // positive-infinity constant.
+    Sleef_quad infinity = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384);
+    if (!Sleef_icmpltq1(Sleef_fabsq1(q), infinity)) {
+        Py_RETURN_FALSE;
+    }
+
+    // Integral exactly when truncation toward zero leaves the value
+    // unchanged.
+    if (Sleef_icmpeqq1(q, Sleef_truncq1(q))) {
+        Py_RETURN_TRUE;
+    }
+    Py_RETURN_FALSE;
+}
+
+/*
+ * Convert a Sleef_quad to a Python int by formatting it with "%.0Qf" (no
+ * decimal places) and parsing the resulting decimal string.
+ * Returns a new reference, or NULL with a Python exception set.
+ */
+PyObject* quad_to_pylong(Sleef_quad value)
+{
+    char buffer[128];
+
+    // Sleef_snprintf call is thread-unsafe, so serialize it behind sleef_lock.
+    // The Q modifier passes the Sleef_quad by value; precision 0 drops the
+    // fractional digits.
+    LOCK_SLEEF;
+    int written = Sleef_snprintf(buffer, sizeof(buffer), "%.0Qf", value);
+    UNLOCK_SLEEF;
+    if (written < 0 || (size_t)written >= sizeof(buffer)) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to convert quad to string");
+        return NULL;
+    }
+
+    // PyLong_FromString sets its own exception on failure (e.g. MemoryError);
+    // propagate it instead of overwriting it with a generic RuntimeError.
+    return PyLong_FromString(buffer, NULL, 10);
+}
+
+// inspired by the CPython implementation
+// https://github.com/python/cpython/blob/ac1ffd77858b62d169a08040c08aa5de26e145ac/Objects/floatobject.c#L1503C1-L1572C2
+/*
+ * QuadPrecision.as_integer_ratio(): return a 2-tuple (numerator, denominator)
+ * of Python ints whose ratio equals the scalar's value; the denominator is 1
+ * shifted left by a non-negative amount, i.e. a positive power of two.
+ *
+ * Raises ValueError for NaN and OverflowError for +/-inf. If building any of
+ * the Python integers fails, NULL is returned with that exception set.
+ *
+ * A long double payload is narrowed to double before being handed to SLEEF.
+ */
+static PyObject *
+QuadPrecision_as_integer_ratio(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored))
+{
+    Sleef_quad value;
+    Sleef_quad pos_inf = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384);
+    const int FLOAT128_PRECISION = 113;
+
+    // Owned references; all are released at `done`, where Py_XDECREF skips
+    // the ones that were never created.
+    PyObject *py_exp = NULL;
+    PyObject *numerator = NULL;
+    PyObject *denominator = NULL;
+    PyObject *result = NULL;
+    PyObject **target = NULL;
+    PyObject *shifted = NULL;
+
+    if (self->backend == BACKEND_SLEEF) {
+        value = self->value.sleef_value;
+    }
+    else {
+        // lets also tackle ld from sleef functions as well
+        value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
+    }
+
+    // NaN is unordered with itself.
+    if (Sleef_iunordq1(value, value)) {
+        PyErr_SetString(PyExc_ValueError, "Cannot convert NaN to integer ratio");
+        return NULL;
+    }
+    if (Sleef_icmpgeq1(Sleef_fabsq1(value), pos_inf)) {
+        PyErr_SetString(PyExc_OverflowError, "Cannot convert infinite value to integer ratio");
+        return NULL;
+    }
+
+    // value == mantissa * 2**exponent exactly
+    int exponent;
+    Sleef_quad mantissa = Sleef_frexpq1(value, &exponent); // within [0.5, 1.0)
+
+    /*
+     * Double the mantissa (decrementing the exponent to compensate) until it
+     * has no fractional part. CPython's float.as_integer_ratio caps the same
+     * loop at 300 iterations:
+     *
+     *     for (i=0; i<300 && float_part != floor(float_part) ; i++) {
+     *         float_part *= 2.0;
+     *         exponent--;
+     *     }
+     *
+     * A binary128 significand has 113 bits, so 113 doublings are enough here.
+     *
+     * Alternative considered: a single
+     *     mantissa = ldexpq(mantissa, FLOAT128_PRECISION);
+     *     exponent -= FLOAT128_PRECISION;
+     * which should also work but yields unreduced, larger integers unless a
+     * gcd pass is added afterwards.
+     */
+    const Sleef_quad two = Sleef_cast_from_doubleq1(2.0);  // hoisted loop invariant
+    for (int i = 0; i < FLOAT128_PRECISION && !Sleef_icmpeqq1(mantissa, Sleef_floorq1(mantissa)); i++) {
+        mantissa = Sleef_mulq1_u05(mantissa, two);
+        exponent--;
+    }
+
+    // numerator and denominators can't fit in int
+    // convert items to PyLongObject from string instead
+    py_exp = PyLong_FromLongLong(Py_ABS(exponent));
+    if (py_exp == NULL) {
+        goto done;
+    }
+    numerator = quad_to_pylong(mantissa);
+    if (numerator == NULL) {
+        goto done;
+    }
+    denominator = PyLong_FromLong(1);
+    if (denominator == NULL) {
+        goto done;
+    }
+
+    // fold in 2**exponent: into the numerator when positive, else the denominator
+    target = (exponent > 0) ? &numerator : &denominator;
+    shifted = PyNumber_Lshift(*target, py_exp);
+    if (shifted == NULL) {
+        goto done;
+    }
+    Py_DECREF(*target);
+    *target = shifted;
+
+    // PyTuple_Pack takes its own references to the items, so ours still have
+    // to be dropped below; returning without doing so leaked both integers.
+    result = PyTuple_Pack(2, numerator, denominator);
+
+done:
+    Py_XDECREF(py_exp);
+    Py_XDECREF(numerator);
+    Py_XDECREF(denominator);
+    return result;
+}
+
+static PyMethodDef QuadPrecision_methods[] = {  // float-compatible methods, installed via tp_methods
+    {.ml_name = "is_integer", .ml_meth = (PyCFunction)QuadPrecision_is_integer, .ml_flags = METH_NOARGS,
+     .ml_doc = "Return True if the value is an integer."},
+    {.ml_name = "as_integer_ratio", .ml_meth = (PyCFunction)QuadPrecision_as_integer_ratio, .ml_flags = METH_NOARGS,
+     .ml_doc = "Return a pair of integers whose ratio is exactly equal to the original value."},
+    {NULL, NULL, 0, NULL}  /* Sentinel */
+};
+
 static PyGetSetDef QuadPrecision_getset[] = {
     {"real", (getter)QuadPrecision_get_real, NULL, "Real part of the scalar", NULL},
     {"imag", (getter)QuadPrecision_get_imag, NULL, "Imaginary part of the scalar (always 0 for real types)", NULL},
@@ -436,12 +629,20 @@ PyTypeObject QuadPrecision_Type = {
         .tp_as_number = &quad_as_scalar,
         .tp_as_buffer = &QuadPrecision_as_buffer,
         .tp_richcompare = (richcmpfunc)quad_richcompare,
+        .tp_methods = QuadPrecision_methods,
         .tp_getset = QuadPrecision_getset,
 };
 
 int
 init_quadprecision_scalar(void)
 {
+#if PY_VERSION_HEX < 0x30d00b3
+    sleef_lock = PyThread_allocate_lock();  // lock behind LOCK_SLEEF/UNLOCK_SLEEF on this branch
+    if (sleef_lock == NULL) {
+        PyErr_NoMemory();  // report the failed allocation as MemoryError
+        return -1;
+    }
+#endif
     QuadPrecision_Type.tp_base = &PyFloatingArrType_Type;
     return PyType_Ready(&QuadPrecision_Type);
 }
diff --git a/quaddtype/reinstall.sh b/quaddtype/reinstall.sh
@@ -8,7 +8,11 @@ if [ -d "build/" ]; then
     rm -rf subprojects/sleef
 fi
 
-# export CFLAGS="-g -O0" 
-# export CXXFLAGS="-g -O0"
 python -m pip uninstall -y numpy_quaddtype
 python -m pip install . -vv --no-build-isolation 2>&1 | tee build_log.txt
+
+# for debugging and TSAN builds, comment the above line and uncomment all below:
+# export CFLAGS="-fsanitize=thread -g -O0" 
+# export CXXFLAGS="-fsanitize=thread -g -O0"
+# export LDFLAGS="-fsanitize=thread"
+# CC=clang CXX=clang++ python -m pip install . -vv --no-build-isolation -Csetup-args=-Db_sanitize=thread 2>&1 | tee build_log.txt
diff --git a/quaddtype/subprojects/packagefiles/sleef/meson.build b/quaddtype/subprojects/packagefiles/sleef/meson.build
@@ -12,6 +12,7 @@ if host_machine.system() == 'windows'
   parallel_flag = []
 endif
 
+# For building sleef with TSan, delete the sleef subproject and follow the README instructions to build sleef externally.
 sleef_configure = run_command([
     cmake, 
     '-S', meson.current_source_dir(),
diff --git a/quaddtype/tests/test_multithreading.py b/quaddtype/tests/test_multithreading.py
diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py