Skip to content

Commit 8487df3

Browse files
authored
Merge pull request #221 from SwayamInSync/216
2 parents 00fa954 + 6856f5f commit 8487df3

File tree

7 files changed

+421
-11
lines changed

7 files changed

+421
-11
lines changed

quaddtype/README.md

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ np.array([1,2,3], dtype=QuadPrecDType("longdouble"))
3131
- **CMake** (≥3.15)
3232
- **Python 3.10+**
3333
- **Git**
34+
- **NumPy >= 2.4** (build from source)
3435

3536
### Linux/Unix/macOS
3637

@@ -48,11 +49,11 @@ pip install numpy pytest
4849
# export CFLAGS="-DDISABLE_QUADBLAS"
4950
# export CXXFLAGS="-DDISABLE_QUADBLAS"
5051

51-
python -m pip install . -v
52+
python -m pip install . -v --no-build-isolation
5253

5354
# Run the tests
5455
cd ..
55-
python -m pytest
56+
python -m pytest/quaddtype/tests/
5657
```
5758

5859
### Windows
@@ -94,14 +95,14 @@ python -m pytest
9495

9596
```powershell
9697
# Build and install the package
97-
python -m pip install . -v
98+
python -m pip install . -v --no-build-isolation
9899
```
99100

100101
5. **Test Installation**
101102

102103
```powershell
103104
# Run tests
104-
pytest -s tests/
105+
pytest -s ..\quaddtype\tests\
105106
```
106107

107108
6. **QBLAS Disabled**: QuadBLAS optimization is automatically disabled on Windows builds due to MSVC compatibility issues. This is handled by the `-DDISABLE_QUADBLAS` compiler flag.
@@ -112,3 +113,58 @@ python -m pytest
112113
- VS 2017: `"Visual Studio 15 2017"`
113114

114115
8. **Architecture**: The instructions are for x64. For x86 builds, change `-A x64` to `-A Win32`.
116+
117+
## Building with ThreadSanitizer (TSan)
118+
119+
This is a development feature to help detect threading issues. To build `numpy-quaddtype` with TSan enabled, follow these steps:
120+
121+
> Use of clang is recommended with machine NOT supporting `libquadmath` (like ARM64). Set the compiler to clang/clang++ before proceeding.
122+
> ```bash
123+
> export CC=clang
124+
> export CXX=clang++
125+
> ```
126+
127+
1. Compile free-threaded CPython with TSan support. Follow the [Python Free-Threading Guide](https://py-free-threading.github.io/thread_sanitizer/#compile-free-threaded-cpython-with-tsan) for detailed instructions.
128+
2. Create and activate a virtual environment using the TSan-enabled Python build.
129+
3. Installing dependencies:
130+
131+
```bash
132+
python -m pip install meson meson-python wheel ninja
133+
# Need NumPy built with TSan as well
134+
python -m pip install "numpy @ git+https://github.com/numpy/numpy" -C'setup-args=-Db_sanitize=thread'
135+
```
136+
4. Building SLEEF with TSan:
137+
138+
```bash
139+
# clone the repository
140+
git clone -b 3.8 https://github.com/shibatch/sleef.git
141+
cd sleef
142+
143+
# Build SLEEF with TSan
144+
cmake \
145+
-DCMAKE_C_COMPILER=clang \
146+
-DCMAKE_CXX_COMPILER=clang++ \
147+
-DCMAKE_C_FLAGS="-fsanitize=thread -g -O1" \
148+
-DCMAKE_CXX_FLAGS="-fsanitize=thread -g -O1" \
149+
-DCMAKE_EXE_LINKER_FLAGS="-fsanitize=thread" \
150+
-DCMAKE_SHARED_LINKER_FLAGS="-fsanitize=thread" \
151+
-DSLEEF_BUILD_QUAD=ON \
152+
-DSLEEF_BUILD_TESTS=OFF \
153+
-S . -B build
154+
155+
cmake --build build -j
156+
157+
# Install the built library and headers into the system path (/usr/local)
158+
sudo cmake --install build --prefix=/usr/local
159+
```
160+
5. Build and install `numpy-quaddtype` with TSan:
161+
162+
```bash
163+
# SLEEF is already installed with TSan, we need to provide proper flags to numpy-quaddtype's meson file
164+
# So that it does not build SLEEF again and use the installed one.
165+
166+
export CFLAGS="-fsanitize=thread -g -O0"
167+
export CXXFLAGS="-fsanitize=thread -g -O0"
168+
export LDFLAGS="-fsanitize=thread"
169+
python -m pip install . -vv --no-build-isolation -Csetup-args=-Db_sanitize=thread
170+
```

quaddtype/numpy_quaddtype/_quaddtype_main.pyi

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from typing import Any, Literal, TypeAlias, final, overload
2-
2+
import builtins
33
import numpy as np
44
from numpy._typing import _128Bit # pyright: ignore[reportPrivateUsage]
55
from typing_extensions import Never, Self, override
@@ -157,9 +157,10 @@ class QuadPrecision(np.floating[_128Bit]):
157157
# NOTE: is_integer() and as_integer_ratio() are defined on numpy.floating in the
158158
# stubs, but don't exist at runtime. And because QuadPrecision does not implement
159159
# them, we use this hacky workaround to emulate their absence.
160-
# TODO: Remove after https://github.com/numpy/numpy-user-dtypes/issues/216
161-
is_integer: Never # pyright: ignore[reportIncompatibleMethodOverride]
162-
as_integer_ratio: Never # pyright: ignore[reportIncompatibleMethodOverride]
160+
@override
161+
def is_integer(self, /) -> builtins.bool: ...
162+
@override
163+
def as_integer_ratio(self, /) -> tuple[int, int]: ...
163164

164165
#
165166
def is_longdouble_128() -> bool: ...

quaddtype/numpy_quaddtype/src/scalar.c

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,18 @@
2222
// src: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
2323
#define SLEEF_QUAD_DECIMAL_DIG 36
2424

25+
#if PY_VERSION_HEX < 0x30d00b3
26+
static PyThread_type_lock sleef_lock;
27+
#define LOCK_SLEEF PyThread_acquire_lock(sleef_lock, WAIT_LOCK)
28+
#define UNLOCK_SLEEF PyThread_release_lock(sleef_lock)
29+
#else
30+
static PyMutex sleef_lock = {0};
31+
#define LOCK_SLEEF PyMutex_Lock(&sleef_lock)
32+
#define UNLOCK_SLEEF PyMutex_Unlock(&sleef_lock)
33+
#endif
34+
35+
36+
2537

2638
QuadPrecisionObject *
2739
QuadPrecision_raw_new(QuadBackendType backend)
@@ -419,6 +431,187 @@ QuadPrecision_get_imag(QuadPrecisionObject *self, void *closure)
419431
return (PyObject *)QuadPrecision_raw_new(self->backend);
420432
}
421433

434+
// Method implementations for float compatibility
435+
static PyObject *
436+
QuadPrecision_is_integer(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored))
437+
{
438+
Sleef_quad value;
439+
440+
if (self->backend == BACKEND_SLEEF) {
441+
value = self->value.sleef_value;
442+
}
443+
else {
444+
// lets also tackle ld from sleef functions as well
445+
value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
446+
}
447+
448+
if(Sleef_iunordq1(value, value)) {
449+
Py_RETURN_FALSE;
450+
}
451+
452+
// Check if value is finite (not inf or nan)
453+
Sleef_quad abs_value = Sleef_fabsq1(value);
454+
Sleef_quad pos_inf = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384);
455+
int32_t is_finite = Sleef_icmpltq1(abs_value, pos_inf);
456+
457+
if (!is_finite) {
458+
Py_RETURN_FALSE;
459+
}
460+
461+
// Check if value equals its truncated version
462+
Sleef_quad truncated = Sleef_truncq1(value);
463+
int32_t is_equal = Sleef_icmpeqq1(value, truncated);
464+
465+
if (is_equal) {
466+
Py_RETURN_TRUE;
467+
}
468+
else {
469+
Py_RETURN_FALSE;
470+
}
471+
}
472+
473+
PyObject* quad_to_pylong(Sleef_quad value)
474+
{
475+
char buffer[128];
476+
477+
// Sleef_snprintf call is thread-unsafe
478+
LOCK_SLEEF;
479+
// Format as integer (%.0Qf gives integer with no decimal places)
480+
// Q modifier means pass Sleef_quad by value
481+
int written = Sleef_snprintf(buffer, sizeof(buffer), "%.0Qf", value);
482+
UNLOCK_SLEEF;
483+
if (written < 0 || written >= sizeof(buffer)) {
484+
PyErr_SetString(PyExc_RuntimeError, "Failed to convert quad to string");
485+
return NULL;
486+
}
487+
488+
PyObject *result = PyLong_FromString(buffer, NULL, 10);
489+
490+
if (result == NULL) {
491+
PyErr_SetString(PyExc_RuntimeError, "Failed to parse integer string");
492+
return NULL;
493+
}
494+
495+
return result;
496+
}
497+
498+
// inspired by the CPython implementation
499+
// https://github.com/python/cpython/blob/ac1ffd77858b62d169a08040c08aa5de26e145ac/Objects/floatobject.c#L1503C1-L1572C2
500+
static PyObject *
501+
QuadPrecision_as_integer_ratio(QuadPrecisionObject *self, PyObject *Py_UNUSED(ignored))
502+
{
503+
504+
Sleef_quad value;
505+
Sleef_quad pos_inf = sleef_q(+0x1000000000000LL, 0x0000000000000000ULL, 16384);
506+
const int FLOAT128_PRECISION = 113;
507+
508+
if (self->backend == BACKEND_SLEEF) {
509+
value = self->value.sleef_value;
510+
}
511+
else {
512+
// lets also tackle ld from sleef functions as well
513+
value = Sleef_cast_from_doubleq1((double)self->value.longdouble_value);
514+
}
515+
516+
if(Sleef_iunordq1(value, value)) {
517+
PyErr_SetString(PyExc_ValueError, "Cannot convert NaN to integer ratio");
518+
return NULL;
519+
}
520+
if(Sleef_icmpgeq1(Sleef_fabsq1(value), pos_inf)) {
521+
PyErr_SetString(PyExc_OverflowError, "Cannot convert infinite value to integer ratio");
522+
return NULL;
523+
}
524+
525+
// Sleef_value == float_part * 2**exponent exactly
526+
int exponent;
527+
Sleef_quad mantissa = Sleef_frexpq1(value, &exponent); // within [0.5, 1.0)
528+
529+
/*
530+
CPython loops for 300 (some huge number) to make sure
531+
float_part gets converted to the floor(float_part) i.e. near integer as
532+
533+
for (i=0; i<300 && float_part != floor(float_part) ; i++) {
534+
float_part *= 2.0;
535+
exponent--;
536+
}
537+
538+
It seems highly inefficient from performance perspective, maybe they pick 300 for future-proof
539+
or If FLT_RADIX != 2, the 300 steps may leave a tiny fractional part
540+
541+
Another way can be doing as:
542+
```
543+
mantissa = ldexpq(mantissa, FLOAT128_PRECISION);
544+
exponent -= FLOAT128_PRECISION;
545+
```
546+
This should work but give non-simplified, huge integers (although they also come down to same representation)
547+
We can also do gcd to find simplified values, but it'll add more O(log(N))
548+
For the sake of simplicity and fixed 128-bit nature, we will loop till 113 only
549+
*/
550+
551+
for (int i = 0; i < FLOAT128_PRECISION && !Sleef_icmpeqq1(mantissa, Sleef_floorq1(mantissa)); i++) {
552+
mantissa = Sleef_mulq1_u05(mantissa, Sleef_cast_from_doubleq1(2.0));
553+
exponent--;
554+
}
555+
556+
// numerator and denominators can't fit in int
557+
// convert items to PyLongObject from string instead
558+
PyObject *py_exp = PyLong_FromLongLong(Py_ABS(exponent));
559+
if(py_exp == NULL)
560+
{
561+
return NULL;
562+
}
563+
564+
PyObject *numerator = quad_to_pylong(mantissa);
565+
if(numerator == NULL)
566+
{
567+
Py_DECREF(py_exp);
568+
return NULL;
569+
}
570+
PyObject *denominator = PyLong_FromLong(1);
571+
if (denominator == NULL) {
572+
Py_DECREF(py_exp);
573+
Py_DECREF(numerator);
574+
return NULL;
575+
}
576+
577+
// fold in 2**exponent
578+
if(exponent > 0)
579+
{
580+
PyObject *new_num = PyNumber_Lshift(numerator, py_exp);
581+
Py_DECREF(numerator);
582+
if(new_num == NULL)
583+
{
584+
Py_DECREF(denominator);
585+
Py_DECREF(py_exp);
586+
return NULL;
587+
}
588+
numerator = new_num;
589+
}
590+
else
591+
{
592+
PyObject *new_denom = PyNumber_Lshift(denominator, py_exp);
593+
Py_DECREF(denominator);
594+
if(new_denom == NULL)
595+
{
596+
Py_DECREF(numerator);
597+
Py_DECREF(py_exp);
598+
return NULL;
599+
}
600+
denominator = new_denom;
601+
}
602+
603+
Py_DECREF(py_exp);
604+
return PyTuple_Pack(2, numerator, denominator);
605+
}
606+
607+
static PyMethodDef QuadPrecision_methods[] = {
608+
{"is_integer", (PyCFunction)QuadPrecision_is_integer, METH_NOARGS,
609+
"Return True if the value is an integer."},
610+
{"as_integer_ratio", (PyCFunction)QuadPrecision_as_integer_ratio, METH_NOARGS,
611+
"Return a pair of integers whose ratio is exactly equal to the original value."},
612+
{NULL, NULL, 0, NULL} /* Sentinel */
613+
};
614+
422615
static PyGetSetDef QuadPrecision_getset[] = {
423616
{"real", (getter)QuadPrecision_get_real, NULL, "Real part of the scalar", NULL},
424617
{"imag", (getter)QuadPrecision_get_imag, NULL, "Imaginary part of the scalar (always 0 for real types)", NULL},
@@ -436,12 +629,20 @@ PyTypeObject QuadPrecision_Type = {
436629
.tp_as_number = &quad_as_scalar,
437630
.tp_as_buffer = &QuadPrecision_as_buffer,
438631
.tp_richcompare = (richcmpfunc)quad_richcompare,
632+
.tp_methods = QuadPrecision_methods,
439633
.tp_getset = QuadPrecision_getset,
440634
};
441635

442636
int
443637
init_quadprecision_scalar(void)
444638
{
639+
#if PY_VERSION_HEX < 0x30d00b3
640+
sleef_lock = PyThread_allocate_lock();
641+
if (sleef_lock == NULL) {
642+
PyErr_NoMemory();
643+
return -1;
644+
}
645+
#endif
445646
QuadPrecision_Type.tp_base = &PyFloatingArrType_Type;
446647
return PyType_Ready(&QuadPrecision_Type);
447648
}

quaddtype/reinstall.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ if [ -d "build/" ]; then
88
rm -rf subprojects/sleef
99
fi
1010

11-
# export CFLAGS="-g -O0"
12-
# export CXXFLAGS="-g -O0"
1311
python -m pip uninstall -y numpy_quaddtype
1412
python -m pip install . -vv --no-build-isolation 2>&1 | tee build_log.txt
13+
14+
# for debugging and TSAN builds, comment the above line and uncomment all below:
15+
# export CFLAGS="-fsanitize=thread -g -O0"
16+
# export CXXFLAGS="-fsanitize=thread -g -O0"
17+
# export LDFLAGS="-fsanitize=thread"
18+
# CC=clang CXX=clang++ python -m pip install . -vv --no-build-isolation -Csetup-args=-Db_sanitize=thread 2>&1 | tee build_log.txt

quaddtype/subprojects/packagefiles/sleef/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ if host_machine.system() == 'windows'
1212
parallel_flag = []
1313
endif
1414

15+
# For building sleef with TSan, delete the sleef subproject and follow the README instructions to build sleef externally.
1516
sleef_configure = run_command([
1617
cmake,
1718
'-S', meson.current_source_dir(),

0 commit comments

Comments
 (0)