From 243b5f00e34d5526eea9b45aad70129d8ab43a27 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Sun, 2 Nov 2025 15:29:09 +0300 Subject: [PATCH 01/16] Fix data race in _struct module endian table initialization --- ...-11-02-15-28-33.gh-issue-140260.JNzlGz.rst | 2 + Modules/_struct.c | 91 +++++++++++-------- 2 files changed, 54 insertions(+), 39 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst new file mode 100644 index 00000000000000..9a04f8286eb526 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst @@ -0,0 +1,2 @@ +Fix :mod:`_struct` data race in endian table initialization with +subinterpreters. Patch by Shamil Abdulaev. diff --git a/Modules/_struct.c b/Modules/_struct.c index f09252e82c3915..06b5fe186c80eb 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -12,6 +12,7 @@ #include "pycore_long.h" // _PyLong_AsByteArray() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() +#include "pycore_lock.h" // _PyOnceFlag_CallOnce() #include // offsetof() @@ -1505,6 +1506,55 @@ static formatdef lilendian_table[] = { {0} }; +/* Ensure endian table optimization happens exactly once across all interpreters */ +static _PyOnceFlag endian_tables_init_once = {0}; + +static int +init_endian_tables(void *arg) +{ + (void)arg; // Unused but required by _Py_once_fn_t signature + + const formatdef *native = native_table; + formatdef *other, *ptr; +#if PY_LITTLE_ENDIAN + other = lilendian_table; +#else + other = bigendian_table; +#endif + /* Scan through the native table, find a matching + entry in the endian table and swap in the + native implementations whenever possible + (64-bit platforms may not have "standard" sizes) */ + while (native->format != '\0' && other->format != '\0') { + ptr = other; + while (ptr->format != '\0') { + if (ptr->format == native->format) { + /* Match faster when formats are + listed in the same order */ + if (ptr == other) + other++; + /* Only use the trick if the + size matches */ + if (ptr->size != native->size) + break; + /* Skip float and double, could be + "unknown" float format */ + if (ptr->format == 'd' || ptr->format == 'f') + break; + /* Skip _Bool, semantics are different for standard size */ + if (ptr->format == '?') + break; + ptr->pack = native->pack; + ptr->unpack = native->unpack; + break; + } + ptr++; + } + native++; + } + return 0; +} + static const formatdef * whichtable(const char **pfmt) @@ -2711,45 +2761,8 @@ _structmodule_exec(PyObject *m) } /* Check endian and swap in faster functions */ - { - const formatdef *native = native_table; - formatdef *other, *ptr; -#if PY_LITTLE_ENDIAN - other = lilendian_table; -#else - other = bigendian_table; -#endif - /* Scan through the native table, find a matching - entry in the endian table and swap in the - native implementations whenever possible - (64-bit platforms may not have "standard" sizes) */ - while (native->format != '\0' && other->format != '\0') { - ptr = other; - while (ptr->format != '\0') { - if (ptr->format == native->format) { - /* Match faster when formats are - listed in the same order */ - if (ptr == other) - other++; - /* Only use the trick if the - size matches */ - if (ptr->size != native->size) - break; - /* Skip float and double, could be - "unknown" float format */ - if (ptr->format == 'd' || ptr->format == 'f') - break; - /* Skip _Bool, semantics are different for standard size */ - if (ptr->format == '?') - break; - ptr->pack = native->pack; - ptr->unpack = native->unpack; - break; - } - ptr++; - } - native++; - } + if (_PyOnceFlag_CallOnce(&endian_tables_init_once, init_endian_tables, NULL) == -1) { + return -1; } /* Add some symbolic constants to the module */ From 4c9892edbf12e303b9426feb54513bb5217a0448 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Sun, 2 Nov 2025 15:36:20 +0300 Subject: [PATCH 02/16] fix doc --- .../2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst index 9a04f8286eb526..96bf9b51e4862c 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-02-15-28-33.gh-issue-140260.JNzlGz.rst @@ -1,2 +1,2 @@ -Fix :mod:`_struct` data race in endian table initialization with +Fix :mod:`struct` data race in endian table initialization with subinterpreters. Patch by Shamil Abdulaev. From 95958e8db9dbc7c89de3f985e527b98f2c79337a Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Sun, 2 Nov 2025 15:57:23 +0300 Subject: [PATCH 03/16] refactor(c-analyzer): add endian_tables_init_once to globals-to-fix list --- Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 3c3cb2f9c86f16..33b70e5757d196 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -419,6 +419,7 @@ Modules/_cursesmodule.c - curses_setupterm_called - Modules/_cursesmodule.c - curses_start_color_called - Modules/_cursesmodule.c - curses_screen_encoding - Modules/_elementtree.c - expat_capi - +Modules/_struct.c - endian_tables_init_once - Modules/readline.c - libedit_append_replace_history_offset - Modules/readline.c - using_libedit_emulation - Modules/readline.c - libedit_history_start - From 3130512dbb51d37de1f5e64bb26bd70a9fc2f50c Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Sun, 2 Nov 2025 19:09:25 +0300 Subject: [PATCH 04/16] Use Py_UNUSED for unused arg in init_endian_tables --- Modules/_struct.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Modules/_struct.c b/Modules/_struct.c index 06b5fe186c80eb..67cd3fdd356112 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1510,10 +1510,8 @@ static formatdef lilendian_table[] = { static _PyOnceFlag endian_tables_init_once = {0}; static int -init_endian_tables(void *arg) +init_endian_tables(void *Py_UNUSED(arg)) { - (void)arg; // Unused but required by _Py_once_fn_t signature - const formatdef *native = native_table; formatdef *other, *ptr; #if PY_LITTLE_ENDIAN From d33514b66f4c90258b6c1ed05a66e51c7bb1a3ac Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Sun, 2 Nov 2025 21:04:18 +0300 Subject: [PATCH 05/16] fix: remove duplicate include of pycore_lock.h in _struct.c --- Modules/_struct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_struct.c b/Modules/_struct.c index 67cd3fdd356112..1a205813d96d02 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -9,10 +9,10 @@ #include "Python.h" #include "pycore_bytesobject.h" // _PyBytesWriter +#include "pycore_lock.h" // _PyOnceFlag_CallOnce() #include "pycore_long.h" // _PyLong_AsByteArray() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() -#include "pycore_lock.h" // _PyOnceFlag_CallOnce() #include // offsetof() From c4f577f4d7f1eeaeebb7a85b47f15980cd7762b0 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Mon, 3 Nov 2025 17:31:02 +0300 Subject: [PATCH 06/16] add test --- Lib/test/test_struct.py | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 75c76a36ee92f5..0b2d85276429ee 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -4,6 +4,7 @@ import gc import math import operator +import threading import unittest import platform import struct @@ -39,6 +40,24 @@ def bigendian_to_native(value): else: return string_reverse(value) +# Helper for test_endian_table_init_subinterpreters +class _Result: + def __init__(self): + self.ret = -1 + self.err = None + +def _run_in_subinterp_worker(code, barrier, result): + """ + Worker function for a thread. Waits on the barrier, then runs the + code in a subinterpreter. + """ + try: + # Wait until all threads are ready to start simultaneously. + barrier.wait() + result.ret = support.run_in_subinterp(code) + except Exception as e: + result.err = e + class StructTest(ComplexesAreIdenticalMixin, unittest.TestCase): def test_isbigendian(self): self.assertEqual((struct.pack('=i', 1)[0] == 0), ISBIGENDIAN) @@ -800,6 +819,44 @@ def test_c_complex_round_trip(self): round_trip = struct.unpack(f, struct.pack(f, z))[0] self.assertComplexesAreIdentical(z, round_trip) + def test_endian_table_init_subinterpreters(self): + # Verify that struct works correctly in subinterpreters after + # once-only endian table initialization (gh-140260), when + # initialized concurrently. + # Use struct in the main interpreter first. + self.assertEqual(struct.unpack('>i', struct.pack('>i', 1))[0], 1) + self.assertEqual(struct.unpack('i', 1)\n" + "assert struct.unpack('>i', x)[0] == 1\n" + "y = struct.pack(' Date: Mon, 3 Nov 2025 19:31:21 +0300 Subject: [PATCH 07/16] Remove return check for endian table init --- Modules/_struct.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Modules/_struct.c b/Modules/_struct.c index 1a205813d96d02..3849ea0c9b00fa 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2758,10 +2758,7 @@ _structmodule_exec(PyObject *m) return -1; } - /* Check endian and swap in faster functions */ - if (_PyOnceFlag_CallOnce(&endian_tables_init_once, init_endian_tables, NULL) == -1) { - return -1; - } + _PyOnceFlag_CallOnce(&endian_tables_init_once, init_endian_tables, NULL); /* Add some symbolic constants to the module */ state->StructError = PyErr_NewException("struct.error", NULL, NULL); From f4d24ff6b455dcbd60a1da5d81d831b474772400 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Mon, 3 Nov 2025 19:49:38 +0300 Subject: [PATCH 08/16] Use InterpreterPoolExecutor in struct tests --- Lib/test/test_struct.py | 63 ++++++----------------------------------- 1 file changed, 8 insertions(+), 55 deletions(-) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 0b2d85276429ee..b9ee745a8d1350 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -1,10 +1,10 @@ from collections import abc from itertools import combinations import array +from concurrent.futures import InterpreterPoolExecutor import gc import math import operator -import threading import unittest import platform import struct @@ -40,24 +40,6 @@ def bigendian_to_native(value): else: return string_reverse(value) -# Helper for test_endian_table_init_subinterpreters -class _Result: - def __init__(self): - self.ret = -1 - self.err = None - -def _run_in_subinterp_worker(code, barrier, result): - """ - Worker function for a thread. Waits on the barrier, then runs the - code in a subinterpreter. - """ - try: - # Wait until all threads are ready to start simultaneously. - barrier.wait() - result.ret = support.run_in_subinterp(code) - except Exception as e: - result.err = e - class StructTest(ComplexesAreIdenticalMixin, unittest.TestCase): def test_isbigendian(self): self.assertEqual((struct.pack('=i', 1)[0] == 0), ISBIGENDIAN) @@ -820,42 +802,13 @@ def test_c_complex_round_trip(self): self.assertComplexesAreIdentical(z, round_trip) def test_endian_table_init_subinterpreters(self): - # Verify that struct works correctly in subinterpreters after - # once-only endian table initialization (gh-140260), when - # initialized concurrently. - # Use struct in the main interpreter first. - self.assertEqual(struct.unpack('>i', struct.pack('>i', 1))[0], 1) - self.assertEqual(struct.unpack('i', 1)\n" - "assert struct.unpack('>i', x)[0] == 1\n" - "y = struct.pack(' Date: Mon, 3 Nov 2025 21:26:06 +0300 Subject: [PATCH 09/16] Ignore return value of _PyOnceFlag_CallOnce Silence compiler warning about the unused result; the init cannot fail. --- Modules/_struct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_struct.c b/Modules/_struct.c index 3849ea0c9b00fa..2acb3df3a30395 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -2758,7 +2758,8 @@ _structmodule_exec(PyObject *m) return -1; } - _PyOnceFlag_CallOnce(&endian_tables_init_once, init_endian_tables, NULL); + /* init cannot fail */ + (void)_PyOnceFlag_CallOnce(&endian_tables_init_once, init_endian_tables, NULL); /* Add some symbolic constants to the module */ state->StructError = PyErr_NewException("struct.error", NULL, NULL); From 5e6b45b041912cd4b31ca9ec9eac9a1694f86407 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Mon, 3 Nov 2025 21:29:30 +0300 Subject: [PATCH 10/16] Move endian_tables_init_once to ignored.tsv --- Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 - Tools/c-analyzer/cpython/ignored.tsv | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 33b70e5757d196..3c3cb2f9c86f16 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -419,7 +419,6 @@ Modules/_cursesmodule.c - curses_setupterm_called - Modules/_cursesmodule.c - curses_start_color_called - Modules/_cursesmodule.c - curses_screen_encoding - Modules/_elementtree.c - expat_capi - -Modules/_struct.c - endian_tables_init_once - Modules/readline.c - libedit_append_replace_history_offset - Modules/readline.c - using_libedit_emulation - Modules/readline.c - libedit_history_start - diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 8b73189fb07dc5..81b69d2d385c71 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -24,6 +24,7 @@ Modules/posixmodule.c os_dup2_impl dup3_works - ## guards around resource init Python/thread_pthread.h PyThread__init_thread lib_initialized - +Modules/_struct.c - endian_tables_init_once - ##----------------------- ## other values (not Python-specific) From 44925202349c778140c5c7dd24c02a436724598e Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Mon, 3 Nov 2025 21:31:19 +0300 Subject: [PATCH 11/16] Import InterpreterPoolExecutor locally in test --- Lib/test/test_struct.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index b9ee745a8d1350..9141c8ce53bb8c 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -1,7 +1,6 @@ from collections import abc from itertools import combinations import array -from concurrent.futures import InterpreterPoolExecutor import gc import math import operator @@ -804,6 +803,8 @@ def test_c_complex_round_trip(self): def test_endian_table_init_subinterpreters(self): # Verify that the _struct extension module can be initialized # concurrently in subinterpreters (gh-140260). + from concurrent.futures import InterpreterPoolExecutor + code = "import struct" with InterpreterPoolExecutor(max_workers=5) as executor: results = executor.map(support.run_in_subinterp, [code] * 5) From a1caec98641c3b327f50fc2a88735ce4fa9f24ff Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Mon, 3 Nov 2025 21:49:29 +0300 Subject: [PATCH 12/16] Skip subinterpreter init test unless GIL disabled Use an exec wrapper for subinterpreter execution and assert None results instead of calling support.run_in_subinterp --- Lib/test/test_struct.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 9141c8ce53bb8c..ee484a4fcdd7c7 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -800,16 +800,19 @@ def test_c_complex_round_trip(self): round_trip = struct.unpack(f, struct.pack(f, z))[0] self.assertComplexesAreIdentical(z, round_trip) + @unittest.skipUnless(support.Py_GIL_DISABLED, 'this test can only possibly fail with GIL disabled') def test_endian_table_init_subinterpreters(self): # Verify that the _struct extension module can be initialized # concurrently in subinterpreters (gh-140260). from concurrent.futures import InterpreterPoolExecutor + def _exec_code(code_str): + exec(code_str) + code = "import struct" with InterpreterPoolExecutor(max_workers=5) as executor: - results = executor.map(support.run_in_subinterp, [code] * 5) - for ret in results: - self.assertEqual(ret, 0) + results = executor.map(_exec_code, [code] * 5) + self.assertListEqual(list(results), [None] * 5) class UnpackIteratorTest(unittest.TestCase): From f1cf16942e09902a9e7dabead2e2d78cbaf954f3 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Wed, 5 Nov 2025 16:19:36 +0300 Subject: [PATCH 13/16] Inline exec and wrap skipUnless args --- Lib/test/test_struct.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index ee484a4fcdd7c7..95abbe9b34258f 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -800,18 +800,18 @@ def test_c_complex_round_trip(self): round_trip = struct.unpack(f, struct.pack(f, z))[0] self.assertComplexesAreIdentical(z, round_trip) - @unittest.skipUnless(support.Py_GIL_DISABLED, 'this test can only possibly fail with GIL disabled') + @unittest.skipUnless( + support.Py_GIL_DISABLED, + 'this test can only possibly fail with GIL disabled' + ) def test_endian_table_init_subinterpreters(self): # Verify that the _struct extension module can be initialized # concurrently in subinterpreters (gh-140260). from concurrent.futures import InterpreterPoolExecutor - def _exec_code(code_str): - exec(code_str) - code = "import struct" with InterpreterPoolExecutor(max_workers=5) as executor: - results = executor.map(_exec_code, [code] * 5) + results = executor.map(exec(code_str), [code] * 5) self.assertListEqual(list(results), [None] * 5) From 14088965589880127f2826516d09ca103eff87f4 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Wed, 5 Nov 2025 16:23:21 +0300 Subject: [PATCH 14/16] Skip test when InterpreterPoolExecutor missing --- Lib/test/test_struct.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 95abbe9b34258f..a7e6e4bafc74ee 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -800,14 +800,13 @@ def test_c_complex_round_trip(self): round_trip = struct.unpack(f, struct.pack(f, z))[0] self.assertComplexesAreIdentical(z, round_trip) - @unittest.skipUnless( - support.Py_GIL_DISABLED, - 'this test can only possibly fail with GIL disabled' - ) def test_endian_table_init_subinterpreters(self): # Verify that the _struct extension module can be initialized # concurrently in subinterpreters (gh-140260). - from concurrent.futures import InterpreterPoolExecutor + try: + from concurrent.futures import InterpreterPoolExecutor + except ImportError: + raise unittest.SkipTest("InterpreterPoolExecutor not available") code = "import struct" with InterpreterPoolExecutor(max_workers=5) as executor: From 359ae9a218d2acd34e079c9260069b17b803cbcd Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Wed, 5 Nov 2025 16:32:35 +0300 Subject: [PATCH 15/16] Fix executor.map usage in test_struct --- Lib/test/test_struct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index a7e6e4bafc74ee..17c263d5b10719 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -810,7 +810,7 @@ def test_endian_table_init_subinterpreters(self): code = "import struct" with InterpreterPoolExecutor(max_workers=5) as executor: - results = executor.map(exec(code_str), [code] * 5) + results = executor.map(exec(code), [code] * 5) self.assertListEqual(list(results), [None] * 5) From 47494a4c3698159aeeea87d126ba6272bbf95156 Mon Sep 17 00:00:00 2001 From: Shamil Abdulaev Date: Wed, 5 Nov 2025 16:35:50 +0300 Subject: [PATCH 16/16] Pass exec to executor.map --- Lib/test/test_struct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index 17c263d5b10719..ff5c1d2505fde4 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -810,7 +810,7 @@ def test_endian_table_init_subinterpreters(self): code = "import struct" with InterpreterPoolExecutor(max_workers=5) as executor: - results = executor.map(exec(code), [code] * 5) + results = executor.map(exec, [code] * 5) self.assertListEqual(list(results), [None] * 5)