Skip to content

Commit c98c5b3

Browse files
authored
gh-131253: free-threaded build support for pystats (gh-137189)
Allow the --enable-pystats build option to be used with free-threading. The stats are now stored on a per-interpreter basis, rather than being process-global. For free-threaded builds, the stats structure is allocated per-thread and then periodically merged into the per-interpreter stats structure (on thread exit or when the reporting function is called). Most of the pystats-related code has been moved into the file Python/pystats.c.
1 parent cf1a2c1 commit c98c5b3

24 files changed

+1265
-481
lines changed

Include/cpython/pystate.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,15 @@ struct _ts {
217217
*/
218218
PyObject *threading_local_sentinel;
219219
_PyRemoteDebuggerSupport remote_debugger_support;
220+
221+
#ifdef Py_STATS
222+
// Pointer to PyStats structure, NULL if recording is off. For the
223+
// free-threaded build, the structure is per-thread (stored as a pointer
224+
// in _PyThreadStateImpl). For the default build, the structure is stored
225+
// in the PyInterpreterState structure (threads do not have their own
226+
// structure and all share the same per-interpreter structure).
227+
PyStats *pystats;
228+
#endif
220229
};
221230

222231
/* other API */
@@ -239,6 +248,21 @@ PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate);
239248
// function is set, otherwise disable them.
240249
PyAPI_FUNC(void) PyThreadState_LeaveTracing(PyThreadState *tstate);
241250

251+
#ifdef Py_STATS
252+
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
253+
extern _Py_thread_local PyThreadState *_Py_tss_tstate;
254+
255+
static inline PyStats*
256+
_PyThreadState_GetStatsFast(void)
257+
{
258+
if (_Py_tss_tstate == NULL) {
259+
return NULL; // no attached thread state
260+
}
261+
return _Py_tss_tstate->pystats;
262+
}
263+
#endif
264+
#endif // Py_STATS
265+
242266
/* PyGILState */
243267

244268
/* Helper/diagnostic function - return 1 if the current thread

Include/cpython/pystats.h

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
//
55
// - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF()
66
// and Py_DECREF().
7-
// - _Py_stats variable
7+
// - _PyStats_GET()
88
//
99
// Functions of the sys module:
1010
//
@@ -14,7 +14,7 @@
1414
// - sys._stats_dump()
1515
//
1616
// Python must be built with ./configure --enable-pystats to define the
17-
// Py_STATS macro.
17+
// _PyStats_GET() macro.
1818
//
1919
// Define _PY_INTERPRETER macro to increment interpreter_increfs and
2020
// interpreter_decrefs. Otherwise, increment increfs and decrefs.
@@ -109,6 +109,18 @@ typedef struct _gc_stats {
109109
uint64_t objects_not_transitively_reachable;
110110
} GCStats;
111111

112+
#ifdef Py_GIL_DISABLED
113+
// stats specific to free-threaded build
114+
typedef struct _ft_stats {
115+
// number of times interpreter had to spin or park when trying to acquire a mutex
116+
uint64_t mutex_sleeps;
117+
// number of times that the QSBR mechanism polled (compute read sequence value)
118+
uint64_t qsbr_polls;
119+
// number of times stop-the-world mechanism was used
120+
uint64_t world_stops;
121+
} FTStats;
122+
#endif
123+
112124
typedef struct _uop_stats {
113125
uint64_t execution_count;
114126
uint64_t miss;
@@ -173,22 +185,48 @@ typedef struct _stats {
173185
CallStats call_stats;
174186
ObjectStats object_stats;
175187
OptimizationStats optimization_stats;
188+
#ifdef Py_GIL_DISABLED
189+
FTStats ft_stats;
190+
#endif
176191
RareEventStats rare_event_stats;
177-
GCStats *gc_stats;
192+
GCStats gc_stats[3]; // must match NUM_GENERATIONS
178193
} PyStats;
179194

195+
// Export for most shared extensions
196+
PyAPI_FUNC(PyStats *) _PyStats_GetLocal(void);
197+
198+
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
199+
// use inline function version defined in cpython/pystate.h
200+
static inline PyStats *_PyThreadState_GetStatsFast(void);
201+
#define _PyStats_GET _PyThreadState_GetStatsFast
202+
#else
203+
#define _PyStats_GET _PyStats_GetLocal
204+
#endif
180205

181-
// Export for shared extensions like 'math'
182-
PyAPI_DATA(PyStats*) _Py_stats;
206+
#define _Py_STATS_EXPR(expr) \
207+
do { \
208+
PyStats *s = _PyStats_GET(); \
209+
if (s != NULL) { \
210+
s->expr; \
211+
} \
212+
} while (0)
213+
214+
#define _Py_STATS_COND_EXPR(cond, expr) \
215+
do { \
216+
PyStats *s = _PyStats_GET(); \
217+
if (s != NULL && (cond)) { \
218+
s->expr; \
219+
} \
220+
} while (0)
183221

184222
#ifdef _PY_INTERPRETER
185-
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_increfs++; } while (0)
186-
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_decrefs++; } while (0)
187-
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_increfs++; } while (0)
188-
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_decrefs++; } while (0)
223+
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_increfs++)
224+
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_decrefs++)
225+
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_increfs++)
226+
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_decrefs++)
189227
#else
190-
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.increfs++; } while (0)
191-
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.decrefs++; } while (0)
192-
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_increfs++; } while (0)
193-
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_decrefs++; } while (0)
228+
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.increfs++)
229+
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.decrefs++)
230+
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_increfs++)
231+
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_decrefs++)
194232
#endif

Include/internal/pycore_interp_structs.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ enum _GCPhase {
199199
};
200200

201201
/* If we change this, we need to change the default value in the
202-
signature of gc.collect. */
202+
signature of gc.collect and change the size of PyStats.gc_stats */
203203
#define NUM_GENERATIONS 3
204204

205205
struct _gc_runtime_state {
@@ -963,6 +963,18 @@ struct _is {
963963
# ifdef Py_STACKREF_CLOSE_DEBUG
964964
_Py_hashtable_t *closed_stackrefs_table;
965965
# endif
966+
#endif
967+
968+
#ifdef Py_STATS
969+
// true if recording of pystats is on, this is used when new threads
970+
// are created to decide if recording should be on for them
971+
int pystats_enabled;
972+
// allocated when (and if) stats are first enabled
973+
PyStats *pystats_struct;
974+
#ifdef Py_GIL_DISABLED
975+
// held when pystats related interpreter state is being updated
976+
PyMutex pystats_mutex;
977+
#endif
966978
#endif
967979

968980
/* the initial PyInterpreterState.threads.head */

Include/internal/pycore_pystats.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ extern "C" {
99
#endif
1010

1111
#ifdef Py_STATS
12-
extern void _Py_StatsOn(void);
12+
extern int _Py_StatsOn(void);
1313
extern void _Py_StatsOff(void);
1414
extern void _Py_StatsClear(void);
1515
extern int _Py_PrintSpecializationStats(int to_file);

Include/internal/pycore_stats.h

Lines changed: 51 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,39 +15,56 @@ extern "C" {
1515

1616
#include "pycore_bitutils.h" // _Py_bit_length
1717

18-
#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0)
19-
#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0)
20-
#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0)
21-
#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0)
22-
#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0)
23-
#define OBJECT_STAT_INC_COND(name, cond) \
24-
do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0)
25-
#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0)
26-
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
27-
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
28-
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
29-
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
30-
#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0)
31-
#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
32-
#define UOP_PAIR_INC(uopcode, lastuop) \
33-
do { \
34-
if (lastuop && _Py_stats) { \
35-
_Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
36-
} \
37-
lastuop = uopcode; \
18+
#define STAT_INC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name++)
19+
#define STAT_DEC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name--)
20+
#define OPCODE_EXE_INC(opname) _Py_STATS_EXPR(opcode_stats[opname].execution_count++)
21+
#define CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.name++)
22+
#define OBJECT_STAT_INC(name) _Py_STATS_EXPR(object_stats.name++)
23+
#define OBJECT_STAT_INC_COND(name, cond) _Py_STATS_COND_EXPR(cond, object_stats.name++)
24+
#define EVAL_CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.eval_calls[name]++)
25+
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) _Py_STATS_COND_EXPR(PyFunction_Check(callable), call_stats.eval_calls[name]++)
26+
#define GC_STAT_ADD(gen, name, n) _Py_STATS_EXPR(gc_stats[(gen)].name += (n))
27+
#define OPT_STAT_INC(name) _Py_STATS_EXPR(optimization_stats.name++)
28+
#define OPT_STAT_ADD(name, n) _Py_STATS_EXPR(optimization_stats.name += (n))
29+
#define UOP_STAT_INC(opname, name) \
30+
do { \
31+
PyStats *s = _PyStats_GET(); \
32+
if (s) { \
33+
assert(opname < 512); \
34+
s->optimization_stats.opcode[opname].name++; \
35+
} \
36+
} while (0)
37+
#define UOP_PAIR_INC(uopcode, lastuop) \
38+
do { \
39+
PyStats *s = _PyStats_GET(); \
40+
if (lastuop && s) { \
41+
s->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
42+
} \
43+
lastuop = uopcode; \
3844
} while (0)
39-
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
40-
#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0)
45+
#define OPT_UNSUPPORTED_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.unsupported_opcode[opname]++)
46+
#define OPT_ERROR_IN_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.error_in_opcode[opname]++)
4147
#define OPT_HIST(length, name) \
4248
do { \
43-
if (_Py_stats) { \
49+
PyStats *s = _PyStats_GET(); \
50+
if (s) { \
4451
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
4552
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
46-
_Py_stats->optimization_stats.name[bucket]++; \
53+
s->optimization_stats.name[bucket]++; \
4754
} \
4855
} while (0)
49-
#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0)
50-
#define OPCODE_DEFERRED_INC(opname) do { if (_Py_stats && opcode == opname) _Py_stats->opcode_stats[opname].specialization.deferred++; } while (0)
56+
#define RARE_EVENT_STAT_INC(name) _Py_STATS_EXPR(rare_event_stats.name++)
57+
#define OPCODE_DEFERRED_INC(opname) _Py_STATS_COND_EXPR(opcode==opname, opcode_stats[opname].specialization.deferred++)
58+
59+
#ifdef Py_GIL_DISABLED
60+
#define FT_STAT_MUTEX_SLEEP_INC() _Py_STATS_EXPR(ft_stats.mutex_sleeps++)
61+
#define FT_STAT_QSBR_POLL_INC() _Py_STATS_EXPR(ft_stats.qsbr_polls++)
62+
#define FT_STAT_WORLD_STOP_INC() _Py_STATS_EXPR(ft_stats.world_stops++)
63+
#else
64+
#define FT_STAT_MUTEX_SLEEP_INC()
65+
#define FT_STAT_QSBR_POLL_INC()
66+
#define FT_STAT_WORLD_STOP_INC()
67+
#endif
5168

5269
// Export for '_opcode' shared extension
5370
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -71,6 +88,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
7188
#define OPT_HIST(length, name) ((void)0)
7289
#define RARE_EVENT_STAT_INC(name) ((void)0)
7390
#define OPCODE_DEFERRED_INC(opname) ((void)0)
91+
#define FT_STAT_MUTEX_SLEEP_INC()
92+
#define FT_STAT_QSBR_POLL_INC()
93+
#define FT_STAT_WORLD_STOP_INC()
7494
#endif // !Py_STATS
7595

7696

@@ -90,6 +110,11 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
90110
RARE_EVENT_INTERP_INC(interp, name); \
91111
} while (0); \
92112

113+
PyStatus _PyStats_InterpInit(PyInterpreterState *);
114+
bool _PyStats_ThreadInit(PyInterpreterState *, _PyThreadStateImpl *);
115+
void _PyStats_ThreadFini(_PyThreadStateImpl *);
116+
void _PyStats_Attach(_PyThreadStateImpl *);
117+
void _PyStats_Detach(_PyThreadStateImpl *);
93118

94119
#ifdef __cplusplus
95120
}

Include/internal/pycore_tstate.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,14 @@ typedef struct _PyThreadStateImpl {
7070

7171
// When >1, code objects do not immortalize their non-string constants.
7272
int suppress_co_const_immortalization;
73+
74+
#ifdef Py_STATS
75+
// per-thread stats, will be merged into interp->pystats_struct
76+
PyStats *pystats_struct; // allocated by _PyStats_ThreadInit()
7377
#endif
7478

79+
#endif // Py_GIL_DISABLED
80+
7581
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
7682
Py_ssize_t reftotal; // this thread's total refcount operations
7783
#endif

0 commit comments

Comments
 (0)