@@ -148,20 +148,35 @@ struct override_hash {
148148
149149using instance_map = std::unordered_multimap<const void *, instance *>;
150150
151+ #ifdef Py_GIL_DISABLED
152+ // Wrapper around PyMutex to provide BasicLockable semantics
153+ class pymutex {
154+ PyMutex mutex;
155+
156+ public:
157+ pymutex () : mutex({}) {}
158+ void lock () { PyMutex_Lock (&mutex); }
159+ void unlock () { PyMutex_Unlock (&mutex); }
160+ };
161+
151162// Instance map shards are used to reduce mutex contention in free-threaded Python.
152163struct instance_map_shard {
153- std::mutex mutex;
154164 instance_map registered_instances;
165+ pymutex mutex;
155166 // alignas(64) would be better, but causes compile errors in macOS before 10.14 (see #5200)
156- char padding[64 - (sizeof (std::mutex ) + sizeof (instance_map )) % 64 ];
167+ char padding[64 - (sizeof (instance_map ) + sizeof (pymutex )) % 64 ];
157168};
158169
170+ static_assert (sizeof (instance_map_shard) % 64 == 0 ,
171+ " instance_map_shard size is not a multiple of 64 bytes" );
172+ #endif
173+
159174// / Internal data structure used to track registered instances and types.
160175// / Whenever binary incompatible changes are made to this structure,
161176// / `PYBIND11_INTERNALS_VERSION` must be incremented.
162177struct internals {
163178#ifdef Py_GIL_DISABLED
164- std::mutex mutex;
179+ pymutex mutex;
165180#endif
166181 // std::type_index -> pybind11's type information
167182 type_map<type_info *> registered_types_cpp;
@@ -614,7 +629,7 @@ inline local_internals &get_local_internals() {
614629}
615630
// PYBIND11_LOCK_INTERNALS(internals): in free-threaded builds (Py_GIL_DISABLED) this
// declares a local std::unique_lock named `lock` on `(internals).mutex`, which stays
// held until the end of the enclosing scope. In all other builds it expands to nothing.
#ifdef Py_GIL_DISABLED
#    define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock<pymutex> lock((internals).mutex)
#else
#    define PYBIND11_LOCK_INTERNALS(internals)
#endif
@@ -651,7 +666,7 @@ inline auto with_instance_map(const void *ptr,
651666 auto idx = static_cast <size_t >(hash & internals.instance_shards_mask );
652667
653668 auto &shard = internals.instance_shards [idx];
654- std::unique_lock<std::mutex > lock (shard.mutex );
669+ std::unique_lock<pymutex > lock (shard.mutex );
655670 return cb (shard.registered_instances );
656671#else
657672 (void ) ptr;
@@ -667,7 +682,7 @@ inline size_t num_registered_instances() {
667682 size_t count = 0 ;
668683 for (size_t i = 0 ; i <= internals.instance_shards_mask ; ++i) {
669684 auto &shard = internals.instance_shards [i];
670- std::unique_lock<std::mutex > lock (shard.mutex );
685+ std::unique_lock<pymutex > lock (shard.mutex );
671686 count += shard.registered_instances .size ();
672687 }
673688 return count;
0 commit comments