From 2e82cf70609447977c72a44be775135e50244f86 Mon Sep 17 00:00:00 2001 From: Rene Greiner Date: Sun, 19 Oct 2025 07:00:36 +0000 Subject: [PATCH 1/4] Spinlock: Removed superfluous load() in try_lock and cleaned up lock() loop --- api/include/opentelemetry/common/spin_lock_mutex.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/api/include/opentelemetry/common/spin_lock_mutex.h b/api/include/opentelemetry/common/spin_lock_mutex.h index 7031fa4d23..791aff9e28 100644 --- a/api/include/opentelemetry/common/spin_lock_mutex.h +++ b/api/include/opentelemetry/common/spin_lock_mutex.h @@ -82,8 +82,7 @@ class SpinLockMutex */ bool try_lock() noexcept { - return !flag_.load(std::memory_order_relaxed) && - !flag_.exchange(true, std::memory_order_acquire); + return !flag_.exchange(true, std::memory_order_acquire); } /** @@ -95,13 +94,9 @@ class SpinLockMutex */ void lock() noexcept { - for (;;) + // Try once + while (!try_lock()) { - // Try once - if (!flag_.exchange(true, std::memory_order_acquire)) - { - return; - } // Spin-Fast (goal ~10ns) for (std::size_t i = 0; i < SPINLOCK_FAST_ITERATIONS; ++i) { From 30da17452a04aa024918a57b5e5a6aab092d64f1 Mon Sep 17 00:00:00 2001 From: Rene Greiner Date: Sun, 19 Oct 2025 15:07:38 +0200 Subject: [PATCH 2/4] added std::mutex check. On Apple M1 Pro std::mutex scales better with higher thread count. --- api/test/common/spinlock_benchmark.cc | 70 +++++++++++++++------------ 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/api/test/common/spinlock_benchmark.cc b/api/test/common/spinlock_benchmark.cc index 6cd1c9558d..823b0b8f05 100644 --- a/api/test/common/spinlock_benchmark.cc +++ b/api/test/common/spinlock_benchmark.cc @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include @@ -27,8 +27,8 @@ constexpr int TightLoopLocks = 10000; // // lock: A lambda denoting how to lock. Accepts a reference to `SpinLockType`. // unlock: A lambda denoting how to unlock. Accepts a reference to `SpinLockType`. -template -inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, UnlockF unlock) +template +void SpinThrash(benchmark::State &s, LockF lock, UnlockF unlock) { auto num_threads = s.range(0); // Value we will increment, fighting over a spinlock. @@ -49,9 +49,9 @@ inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, // to ensure maximum thread contention. for (int i = 0; i < TightLoopLocks; i++) { - lock(spinlock); + lock(); value++; - unlock(spinlock); + unlock(); } }); } @@ -63,35 +63,35 @@ inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, } // Benchmark of full spin-lock implementation. -static void BM_SpinLockThrashing(benchmark::State &s) +void BM_SpinLockThrashing(benchmark::State &s) { SpinLockMutex spinlock; - SpinThrash(s, spinlock, [](SpinLockMutex &m) { m.lock(); }, [](SpinLockMutex &m) { m.unlock(); }); + SpinThrash(s, [&] { spinlock.lock(); }, [&] { spinlock.unlock(); }); } // Naive `while(try_lock()) {}` implementation of lock. -static void BM_NaiveSpinLockThrashing(benchmark::State &s) +void BM_NaiveSpinLockThrashing(benchmark::State &s) { SpinLockMutex spinlock; SpinThrash( - s, spinlock, - [](SpinLockMutex &m) { - while (!m.try_lock()) + s, + [&] { + while (!spinlock.try_lock()) { // Left this comment to keep the same format on old and new versions of clang-format } }, - [](SpinLockMutex &m) { m.unlock(); }); + [&] { spinlock.unlock(); }); } // Simple `while(try_lock()) { yield-processor }` -static void BM_ProcYieldSpinLockThrashing(benchmark::State &s) +void BM_ProcYieldSpinLockThrashing(benchmark::State &s) { SpinLockMutex spinlock; - SpinThrash( - s, spinlock, - [](SpinLockMutex &m) { - while (!m.try_lock()) + SpinThrash( + s, + [&] { + while (!spinlock.try_lock()) { #if defined(_MSC_VER) YieldProcessor(); @@ -108,33 +108,33 @@ static void BM_ProcYieldSpinLockThrashing(benchmark::State &s) #endif } }, - [](SpinLockMutex &m) { m.unlock(); }); + [&] { spinlock.unlock(); }); } // SpinLock thrashing with thread::yield(). -static void BM_ThreadYieldSpinLockThrashing(benchmark::State &s) +void BM_ThreadYieldSpinLockThrashing(benchmark::State &s) { #if defined(__cpp_lib_atomic_value_initialization) && \ __cpp_lib_atomic_value_initialization >= 201911L std::atomic_flag mutex{}; #else - std::atomic_flag mutex = ATOMIC_FLAG_INIT; + alignas(8) std::atomic_flag mutex = ATOMIC_FLAG_INIT; #endif - SpinThrash( - s, mutex, - [](std::atomic_flag &l) { - uint32_t try_count = 0; - while (l.test_and_set(std::memory_order_acq_rel)) + SpinThrash( + s, + [&]() { + while (mutex.test_and_set(std::memory_order_acq_rel)) { - ++try_count; - if (try_count % 32) - { - std::this_thread::yield(); - } + std::this_thread::yield(); } - std::this_thread::yield(); }, - [](std::atomic_flag &l) { l.clear(std::memory_order_release); }); + [&] { mutex.clear(std::memory_order_release); }); +} + +void BM_StdMutexCheck(benchmark::State &s) +{ + std::mutex mtx; + SpinThrash(s, [&] { mtx.lock(); }, [&] { mtx.unlock(); }); } // Run the benchmarks at 2x thread/core and measure the amount of time to thrash around. @@ -162,6 +162,12 @@ BENCHMARK(BM_ThreadYieldSpinLockThrashing) ->MeasureProcessCPUTime() ->UseRealTime() ->Unit(benchmark::kMillisecond); +BENCHMARK(BM_StdMutexCheck) + ->RangeMultiplier(2) + ->Range(1, std::thread::hardware_concurrency()) + ->MeasureProcessCPUTime() + ->UseRealTime() + ->Unit(benchmark::kMillisecond); } // namespace From 0fac0b93c2eaa14999ea9efb0d3d4b31f34b5b0e Mon Sep 17 00:00:00 2001 From: Rene Greiner Date: Fri, 24 Oct 2025 21:34:01 +0200 Subject: [PATCH 3/4] fixed iwyu issue --- api/test/common/spinlock_benchmark.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/api/test/common/spinlock_benchmark.cc b/api/test/common/spinlock_benchmark.cc index 823b0b8f05..e0177db42b 100644 --- a/api/test/common/spinlock_benchmark.cc +++ b/api/test/common/spinlock_benchmark.cc @@ -3,6 +3,7 @@ #include #include +#include #include #include #include From 5bfa70d064d211dcc8b9af21c8df99b87a0c5b6f Mon Sep 17 00:00:00 2001 From: Rene Greiner Date: Sat, 25 Oct 2025 13:56:38 +0200 Subject: [PATCH 4/4] try to fix formatting complaints --- api/include/opentelemetry/common/spin_lock_mutex.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/api/include/opentelemetry/common/spin_lock_mutex.h b/api/include/opentelemetry/common/spin_lock_mutex.h index 791aff9e28..7d33fbedfd 100644 --- a/api/include/opentelemetry/common/spin_lock_mutex.h +++ b/api/include/opentelemetry/common/spin_lock_mutex.h @@ -80,11 +80,7 @@ class SpinLockMutex /** * Attempts to lock the mutex. Return immediately with `true` (success) or `false` (failure). */ - bool try_lock() noexcept - { - return !flag_.exchange(true, std::memory_order_acquire); - } - + bool try_lock() noexcept { return !flag_.exchange(true, std::memory_order_acquire); } /** * Blocks until a lock can be obtained for the current thread. *