Skip to content

Commit 30da174

Browse files
committed
Added std::mutex check. On Apple M1 Pro, std::mutex scales better with higher thread counts.
1 parent 2e82cf7 commit 30da174

File tree

1 file changed

+38
-32
lines changed

1 file changed

+38
-32
lines changed

api/test/common/spinlock_benchmark.cc

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
#include <benchmark/benchmark.h>
55
#include <atomic>
6-
#include <cstdint>
6+
#include <mutex>
77
#include <thread>
88
#include <vector>
99

@@ -27,8 +27,8 @@ constexpr int TightLoopLocks = 10000;
2727
//
2828
// lock: A lambda denoting how to lock. Accepts a reference to `SpinLockType`.
2929
// unlock: A lambda denoting how to unlock. Accepts a reference to `SpinLockType`.
30-
template <typename SpinLockType, typename LockF, typename UnlockF>
31-
inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, UnlockF unlock)
30+
template <typename LockF, typename UnlockF>
31+
void SpinThrash(benchmark::State &s, LockF lock, UnlockF unlock)
3232
{
3333
auto num_threads = s.range(0);
3434
// Value we will increment, fighting over a spinlock.
@@ -49,9 +49,9 @@ inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock,
4949
// to ensure maximum thread contention.
5050
for (int i = 0; i < TightLoopLocks; i++)
5151
{
52-
lock(spinlock);
52+
lock();
5353
value++;
54-
unlock(spinlock);
54+
unlock();
5555
}
5656
});
5757
}
@@ -63,35 +63,35 @@ inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock,
6363
}
6464

6565
// Benchmark of full spin-lock implementation.
66-
static void BM_SpinLockThrashing(benchmark::State &s)
66+
void BM_SpinLockThrashing(benchmark::State &s)
6767
{
6868
SpinLockMutex spinlock;
69-
SpinThrash(s, spinlock, [](SpinLockMutex &m) { m.lock(); }, [](SpinLockMutex &m) { m.unlock(); });
69+
SpinThrash(s, [&] { spinlock.lock(); }, [&] { spinlock.unlock(); });
7070
}
7171

7272
// Naive `while(!try_lock()) {}` implementation of lock.
73-
static void BM_NaiveSpinLockThrashing(benchmark::State &s)
73+
void BM_NaiveSpinLockThrashing(benchmark::State &s)
7474
{
7575
SpinLockMutex spinlock;
7676
SpinThrash(
77-
s, spinlock,
78-
[](SpinLockMutex &m) {
79-
while (!m.try_lock())
77+
s,
78+
[&] {
79+
while (!spinlock.try_lock())
8080
{
8181
// Left this comment to keep the same format on old and new versions of clang-format
8282
}
8383
},
84-
[](SpinLockMutex &m) { m.unlock(); });
84+
[&] { spinlock.unlock(); });
8585
}
8686

8787
// Simple `while(!try_lock()) { yield-processor }`
88-
static void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
88+
void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
8989
{
9090
SpinLockMutex spinlock;
91-
SpinThrash<SpinLockMutex>(
92-
s, spinlock,
93-
[](SpinLockMutex &m) {
94-
while (!m.try_lock())
91+
SpinThrash(
92+
s,
93+
[&] {
94+
while (!spinlock.try_lock())
9595
{
9696
#if defined(_MSC_VER)
9797
YieldProcessor();
@@ -108,33 +108,33 @@ static void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
108108
#endif
109109
}
110110
},
111-
[](SpinLockMutex &m) { m.unlock(); });
111+
[&] { spinlock.unlock(); });
112112
}
113113

114114
// SpinLock thrashing with thread::yield().
115-
static void BM_ThreadYieldSpinLockThrashing(benchmark::State &s)
115+
void BM_ThreadYieldSpinLockThrashing(benchmark::State &s)
116116
{
117117
#if defined(__cpp_lib_atomic_value_initialization) && \
118118
__cpp_lib_atomic_value_initialization >= 201911L
119119
std::atomic_flag mutex{};
120120
#else
121-
std::atomic_flag mutex = ATOMIC_FLAG_INIT;
121+
alignas(8) std::atomic_flag mutex = ATOMIC_FLAG_INIT;
122122
#endif
123-
SpinThrash<std::atomic_flag>(
124-
s, mutex,
125-
[](std::atomic_flag &l) {
126-
uint32_t try_count = 0;
127-
while (l.test_and_set(std::memory_order_acq_rel))
123+
SpinThrash(
124+
s,
125+
[&]() {
126+
while (mutex.test_and_set(std::memory_order_acq_rel))
128127
{
129-
++try_count;
130-
if (try_count % 32)
131-
{
132-
std::this_thread::yield();
133-
}
128+
std::this_thread::yield();
134129
}
135-
std::this_thread::yield();
136130
},
137-
[](std::atomic_flag &l) { l.clear(std::memory_order_release); });
131+
[&] { mutex.clear(std::memory_order_release); });
132+
}
133+
134+
void BM_StdMutexCheck(benchmark::State &s)
135+
{
136+
std::mutex mtx;
137+
SpinThrash(s, [&] { mtx.lock(); }, [&] { mtx.unlock(); });
138138
}
139139

140140
// Run the benchmarks at 2x thread/core and measure the amount of time to thrash around.
@@ -162,6 +162,12 @@ BENCHMARK(BM_ThreadYieldSpinLockThrashing)
162162
->MeasureProcessCPUTime()
163163
->UseRealTime()
164164
->Unit(benchmark::kMillisecond);
165+
BENCHMARK(BM_StdMutexCheck)
166+
->RangeMultiplier(2)
167+
->Range(1, std::thread::hardware_concurrency())
168+
->MeasureProcessCPUTime()
169+
->UseRealTime()
170+
->Unit(benchmark::kMillisecond);
165171

166172
} // namespace
167173

0 commit comments

Comments
 (0)