Skip to content

Commit 8d8ed90

Browse files
Yangzheng Bai
authored and committed
CLH (Craig, Landin and Hagersten) spinlocks: initial check-in
CLH is a starvation-free queue spinlock that, like the MCS spinlock, grants the lock in FCFS (first come, first served) order. The CLH spinlock is simpler than the MCS spinlock: each thread spins on the wait flag of its predecessor's node. Our CLH spinlock takes an optional parameter to disable the use of SEVL and WFE.
1 parent f059bdc commit 8d8ed90

File tree

10 files changed

+709
-61
lines changed

10 files changed

+709
-61
lines changed

benchmarks/lockhammer/Makefile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
override CFLAGS += -g -O3 -I. -I./include -I../../ext/mysql/include -I../../ext/linux/include -I../../ext/tbb/include
1+
# The override keyword overwrote the make command-line option LSE_ENABLE=y, so it has been removed
2+
CFLAGS += -g -O3 -I. -I./include -I../../ext/mysql/include -I../../ext/linux/include -I../../ext/tbb/include -I../../ext/sms/base
23

34
ifneq ($(DEBUG_LEVEL),)
45
ifeq ($(shell test $(DEBUG_LEVEL) -gt 0; echo $$?),0)
@@ -32,7 +33,8 @@ TEST_TARGETS=lh_swap_mutex \
3233
lh_empty \
3334
lh_jvm_objectmonitor \
3435
lh_tbb_spin_rw_mutex \
35-
lh_osq_lock
36+
lh_osq_lock \
37+
lh_clh_spinlock
3638

3739
ifeq ($(TARGET_ARCH),aarch64)
3840
TEST_TARGETS+=lh_hybrid_spinlock \
@@ -59,6 +61,9 @@ lh_hybrid_spinlock_fastdequeue: ../../ext/linux/hybrid_spinlock_fastdequeue.h in
5961
lh_osq_lock: ../../ext/linux/osq_lock.h ../../ext/linux/include/lk_atomics.h ../../ext/linux/include/lk_barrier.h ../../ext/linux/include/lk_cmpxchg.h include/atomics.h src/lockhammer.c
6062
${CC} ${CFLAGS} -DATOMIC_TEST=\"$<\" src/lockhammer.c -o build/$@ ${LDFLAGS}
6163

64+
lh_clh_spinlock: ../../ext/sms/clh_spinlock.h ../../ext/sms/base/build_config.h ../../ext/sms/base/cpu.h ../../ext/sms/base/llsc.h src/lockhammer.c
65+
${CC} ${CFLAGS} -DATOMIC_TEST=\"$<\" src/lockhammer.c -o build/$@ ${LDFLAGS}
66+
6267
lh_queued_spinlock: ../../ext/linux/queued_spinlock.h include/atomics.h ../../ext/linux/include/lk_atomics.h src/lockhammer.c
6368
${CC} ${CFLAGS} -DATOMIC_TEST=\"$<\" src/lockhammer.c -o build/$@ ${LDFLAGS}
6469

benchmarks/lockhammer/include/atomics.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,6 @@
3434
#ifndef __LH_ATOMICS_H_
3535
#define __LH_ATOMICS_H_
3636

37-
#ifndef initialize_lock
38-
#define initialize_lock(lock, thread)
39-
#endif
40-
#ifndef parse_test_args
41-
#define parse_test_args(args, argc, argv)
42-
#endif
4337

4438
static inline void spin_wait (unsigned long wait_iter) {
4539
#if defined(__aarch64__)

benchmarks/lockhammer/include/lockhammer.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,17 @@
3232
#ifndef __LOCKHAMMER_H__
3333
#define __LOCKHAMMER_H__
3434

35+
36+
#ifndef initialize_lock
37+
#define initialize_lock(lock, thread)
38+
#endif
39+
#ifndef parse_test_args
40+
#define parse_test_args(args, argc, argv)
41+
#endif
42+
#ifndef thread_local_init
43+
#define thread_local_init(smtid)
44+
#endif
45+
3546
enum units { NS,
3647
INSTS };
3748
typedef enum units Units;

benchmarks/lockhammer/scripts/lh_sweeptest_cfg.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ sweeptest:
6666
- lh_swap_mutex
6767
- lh_tbb_spin_rw_mutex
6868
- lh_ticket_spinlock
69+
- lh_clh_spinlock
6970
cmd_aarch64: [lh_hybrid_spinlock, lh_hybrid_spinlock_fastdequeue]
7071
cmd_x86_64:
7172
repeat: 9

benchmarks/lockhammer/scripts/runall_obsolete.sh

Lines changed: 0 additions & 53 deletions
This file was deleted.

benchmarks/lockhammer/src/lockhammer.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,8 @@ void* hmr(void *ptr)
448448
synchronize_threads(&calibrate_lock, nthrds);
449449
}
450450

451+
thread_local_init(mycore);
452+
451453
#ifdef DDEBUG
452454
printf("%ld %ld\n", hold_count, post_count);
453455
#endif

ext/sms/base/build_config.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Copyright (c) 2017 ARM Limited. All rights reserved.
2+
// SPDX-License-Identifier: BSD-3-Clause
3+
4+
#pragma once
5+
6+
// Architecture detection is inferred from the toolchain. This relies on
7+
// the C compiler's system-specific macros.
8+
#if defined(__aarch64__)
9+
#define CONFIG_ARCH_ARM_V8
10+
#define CONFIG_ARCH_64BIT
11+
#elif defined(__arm__)
12+
#define CONFIG_ARCH_ARM_V7
13+
#define CONFIG_ARCH_32BIT
14+
#elif defined(__x86_64__)
15+
#define CONFIG_ARCH_X86_64
16+
#define CONFIG_ARCH_64BIT
17+
#elif defined(__i386__)
18+
#define CONFIG_ARCH_X86
19+
#define CONFIG_ARCH_32BIT
20+
#endif
21+
22+
#if !defined(CONFIG_ARCH_64BIT) && !defined(CONFIG_ARCH_32BIT)
23+
#error Please add support for N-bit computing to build_config.h
24+
// If you experience this C pre-processor error, take a look at the place
25+
// in this file where CONFIG_ARCH_64/32BIT are defined. If there are no issues
26+
// there and you are needing to add support for a new N-bit processor, please
27+
// search the source code for all occurrences of CONFIG_ARCH_64BIT and
28+
// CONFIG_ARCH_32BIT to check whether further modification is necessary.
29+
// These places will not necessarily #error for unsupported N-bit computing.
30+
#endif
31+
32+
// OS detection is also inferred from the toolchain.
33+
#if defined(__APPLE__)
34+
#define OS_MACOSX 1
35+
#elif defined(__linux__)
36+
#define OS_LINUX 1
37+
#elif defined(__FreeBSD__)
38+
#define OS_FREEBSD 1
39+
#endif
40+
41+
#if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD)
42+
#define OS_POSIX 1
43+
#endif
44+
45+
#define MAX_THREADS 32
46+
47+
//Use LL/SC atomic primitives instead of __atomic_compare_exchange built-ins
48+
//This seems to be the most performant option on ARM but may violate
49+
//recommendations by the ARM architecture (e.g. no memory accesses between
50+
//LL and SC)
51+
//USE_LLSC overrides the use of __atomic_compare_exchange
52+
#ifdef __ARM_ARCH
53+
#define USE_LLSC
54+
#endif
55+
56+
//Use barrier + relaxed store (DMB;STR) instead of store-release (STLR)
57+
//This is more performant on Cortex-A57 and possibly also on Cortex-A53
58+
#if defined(__aarch64__)
59+
#define USE_DMB
60+
#endif
61+
62+
#if defined(USE_DMB) && defined(__arm__)
63+
#error USE_DMB optimization only applies to select ARMv8 processors
64+
#endif
65+
66+
//Use ARM wait-for-event mechanism when busy polling
67+
//This will minimise interconnect transactions and often increase system-wide
68+
//performance
69+
#if defined __ARM_ARCH
70+
#define USE_WFE
71+
#if defined(__arm__)
72+
//TODO: WFE on ARMv7
73+
#undef USE_WFE
74+
#endif
75+
#endif

ext/sms/base/cpu.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright (c) 2017 ARM Limited. All rights reserved.
2+
// SPDX-License-Identifier: BSD-3-Clause
3+
4+
#pragma once
5+
6+
#ifndef CACHE_LINE
7+
// Default CPU cache line size
8+
#define CACHE_LINE 128
9+
#endif
10+
11+
// Stall the calling CPU briefly by executing one architecture-specific
// instruction (presumably for use inside busy-wait loops; confirm against
// callers). Takes no arguments and returns nothing.
// The "memory" clobber on each asm statement tells the compiler that memory
// may have changed, so it must not keep values cached in registers or move
// memory accesses across the call.
static inline void doze(void)
12+
{
13+
#if defined(__ARM_ARCH)
14+
// ISB is used here rather than YIELD: YIELD only hints the CPU to switch
15+
// to another thread if available, and otherwise executes as a NOP.
16+
// ISB flushes the pipeline, then restarts. This is guaranteed to stall
17+
// the CPU a number of cycles
18+
__asm__ volatile("isb" : : : "memory");
19+
#elif defined(__x86_64__)
// x86-64 uses the PAUSE instruction instead.
20+
__asm__ volatile("pause" : : : "memory");
21+
#else
// Any other target (including 32-bit x86, which is not matched above)
// fails the build instead of silently compiling an empty doze().
22+
#error Please add support for your CPU in cpu.h
23+
#endif
24+
}
25+
26+
int num_cpus(void);
27+
28+
unsigned long cpu_hz(void);

0 commit comments

Comments
 (0)