Skip to content

Commit b605475

Browse files
Yangzheng Bai
authored and committed
Use master thread heap array instead of gcc __thread
Because each thread may finish and be joined at an arbitrary time, another thread may still reference the joined thread's __thread (thread-local storage) node even after that thread has exited, which causes a segmentation fault. A separate issue is that DDEBUG-mode printf can cause a clh_spinlock live lock when the thread count is too high.
1 parent 5a9ce88 commit b605475

File tree

2 files changed

+43
-19
lines changed

2 files changed

+43
-19
lines changed

benchmarks/lockhammer/core

40.4 MB
Binary file not shown.

ext/sms/clh_spinlock.h

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -106,16 +106,37 @@ struct clh_node
106106
unsigned long wait;
107107
} __attribute__ ((aligned (CACHE_LINE)));
108108

109+
struct clh_node_pointer
110+
{
111+
struct clh_node *ptr;
112+
} __attribute__ ((aligned (CACHE_LINE)));
113+
109114
struct clh_lock
110115
{
111116
struct clh_node node;
117+
unsigned long num_cores;
112118
struct clh_node *tail __attribute__ ((aligned(CACHE_LINE)));
113119
};
114120

115121
static bool without_wfe;
116-
static struct clh_lock global_clh_lock;
117-
static __thread struct clh_node *clh_node_ptr;
118-
static __thread struct clh_node thread_clh_node;
122+
static struct clh_lock global_clh_lock; // clh lock queue
123+
/*
124+
* Cannot use __thread thread local storage because some threads
125+
* may be joined earlier and their node may be referenced by other
126+
* threads, this will cause memory access violation. We have to
127+
* use the main thread heap and share a common C array. Two arrays
128+
* are used here, one is used as a pointer array, which is fixed
129+
* for each thread. The other is a nodepool, whose node is assigned
130+
* to each thread according to its threadid initially. Then
131+
* according to CLH algorithm, current node will reuse its previous
132+
* node as the next available node. We just update the fixed pointer
133+
* array to reflect this change. That is, each thread will retrieve
134+
* its next available node from fixed pointer array by its thread
135+
* id offset, but the pointer value may point to any node in the
136+
* CLH nodepool.
137+
*/
138+
static struct clh_node_pointer *clh_nodeptr; // clh node pointer array
139+
static struct clh_node *clh_nodepool; // clh node struct array
119140

120141
/* additional parameter to enable WFE(default) or disable WFE */
121142
static void clh_parse_args(test_args unused, int argc, char** argv) {
@@ -150,34 +171,37 @@ static inline void clh_lock_init(uint64_t *u64_lock, unsigned long num_cores)
150171
/* default tail node should be set to 0 */
151172
global_clh_lock.node.prev = NULL;
152173
global_clh_lock.node.wait = 0;
174+
global_clh_lock.num_cores = num_cores;
153175
global_clh_lock.tail = &global_clh_lock.node;
176+
154177
/* save clh_lock pointer to global u64int_t */
155-
*u64_lock = (uint64_t)&global_clh_lock; // unused
178+
*u64_lock = (uint64_t)&global_clh_lock;
179+
180+
/* calloc will initialize all memory to zero automatically */
181+
clh_nodeptr = calloc(num_cores, sizeof(struct clh_node_pointer));
182+
if (clh_nodeptr == NULL) exit(errno);
183+
clh_nodepool = calloc(num_cores, sizeof(struct clh_node));
184+
if (clh_nodepool == NULL) exit(errno);
156185

157186
#ifdef DDEBUG
158-
printf("global_clh_lock: %llx\n", (long long unsigned int) &global_clh_lock);
187+
printf("CLH: global_clh_lock=%llx\n", (long long unsigned int) &global_clh_lock);
159188
#endif
160189
}
161190

162191
static inline void clh_thread_local_init(unsigned long smtid)
163192
{
164-
/* each thread has its own local clh_node pointed by clh_node_ptr */
165-
thread_clh_node.prev = NULL;
166-
thread_clh_node.wait = 0;
167-
clh_node_ptr = &thread_clh_node;
168-
#ifdef DDEBUG
169-
printf("thread/clh_node_ptr/thread_clh_node: %lu / %llx / %llx\n", smtid,
170-
(long long unsigned int) &clh_node_ptr, (long long unsigned int) &thread_clh_node);
171-
#endif
193+
/* initialize clh node pointer array individually */
194+
clh_nodepool[smtid].wait = 1;
195+
clh_nodeptr[smtid].ptr = &clh_nodepool[smtid];
172196
}
173197

174-
static inline void clh_lock(struct clh_lock *lock, struct clh_node *node, bool use_wfe)
198+
static inline void clh_lock(struct clh_lock *lock, struct clh_node *node, bool use_wfe, unsigned long tid)
175199
{
176200
/* must set wait to 1 first, otherwise next node after new tail will not spin */
177201
node->wait = 1;
178202
struct clh_node *prev = node->prev = __atomic_exchange_n(&lock->tail, node, __ATOMIC_ACQ_REL);
179203
#ifdef DDEBUG
180-
printf("lock/prev/node: %llx->%llx\n", (long long unsigned int)prev, (long long unsigned int)node);
204+
printf("T%lu LOCK: prev<-node: %llx<-%llx\n", tid, (long long unsigned int)prev, (long long unsigned int)node);
181205
#endif
182206

183207
/* CLH spinlock: spinning on previous node's wait status */
@@ -202,7 +226,7 @@ static inline void clh_lock(struct clh_lock *lock, struct clh_node *node, bool u
202226
}
203227

204228
/* return the previous node as reused node for the next clh_lock() */
205-
static inline struct clh_node* clh_unlock(struct clh_node *node)
229+
static inline struct clh_node* clh_unlock(struct clh_node *node, unsigned long tid)
206230
{
207231
/* CLH spinlock: release current node by resetting wait status */
208232
#ifdef USE_DMB
@@ -212,7 +236,7 @@ static inline struct clh_node* clh_unlock(struct clh_node *node)
212236
__atomic_store_n(&node->wait, 0, __ATOMIC_RELEASE);
213237
#endif
214238
#ifdef DDEBUG
215-
printf("unlock/node/wait: %llx:%lu\n", (long long unsigned int)node, node->wait);
239+
printf("T%lu UNLOCK: node: %llx\n", tid, (long long unsigned int)node);
216240
#endif
217241
return node->prev;
218242
}
@@ -221,11 +245,11 @@ static inline struct clh_node* clh_unlock(struct clh_node *node)
221245
static unsigned long __attribute__((noinline))
222246
lock_acquire (uint64_t *lock, unsigned long threadnum)
223247
{
224-
clh_lock(&global_clh_lock, clh_node_ptr, !without_wfe);
248+
clh_lock(&global_clh_lock, clh_nodeptr[threadnum].ptr, !without_wfe, threadnum);
225249
return 1;
226250
}
227251

228252
static inline void lock_release (uint64_t *lock, unsigned long threadnum)
229253
{
230-
clh_node_ptr = clh_unlock(clh_node_ptr);
254+
clh_nodeptr[threadnum].ptr = clh_unlock(clh_nodeptr[threadnum].ptr, threadnum);
231255
}

0 commit comments

Comments
 (0)