@@ -106,16 +106,37 @@ struct clh_node
106106 unsigned long wait ;
107107} __attribute__ ((aligned (CACHE_LINE )));
108108
/*
 * Per-thread slot holding the CLH node this thread will use for its
 * next lock() call. Each slot is aligned to a full cache line
 * (presumably to avoid false sharing between adjacent threads' slots
 * -- consistent with the CACHE_LINE alignment used on the other
 * shared structures in this file).
 */
struct clh_node_pointer
{
	struct clh_node *ptr;	/* current node; may point anywhere in the node pool */
} __attribute__ ((aligned (CACHE_LINE)));
113+
/*
 * Global CLH lock state: a built-in dummy head node plus the
 * atomically-exchanged queue tail. `tail` carries its own CACHE_LINE
 * alignment attribute so it sits on a separate cache line from the
 * dummy node's wait flag.
 */
struct clh_lock
{
	struct clh_node node;		/* dummy node; lock starts free (wait == 0) */
	unsigned long num_cores;	/* size of the per-thread node pool */
	struct clh_node *tail __attribute__ ((aligned (CACHE_LINE)));
};
114120
115121static bool without_wfe ;
116- static struct clh_lock global_clh_lock ;
117- static __thread struct clh_node * clh_node_ptr ;
118- static __thread struct clh_node thread_clh_node ;
122+ static struct clh_lock global_clh_lock ; // clh lock queue
/*
 * Cannot use __thread thread-local storage because some threads may
 * be joined earlier while their node is still referenced by other
 * threads, which would cause an invalid memory access. We therefore
 * allocate from the main thread's heap and share common C arrays.
 * Two arrays are used: one is a pointer array with a fixed slot per
 * thread; the other is a node pool whose nodes are initially assigned
 * to threads by thread id. Following the CLH algorithm, a thread then
 * reuses its predecessor's node as its next available node, and we
 * record that by updating the fixed pointer array. That is, each
 * thread retrieves its next available node from the pointer array at
 * its thread-id offset, but the stored pointer may refer to any node
 * in the CLH node pool.
 */
138+ static struct clh_node_pointer * clh_nodeptr ; // clh node pointer array
139+ static struct clh_node * clh_nodepool ; // clh node struct array
119140
120141/* additional parameter to enable WFE(default) or disable WFE */
121142static void clh_parse_args (test_args unused , int argc , char * * argv ) {
@@ -150,34 +171,37 @@ static inline void clh_lock_init(uint64_t *u64_lock, unsigned long num_cores)
150171 /* default tail node should be set to 0 */
151172 global_clh_lock .node .prev = NULL ;
152173 global_clh_lock .node .wait = 0 ;
174+ global_clh_lock .num_cores = num_cores ;
153175 global_clh_lock .tail = & global_clh_lock .node ;
176+
154177 /* save clh_lock pointer to global u64int_t */
155- * u64_lock = (uint64_t )& global_clh_lock ; // unused
178+ * u64_lock = (uint64_t )& global_clh_lock ;
179+
180+ /* calloc will initialize all memory to zero automatically */
181+ clh_nodeptr = calloc (num_cores , sizeof (struct clh_node_pointer ));
182+ if (clh_nodeptr == NULL ) exit (errno );
183+ clh_nodepool = calloc (num_cores , sizeof (struct clh_node ));
184+ if (clh_nodepool == NULL ) exit (errno );
156185
157186#ifdef DDEBUG
158- printf ("global_clh_lock: %llx\n" , (long long unsigned int ) & global_clh_lock );
187+ printf ("CLH: global_clh_lock= %llx\n" , (long long unsigned int ) & global_clh_lock );
159188#endif
160189}
161190
162191static inline void clh_thread_local_init (unsigned long smtid )
163192{
164- /* each thread has its own local clh_node pointed by clh_node_ptr */
165- thread_clh_node .prev = NULL ;
166- thread_clh_node .wait = 0 ;
167- clh_node_ptr = & thread_clh_node ;
168- #ifdef DDEBUG
169- printf ("thread/clh_node_ptr/thread_clh_node: %lu / %llx / %llx\n" , smtid ,
170- (long long unsigned int ) & clh_node_ptr , (long long unsigned int ) & thread_clh_node );
171- #endif
193+ /* initialize clh node pointer array individually */
194+ clh_nodepool [smtid ].wait = 1 ;
195+ clh_nodeptr [smtid ].ptr = & clh_nodepool [smtid ];
172196}
173197
174- static inline void clh_lock (struct clh_lock * lock , struct clh_node * node , bool use_wfe )
198+ static inline void clh_lock (struct clh_lock * lock , struct clh_node * node , bool use_wfe , unsigned long tid )
175199{
176200 /* must set wait to 1 first, otherwise next node after new tail will not spin */
177201 node -> wait = 1 ;
178202 struct clh_node * prev = node -> prev = __atomic_exchange_n (& lock -> tail , node , __ATOMIC_ACQ_REL );
179203#ifdef DDEBUG
180- printf ("lock/ prev/ node: %llx-> %llx\n" , (long long unsigned int )prev , (long long unsigned int )node );
204+ printf ("T%lu LOCK: prev<- node: %llx<- %llx\n" , tid , (long long unsigned int )prev , (long long unsigned int )node );
181205#endif
182206
183207 /* CLH spinlock: spinning on previous node's wait status */
@@ -202,7 +226,7 @@ static inline void clh_lock(struct clh_lock *lock, struct clh_node *node, bool u
202226}
203227
204228/* return the previous node as reused node for the next clh_lock() */
205- static inline struct clh_node * clh_unlock (struct clh_node * node )
229+ static inline struct clh_node * clh_unlock (struct clh_node * node , unsigned long tid )
206230{
207231 /* CLH spinlock: release current node by resetting wait status */
208232#ifdef USE_DMB
@@ -212,7 +236,7 @@ static inline struct clh_node* clh_unlock(struct clh_node *node)
212236 __atomic_store_n (& node -> wait , 0 , __ATOMIC_RELEASE );
213237#endif
214238#ifdef DDEBUG
215- printf ("unlock/ node/wait : %llx:%lu \n" , (long long unsigned int )node , node -> wait );
239+ printf ("T%lu UNLOCK: node: %llx\n" , tid , (long long unsigned int )node );
216240#endif
217241 return node -> prev ;
218242}
@@ -221,11 +245,11 @@ static inline struct clh_node* clh_unlock(struct clh_node *node)
221245static unsigned long __attribute__((noinline ))
222246lock_acquire (uint64_t * lock , unsigned long threadnum )
223247{
224- clh_lock (& global_clh_lock , clh_node_ptr , !without_wfe );
248+ clh_lock (& global_clh_lock , clh_nodeptr [ threadnum ]. ptr , !without_wfe , threadnum );
225249 return 1 ;
226250}
227251
228252static inline void lock_release (uint64_t * lock , unsigned long threadnum )
229253{
230- clh_node_ptr = clh_unlock (clh_node_ptr );
254+ clh_nodeptr [ threadnum ]. ptr = clh_unlock (clh_nodeptr [ threadnum ]. ptr , threadnum );
231255}
0 commit comments