Skip to content

Commit ed4e1bc

Browse files
committed
Fixed the incorrect implementation of acquire and release semantics in RISCV, as well as some syntax errors
1 parent 6563dad commit ed4e1bc

File tree

5 files changed

+137
-59
lines changed

5 files changed

+137
-59
lines changed

benchmarks/lockhammer/include/atomics.h

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ static inline unsigned long fetchadd64_acquire_release (unsigned long *ptr, unsi
131131
: "memory");
132132
#elif defined(__riscv) && !defined(USE_BUILTIN)
133133
asm volatile("amoadd.d.aqrl %[old], %[val], %[ptr]"
134-
: [old] "=&r" (old), [ptr] "+m" (*ptr)
134+
: [old] "=&r" (old), [ptr] "+A" (*(ptr))
135135
: [val] "r" (addend)
136136
: "memory");
137137
#else
@@ -169,7 +169,7 @@ static inline unsigned long fetchadd64_acquire (unsigned long *ptr, unsigned lon
169169
: "memory");
170170
#elif defined(__riscv) && !defined(USE_BUILTIN)
171171
asm volatile("amoadd.d.aq %[old], %[val], %[ptr]"
172-
: [old] "=&r" (old), [ptr] "+m" (*ptr)
172+
: [old] "=&r" (old), [ptr] "+A" (*(ptr))
173173
: [val] "r" (addend)
174174
: "memory");
175175
#else
@@ -208,7 +208,7 @@ static inline unsigned long fetchadd64_release (unsigned long *ptr, unsigned lon
208208
: "memory");
209209
#elif defined(__riscv) && !defined(USE_BUILTIN)
210210
asm volatile("amoadd.d.rl %[old], %[val], %[ptr]"
211-
: [old] "=&r" (old), [ptr] "+m" (*ptr)
211+
: [old] "=&r" (old), [ptr] "+A" (*(ptr))
212212
: [val] "r" (addend)
213213
: "memory");
214214
#else
@@ -246,7 +246,7 @@ static inline unsigned long fetchadd64 (unsigned long *ptr, unsigned long addend
246246
: "memory");
247247
#elif defined(__riscv) && !defined(USE_BUILTIN)
248248
asm volatile("amoadd.d %[old], %[val], %[ptr]"
249-
: [old] "=&r" (old), [ptr] "+m" (*ptr)
249+
: [old] "=&r" (old), [ptr] "+A" (*(ptr))
250250
: [val] "r" (addend)
251251
: "memory");
252252
#else
@@ -288,7 +288,7 @@ static inline unsigned long fetchsub64 (unsigned long *ptr, unsigned long addend
288288
#elif defined(__riscv) && !defined(USE_BUILTIN)
289289
addend = (unsigned long) (-(long) addend);
290290
asm volatile("amoadd.d %[old], %[val], %[ptr]"
291-
: [old] "=&r" (old), [ptr] "+m" (*ptr)
291+
: [old] "=&r" (old), [ptr] "+A" (*(ptr))
292292
: [val] "r" (addend)
293293
: "memory");
294294
#else
@@ -323,8 +323,8 @@ static inline unsigned long swap64 (unsigned long *ptr, unsigned long val) {
323323
: [val] "r" (val)
324324
: "memory");
325325
#elif defined(__riscv) && !defined(USE_BUILTIN)
326-
asm volatile("amoswap.d %[old], %[val], %[ptr]"
327-
: [old] "=&r" (old), [ptr] "+m" (*ptr)
326+
asm volatile("amoswap.d.aqrl %[old], %[val], %[ptr]"
327+
: [old] "=&r" (old), [ptr] "+A" (*(ptr))
328328
: [val] "r" (val)
329329
: "memory");
330330
#else
@@ -365,16 +365,16 @@ static inline unsigned long cas64 (unsigned long *ptr, unsigned long newval, uns
365365
unsigned long tmp;
366366

367367
asm volatile ( "1: lr.d %[old], %[ptr]\n"
368-
" bne %[old], %[exp], 2f\n"
369-
" sc.d %[tmp], %[val], %[ptr]\n"
370-
" bnez %[tmp], 1b\n"
371-
"2:"
372-
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+m" (*ptr)
368+
" bne %[old], %[exp], 2f\n"
369+
" sc.d %[tmp], %[val], %[ptr]\n"
370+
" bnez %[tmp], 1b\n"
371+
"2:"
372+
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
373373
: [exp] "r" (expected), [val] "r" (newval)
374374
: "memory");
375375
#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
376376
asm volatile("amocas.d %[exp], %[val], %[ptr]"
377-
: [exp] "=&r" (old), [ptr] "+Q" (*ptr)
377+
: [exp] "=&r" (old), [ptr] "+A" (*(ptr))
378378
: "r[exp]" (expected), [val] "r" (newval)
379379
: "memory");
380380
#else
@@ -420,12 +420,12 @@ static inline unsigned long cas64_acquire (unsigned long *ptr, unsigned long val
420420
" sc.d %[tmp], %[newval], %[ptr]\n"
421421
" bnez %[tmp], 1b\n"
422422
"2:"
423-
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+m" (*ptr)
423+
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
424424
: [exp] "r" (exp), [newval] "r" (val)
425425
: "memory");
426426
#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
427427
asm volatile("amocas.d %[exp], %[val], %[ptr]"
428-
: [exp] "=&r" (old), [ptr] "+Q" (*ptr)
428+
: [exp] "=&r" (old), [ptr] "+A" (*(ptr))
429429
: "r[exp]" (exp), [val] "r" (val)
430430
: "memory");
431431
#else
@@ -471,12 +471,12 @@ static inline unsigned long cas64_release (unsigned long *ptr, unsigned long val
471471
" sc.d.rl %[tmp], %[val], %[ptr]\n"
472472
" bnez %[tmp], 1b\n"
473473
"2:"
474-
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+m" (*ptr)
474+
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
475475
: [exp] "r" (exp), [val] "r" (val)
476476
: "memory");
477477
#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
478478
asm volatile("amocas.d.rl %[exp], %[val], %[ptr]"
479-
: [exp] "=&r" (old), [ptr] "+Q" (*ptr)
479+
: [exp] "=&r" (old), [ptr] "+A" (*(ptr))
480480
: "r[exp]" (exp), [val] "r" (val)
481481
: "memory");
482482
#else
@@ -522,12 +522,12 @@ static inline unsigned long cas64_acquire_release (unsigned long *ptr, unsigned
522522
" sc.d.rl %[tmp], %[val], %[ptr]\n"
523523
" bnez %[tmp], 1b\n"
524524
"2:"
525-
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+m" (*ptr)
525+
: [old] "=&r" (old), [tmp] "=&r" (tmp), [ptr] "+A" (*(ptr))
526526
: [exp] "r" (exp), [val] "r" (val)
527527
: "memory");
528528
#elif defined(__riscv) && !defined(USE_BUILTIN) && defined(__riscv_zacas)
529529
asm volatile("amocas.d.aqrl %[exp], %[val], %[ptr]"
530-
: [exp] "=&r" (old), [ptr] "+Q" (*ptr)
530+
: [exp] "=&r" (old), [ptr] "+A" (*(ptr))
531531
: "r[exp]" (exp), [val] "r" (val)
532532
: "memory");
533533
#else

benchmarks/lockhammer/include/cpu_relax.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,6 @@ static inline void __cpu_relax(void) {
7575

7676
}
7777
}
78-
#endif
78+
#endif // CPU_RELAX_H
7979

8080
/* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */

benchmarks/lockhammer/include/perf_timer.h

Lines changed: 109 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
#include <string.h>
5353
#include <unistd.h> /* for access() */
5454
#include <math.h>
55-
#include <sys/time.h>
5655

5756
#include "atomics.h"
5857

@@ -108,7 +107,7 @@ rdtscp(void)
108107

109108
asm volatile("rdtscp" :
110109
"=a" (tsc.lo_32),
111-
"=d" (tsc.hi_32) :: "ecx");
110+
"=d" (tsc.hi_32));
112111

113112
return tsc.tsc_64;
114113
}
@@ -205,9 +204,10 @@ static inline uint64_t __attribute__((always_inline))
205204
get_raw_counter(void) {
206205
uint64_t t;
207206
asm volatile(
208-
"fence rw, rw\n"
207+
"fence.i\n"
208+
"fence r, r\n"
209209
"rdtime %0"
210-
: "=&r"(t) : : "memory");
210+
: "=r"(t) : :);
211211
return t;
212212
}
213213
#endif
@@ -221,9 +221,10 @@ timer_reset_counter()
221221
prev_tsc = rdtscp();
222222
#elif __riscv
223223
asm volatile(
224-
"fence rw, rw\n"
225-
"rdtime %0"
226-
: "=&r"(prev_tsc) : : "memory");
224+
"fence.i\n"
225+
"fence r, r\n"
226+
"rdtime %0"
227+
: "=r"(prev_tsc) : :);
227228
#endif
228229
}
229230

@@ -241,9 +242,10 @@ timer_get_counter()
241242
#elif __riscv
242243
uint64_t counter_value;
243244
asm volatile(
244-
"fence rw, rw\n"
245+
"fence.i\n"
246+
"fence r, r\n"
245247
"rdtime %0"
246-
: "=&r"(counter_value) : : "memory");
248+
: "=r"(counter_value) : :);
247249
#endif
248250
return counter_value;
249251
}
@@ -262,9 +264,10 @@ timer_get_counter_start()
262264
#elif __riscv
263265
uint64_t counter_value;
264266
asm volatile(
265-
"fence rw, rw\n"
267+
"fence.i\n"
268+
"fence r, r\n"
266269
"rdtime %0"
267-
: "=&r"(counter_value) : : "memory");
270+
: "=r"(counter_value) : :);
268271
#endif
269272
return counter_value;
270273
}
@@ -284,10 +287,13 @@ timer_get_counter_end()
284287
#elif __riscv
285288
uint64_t counter_value;
286289
asm volatile(
287-
"fence rw, rw\n"
290+
"fence rw,rw\n"
291+
"fence.i\n"
292+
"fence r, r\n"
288293
"rdtime %0\n"
289-
"fence rw, rw"
290-
: "=&r"(counter_value) : : "memory");
294+
"fence.i\n"
295+
"fence r, r"
296+
: "=r"(counter_value) : :);
291297
#endif
292298
return counter_value;
293299
}
@@ -302,35 +308,106 @@ static inline void __attribute__((always_inline))
302308
timer_init() {
303309
}
304310

305-
// this function should be implemented in one .c file
306-
unsigned long estimate_hwclock_freq(size_t n, int verbose, struct timeval target_measurement_duration);
307-
308311
static inline uint64_t __attribute__((always_inline))
309312
timer_get_timer_freq(void)
310313
{
311314
extern unsigned long hwtimer_frequency;
312315
if (hwtimer_frequency) { return hwtimer_frequency; }
313316

317+
uint64_t cnt_freq;
314318
#ifdef __aarch64__
315-
__asm__ __volatile__ ("isb; mrs %0, cntfrq_el0" : "=r" (hwtimer_frequency));
316-
#elif __x86_64__
319+
__asm__ __volatile__ ("isb; mrs %0, cntfrq_el0" : "=r" (cnt_freq));
320+
#elif __riscv
321+
// This code attempts to get the TSC frequency. The assumption made
322+
// is TSC frequency equals the CPUFreq cpuinfo_max_freq attribute
323+
// value, which is the maximum operating frequency of the processor.
324+
// However, this equality is not always true, and less so in newer CPUs.
325+
// Also, the actual TSC frequency may not exactly match any nominal
326+
// frequency attribute value provided by CPUFreq, so the chances of
327+
// this returning the correct frequency have diminished.
317328

318-
// This measures the TSC frequency over a 3 durations of 0.1 seconds.
329+
// If the CPUFreq cpuinfo_max_freq attribute is not available, this code
330+
// then tries to quickly measure it.
319331

320332
// Use --timer-frequency flag to override the frequency value.
321-
// Use --estimate-timer-frequency to measure over a longer duration.
322-
323-
const struct timeval measurement_duration = { .tv_sec = 0, .tv_usec = 100000 };
324-
325-
hwtimer_frequency = estimate_hwclock_freq(1, 0, measurement_duration);
326-
#elif __riscv
327-
const struct timeval measurement_duration = { .tv_sec = 0, .tv_usec = 100000 };
328-
329-
hwtimer_frequency = estimate_hwclock_freq(1, 0, measurement_duration);
330-
#else
331-
#error "ERROR: timer_get_timer_freq() is not implemented for this system!"
333+
// Use --estimate-timer-frequency to explicitly measure it.
334+
335+
char buf[100];
336+
FILE * f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
337+
if (f == NULL) {
338+
printf("Failed to open cpuinfo_max_freq, error %s\n",
339+
strerror(errno));
340+
uint64_t iterations = 2;
341+
uint64_t time = 0;
342+
for (uint64_t i = 0; i < iterations; i++) {
343+
uint64_t start = get_raw_counter();
344+
sleep(1);
345+
uint64_t end = get_raw_counter();
346+
time += end - start;
347+
}
348+
349+
// round down cycles
350+
uint64_t tmp = (time/iterations);
351+
unsigned long len = log10(tmp);
352+
double div = pow(10, len-2);
353+
return floor(tmp/div)*div;
354+
}
355+
while (! feof(f) && ! ferror(f)) {
356+
size_t end = fread(buf, 1, sizeof(buf) - 1, f);
357+
buf[end] = 0;
358+
}
359+
fclose(f);
360+
361+
/* The ACPI cpufreq driver reports 'base' (aka non-turbo) frequency
362+
in cpuinfo_max_freq while the intel_pstate driver reports the
363+
turbo frequency. Warn if ACPI cpufreq is not found. */
364+
365+
if (access("/sys/devices/system/cpu/cpufreq", F_OK)) {
366+
printf("cpuinfo_max_freq is not from ACPI cpufreq driver! TSC frequency is probably turbo frequency.\n");
367+
}
368+
369+
cnt_freq = strtoul(buf, NULL, 0);
370+
cnt_freq = ((cnt_freq + 5000) / 10000) * 10000; /* round to nearest 10000 kHz */
371+
cnt_freq *= 1000; /* convert KHz to Hz */
372+
#elif __x86_64_
373+
char buf[100];
374+
FILE * f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
375+
if (f == NULL) {
376+
printf("Failed to open cpuinfo_max_freq, error %s\n",
377+
strerror(errno));
378+
uint64_t iterations = 2;
379+
uint64_t time = 0;
380+
for (uint64_t i = 0; i < iterations; i++) {
381+
uint64_t start = rdtscp_start();
382+
sleep(1);
383+
uint64_t end = rdtscp_end();
384+
time += end - start;
385+
}
386+
387+
// round down cycles
388+
uint64_t tmp = (time/iterations);
389+
unsigned long len = log10(tmp);
390+
double div = pow(10, len-2);
391+
return floor(tmp/div)*div;
392+
}
393+
while (! feof(f) && ! ferror(f)) {
394+
size_t end = fread(buf, 1, sizeof(buf) - 1, f);
395+
buf[end] = 0;
396+
}
397+
fclose(f);
398+
399+
/* The ACPI cpufreq driver reports 'base' (aka non-turbo) frequency
400+
in cpuinfo_max_freq while the intel_pstate driver reports the
401+
turbo frequency. Warn if ACPI cpufreq is not found. */
402+
if (access("/sys/devices/system/cpu/cpufreq", F_OK)) {
403+
printf("cpuinfo_max_freq is not from ACPI cpufreq driver! TSC frequency is probably turbo frequency.\n");
404+
}
405+
406+
cnt_freq = strtoul(buf, NULL, 0);
407+
cnt_freq = ((cnt_freq + 5000) / 10000) * 10000; /* round to nearest 10000 kHz */
408+
cnt_freq *= 1000; /* convert KHz to Hz */
332409
#endif
333-
return hwtimer_frequency;
410+
return cnt_freq;
334411
}
335412

336413
#define TOKENS_MAX_HIGH 1000000 /* good for ~41500 cntvct cycles */

benchmarks/lockhammer/src/measure.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ void dump_mem(void * p, size_t n) {
6464
}
6565
printf("\n");
6666
}
67+
6768
#ifdef __aarch64__
6869
// The --disable-outline-atomics-lse flag is only relevant to tests built
6970
// using USE_BUILTIN=1 USE_LSE=0 such that the __atomics intrinsics are

0 commit comments

Comments
 (0)