|
1 | 1 | /* |
2 | | - * Copyright (c) 2017, The Linux Foundation. All rights reserved. |
| 2 | + * Copyright (c) 2017-2025, The Linux Foundation. All rights reserved. |
3 | 3 | * |
4 | 4 | * SPDX-License-Identifier: BSD-3-Clause |
5 | 5 | * |
|
33 | 33 | #define __LOCKHAMMER_H__ |
34 | 34 |
|
35 | 35 |
|
36 | | -#ifndef initialize_lock |
37 | | - #define initialize_lock(lock, thread) |
| 36 | +// PROGRESS_TICK_PROFILE - prints each thread's timer value at lock_acquires milestones to show thread concurrency |
| 37 | +#define PROGRESS_TICK_PROFILE |
| 38 | + |
| 39 | +enum units { NS, |
| 40 | + INSTS, NOT_SET }; |
| 41 | +typedef enum units Units; |
| 42 | + |
| 43 | +#define _stringify(x) #x |
| 44 | +#define stringify(x) _stringify(x) |
| 45 | + |
| 46 | +// per_thread_results_t - each thread returns its results in this struct (inside thread_args_t) |
| 47 | +typedef struct { |
| 48 | + unsigned long cpu_affined; // which CPU this was pinned on. |
| 49 | + |
| 50 | + unsigned long lock_acquires;// number of locks acquired-and-released per thread |
| 51 | + unsigned long cputime_ns; // this thread's CPU time in nanoseconds |
| 52 | + unsigned long walltime_ns; // this thread's wall clock time in nanoseconds |
| 53 | + unsigned long hmrdepth; // depth=lock-specific notion of contention |
| 54 | + |
| 55 | + unsigned long hwtimer_start; // timer value at start of measurement loop |
| 56 | + unsigned long hwtimer_end; // "" at end |
| 57 | + |
| 58 | + unsigned long hwtimer_10p; // timer value at 10% of work completion |
| 59 | + unsigned long hwtimer_25p; // "" at 25% |
| 60 | + unsigned long hwtimer_50p; // "" at 50% |
| 61 | + unsigned long hwtimer_75p; // "" at 75% |
| 62 | + unsigned long hwtimer_90p; // "" at 90% |
| 63 | + |
| 64 | + // hold/post durations from calibrate_timer() |
| 65 | + double hold_ns, post_ns; |
| 66 | + |
| 67 | + // metrics only for osq_lock |
| 68 | + unsigned long osq_lock_wait_next_spins; |
| 69 | + unsigned long osq_unlock_wait_next_spins; |
| 70 | + unsigned long osq_lock_locked_spins; |
| 71 | + unsigned long osq_lock_unqueue_spins; |
| 72 | + unsigned long osq_lock_acquire_backoffs; |
| 73 | + |
| 74 | +} per_thread_results_t; |
| 75 | + |
| 76 | + |
| 77 | +// thread_args_t -- pointer to an instance of this is passed to each thread |
| 78 | +typedef struct { |
| 79 | + unsigned long thread_num; // thread number, ordinal 0 |
| 80 | + unsigned long num_threads; // number of worker threads in total for experiment |
| 81 | + unsigned long num_acquires; // -a flag, aka nacqrs, aka number of acquires per thread to do |
| 82 | + unsigned long *lock; // pointer to the lock variable |
| 83 | + |
| 84 | + unsigned long *p_start_ns; // marshal thread's monotonic start time, in ns, for computing wall_elapsed_ns; only the marshal thread sets this |
| 85 | + unsigned long hold, post; // ncrit, nparallel |
| 86 | + Units hold_unit, post_unit; // NS or INSTS, hold_unit = ncrit_units, post_unit = nparallel_units |
| 87 | + unsigned long hold_count; |
| 88 | + unsigned long post_count; |
| 89 | + |
| 90 | + double tickspns; // number of ticks_per_ns |
| 91 | + |
| 92 | + unsigned long run_on_this_cpu; // logical CPU on which a worker thread is to run |
| 93 | + |
| 94 | + unsigned long run_limit_ticks; // if non-zero, the number of timer ticks to run for when using --run-limit-ticks or --run-limit-seconds |
| 95 | + unsigned long run_limit_inner_loop_iters; // the number of lock acquire/release sequences to run before checking the hwtimer when using --run-limit-ticks or --run-limit-seconds |
| 96 | + unsigned long hwtimer_frequency; |
| 97 | + |
| 98 | + int verbose; |
| 99 | + unsigned long blackhole_numtries; |
| 100 | + |
| 101 | + per_thread_results_t results; // output data structure |
| 102 | + |
| 103 | +} thread_args_t; |
| 104 | + |
| 105 | +// pinorder_t - describes a set of CPUs on which to run worker threads |
| 106 | +typedef struct { |
| 107 | + int * cpu_list; // pointer to an array of int. index into this array is the thread number, each element is the logical CPU on which that thread is to run. |
| 108 | + size_t num_threads; // number of threads defined for this pinorder (i.e. the number of valid entries in the cpu_list array). |
| 109 | +} pinorder_t; |
| 110 | + |
| 111 | + |
| 112 | +typedef struct { |
| 113 | + unsigned long t; // duration time, either in nanoseconds or iterations |
| 114 | + Units unit; // duration unit, either NS or INSTS |
| 115 | +} duration_t; |
| 116 | + |
| 117 | +// test_args_t - mostly command line parameters |
| 118 | +typedef struct { |
| 119 | + unsigned long num_acquires; // -a number of acquires (not documented?) |
| 120 | + duration_t * crits; // -c, --cn=, --ci= critical duration |
| 121 | + duration_t * pars; // -p, --pn=, --pi= parallel duration |
| 122 | + size_t num_crits; |
| 123 | + size_t num_pars; |
| 124 | + unsigned long ileave; // -i interleave value for SMT pinning |
| 125 | + int scheduling_policy; // -S use explicit scheduling policy |
| 126 | + size_t num_pinorders; |
| 127 | + pinorder_t * pinorders; // -o CPU pinning order |
| 128 | + unsigned long timeout_usec; // -A timeout_usec |
| 129 | + |
| 130 | + int hugepagesz; |
| 131 | + int use_mmap; |
| 132 | + int mmap_hugepage_offset_exists; |
| 133 | + int print_hugepage_physaddr; |
| 134 | + size_t mmap_hugepage_offset; |
| 135 | + size_t mmap_hugepage_physaddr; |
| 136 | + unsigned long hwtimer_frequency; |
| 137 | + unsigned long probed_hwtimer_frequency; |
| 138 | + long estimate_hwtimer_freq_cpu; |
| 139 | + |
| 140 | + double run_limit_seconds; |
| 141 | + unsigned long run_limit_ticks; |
| 142 | + unsigned long run_limit_inner_loop_iters; |
| 143 | + int ignore_unknown_scaling_governor; |
| 144 | + int suppress_cpu_frequency_warnings; |
| 145 | + const char * cpuorder_filename; |
| 146 | +#ifdef JSON_OUTPUT |
| 147 | + const char * json_output_filename; |
38 | 148 | #endif |
39 | | -#ifndef parse_test_args |
40 | | - #define parse_test_args(args, argc, argv) |
| 149 | +#ifdef __aarch64__ |
| 150 | + char disable_outline_atomics_lse; |
41 | 151 | #endif |
42 | | -#ifndef thread_local_init |
43 | | - #define thread_local_init(smtid) |
| 152 | + int verbose; |
| 153 | + size_t iterations; |
| 154 | + size_t blackhole_numtries; |
| 155 | +} test_args_t; |
| 156 | + |
| 157 | +// system_info_t - system configuration data |
| 158 | +typedef struct { |
| 159 | + unsigned long num_cores; // number of processors configured by the operating system |
| 160 | + size_t page_size_bytes; // page size in bytes |
| 161 | + size_t erg_bytes; // number of bytes per exclusive reservation granule (e.g. cache line/block) |
| 162 | + |
| 163 | + cpu_set_t avail_cores; // cores that the CPU affinity mask allows us to run on |
| 164 | + size_t num_avail_cores; // number of cores that the CPU affinity mask allows us to run on |
| 165 | + size_t num_online_cores; // the number of cores that getconf _NPROCESSORS_ONLN returns |
| 166 | + |
| 167 | + // num_online_cores can be less than num_cores because some may be offline or not permitted by affinity mask |
| 168 | + // num_avail_cores may be less than num_online_cores because some online cores may be isolated |
| 169 | +} system_info_t; |
| 170 | + |
| 171 | +// locks_t -- pointers to the actual locks to be used |
| 172 | +typedef struct { |
| 173 | + unsigned long * p_test_lock; // address of main lock |
| 174 | + unsigned long * p_ready_lock; // lock to synchronize all threads' entry into hmr() |
| 175 | + unsigned long * p_sync_lock; // lock to synchronize before blackhole calibration |
| 176 | + unsigned long * p_calibrate_lock; // lock to synchronize after blackhole calibration |
| 177 | +} locks_t; |
| 178 | + |
| 179 | +// calibrate_blackhole -- (used in osq_lock) |
| 180 | +unsigned long calibrate_blackhole(unsigned long target, unsigned long tokens_low, unsigned long tokens_high, unsigned long core_id, unsigned long NUMTRIES); |
| 181 | + |
| 182 | +// evaluate_blackhole -- returns average duration of NUMTRIES |
| 183 | +int64_t evaluate_blackhole( const unsigned long tokens_mid, const unsigned long NUMTRIES); |
| 184 | + |
| 185 | +// blackhole() -- runs a small loop to consume time (also used in osq_lock) |
| 186 | +void blackhole(unsigned long iters); |
| 187 | + |
| 188 | +// measure_setup_initialize_lock() -- calls lock-specific setup routine if it exists |
| 189 | +void measure_setup_initialize_lock(locks_t * p_locks, pinorder_t * pinorder); |
| 190 | + |
| 191 | +// measure_setup_parse_test_args() -- calls lock-specific parsing routine if it exists |
| 192 | +void measure_setup_parse_test_args(test_args_t * p_test_args, int argc, char ** argv); |
| 193 | + |
| 194 | +// convert the struct timespec to only nanoseconds |
| 195 | +unsigned long timespec_to_ns (struct timespec * ts); |
| 196 | + |
| 197 | +// selectively disable LSE instructions in outline atomics/libgcc; in measure.c |
| 198 | +void handle_disable_outline_atomics_lse(void); |
| 199 | + |
| 200 | +#if __GNUC__==1 |
| 201 | +#define NOINLINE __attribute__((noinline)) |
| 202 | +#elif __clang__==1 |
| 203 | +#define NOINLINE __attribute__((noinline)) |
| 204 | +#else |
| 205 | +#define NOINLINE |
44 | 206 | #endif |
45 | 207 |
|
46 | | -enum units { NS, |
47 | | - INSTS }; |
48 | | -typedef enum units Units; |
| 208 | +#if __GNUC__==1 |
| 209 | +#define NO_UNROLL_LOOP _Pragma("GCC unroll 0") |
| 210 | +#elif __clang__==1 |
| 211 | +#define NO_UNROLL_LOOP _Pragma("clang loop unroll(disable)") |
| 212 | +#else |
| 213 | +#define NO_UNROLL_LOOP |
| 214 | +#endif |
49 | 215 |
|
50 | | -struct thread_args { |
51 | | - unsigned long ncores; |
52 | | - unsigned long nthrds; |
53 | | - unsigned long ileave; |
54 | | - unsigned long iter; |
55 | | - unsigned long *lock; |
56 | | - unsigned long *rst; |
57 | | - unsigned long *nsec; |
58 | | - unsigned long *real_nsec; |
59 | | - unsigned long *depth; |
60 | | - unsigned long *nstart; |
61 | | - unsigned long hold, post; |
62 | | - Units hold_unit, post_unit; |
63 | | - double tickspns; |
64 | | - int *pinorder; |
65 | | -}; |
66 | | -typedef struct thread_args thread_args; |
67 | | - |
68 | | -struct test_args { |
69 | | - unsigned long nthrds; |
70 | | - unsigned long nacqrs; |
71 | | - unsigned long ncrit; |
72 | | - Units ncrit_units; |
73 | | - unsigned long nparallel; |
74 | | - Units nparallel_units; |
75 | | - unsigned long ileave; |
76 | | - unsigned char safemode; |
77 | | - int *pinorder; |
78 | | -}; |
79 | | -typedef struct test_args test_args; |
80 | 216 |
|
81 | 217 | #endif |
| 218 | + |
| 219 | +/* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ |
0 commit comments