Skip to content

Commit 560578b

Browse files
committed
Always measure TSC frequency on x86
The TSC frequency is not easy to obtain. The methods that this patch replaces do not always work. However, the TSC frequency is used for blackhole and test durations, so an accurate number is important. This patch improves the TSC frequency measurement routine to support sub-second samples. On every run of the program, the frequency is measured over 3 samples of 0.1 seconds each, and the highest and lowest samples are dropped. Change-Id: Ifc62210b8da586fbaebf0a3f22d72bc2eebda92b Signed-off-by: jty2 <46021128+jty2@users.noreply.github.com>
1 parent edcec8a commit 560578b

File tree

2 files changed

+97
-78
lines changed

2 files changed

+97
-78
lines changed

benchmarks/lockhammer/include/perf_timer.h

Lines changed: 14 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include <string.h>
5353
#include <unistd.h> /* for access() */
5454
#include <math.h>
55+
#include <sys/time.h>
5556

5657
#include "atomics.h"
5758

@@ -265,68 +266,31 @@ static inline void __attribute__((always_inline))
265266
timer_init() {
266267
}
267268

269+
// this function should be implemented in one .c file
270+
unsigned long estimate_hwclock_freq(size_t n, int verbose, struct timeval target_measurement_duration);
271+
268272
static inline uint64_t __attribute__((always_inline))
269273
timer_get_timer_freq(void)
270274
{
271275
extern unsigned long hwtimer_frequency;
272276
if (hwtimer_frequency) { return hwtimer_frequency; }
273277

274-
uint64_t cnt_freq;
275278
#ifdef __aarch64__
276-
__asm__ __volatile__ ("isb; mrs %0, cntfrq_el0" : "=r" (cnt_freq));
279+
__asm__ __volatile__ ("isb; mrs %0, cntfrq_el0" : "=r" (hwtimer_frequency));
277280
#elif __x86_64__
278-
// This code attempts to get the TSC frequency. The assumption made
279-
// is TSC frequency equals the CPUFreq cpuinfo_max_freq attribute
280-
// value, which is the maximum operating frequency of the processor.
281-
// However, this equality is not always true, and less so in newer CPUs.
282-
// Also, the actual TSC frequency may not exactly match any nominal
283-
// frequency attribute value provided by CPUFreq, so the chances of
284-
// this returning the correct frequency have diminished.
285281

286-
// If the CPUFreq cpuinfo_max_freq attribute is not available, this code
287-
// then tries to quickly measure it.
282+
// This measures the TSC frequency over 3 samples of 0.1 seconds each, dropping the highest and lowest.
288283

289284
// Use --timer-frequency flag to override the frequency value.
290-
// Use --estimate-timer-frequency to explicitly measure it.
291-
292-
char buf[100];
293-
FILE * f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r");
294-
if (f == NULL) {
295-
printf("Failed to open cpuinfo_max_freq, error %s\n",
296-
strerror(errno));
297-
uint64_t iterations = 2;
298-
uint64_t time = 0;
299-
for (uint64_t i = 0; i < iterations; i++) {
300-
uint64_t start = rdtscp_start();
301-
sleep(1);
302-
uint64_t end = rdtscp_end();
303-
time += end - start;
304-
}
305-
306-
// round down cycles
307-
uint64_t tmp = (time/iterations);
308-
unsigned long len = log10(tmp);
309-
double div = pow(10, len-2);
310-
return floor(tmp/div)*div;
311-
}
312-
while (! feof(f) && ! ferror(f)) {
313-
size_t end = fread(buf, 1, sizeof(buf) - 1, f);
314-
buf[end] = 0;
315-
}
316-
fclose(f);
317-
318-
/* The ACPI cpufreq driver reports 'base' (aka non-turbo) frequency
319-
in cpuinfo_max_freq while the intel_pstate driver reports the
320-
turbo frequency. Warn if ACPI cpufreq is not found. */
321-
if (access("/sys/devices/system/cpu/cpufreq", F_OK)) {
322-
printf("cpuinfo_max_freq is not from ACPI cpufreq driver! TSC frequency is probably turbo frequency.\n");
323-
}
324-
325-
cnt_freq = strtoul(buf, NULL, 0);
326-
cnt_freq = ((cnt_freq + 5000) / 10000) * 10000; /* round to nearest 10000 kHz */
327-
cnt_freq *= 1000; /* convert KHz to Hz */
285+
// Use --estimate-timer-frequency to measure over a longer duration.
286+
287+
const struct timeval measurement_duration = { .tv_sec = 0, .tv_usec = 100000 };
288+
289+
hwtimer_frequency = estimate_hwclock_freq(1, 0, measurement_duration);
290+
#else
291+
#error "ERROR: timer_get_timer_freq() is not implemented for this system!"
328292
#endif
329-
return cnt_freq;
293+
return hwtimer_frequency;
330294
}
331295

332296
#define TOKENS_MAX_HIGH 1000000 /* good for ~41500 cntvct cycles */

benchmarks/lockhammer/src/lockhammer.c

Lines changed: 83 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -909,11 +909,11 @@ void disable_itimer (void) {
909909

910910
static unsigned long estimate_hwtimer_freq(long cpu_num) {
911911

912-
unsigned long n = 10;
913-
unsigned long hwtimer_start, hwtimer_stop, hwtimer_diff;
914-
unsigned long hwtimer_average = 0;
912+
const unsigned long n = 10;
913+
const struct timeval duration = { .tv_sec = 1, .tv_usec = 0 };
915914

916-
printf("Estimating HW timer frequency on CPU %ld for %lu iterations\n", cpu_num, n);
915+
printf("Estimating HW timer frequency on CPU %ld for %lu iterations of %lu.%06lu seconds each\n",
916+
cpu_num, n, duration.tv_sec, duration.tv_usec);
917917

918918
cpu_set_t cpu_mask;
919919

@@ -925,30 +925,7 @@ static unsigned long estimate_hwtimer_freq(long cpu_num) {
925925
exit(-1);
926926
}
927927

928-
for (unsigned long i = 0; i < n; i++) {
929-
930-
struct timeval ts_a, ts_b, ts_diff;
931-
932-
hwtimer_start = get_raw_counter();
933-
gettimeofday(&ts_a, NULL);
934-
935-
do {
936-
gettimeofday(&ts_b, NULL);
937-
timersub(&ts_b, &ts_a, &ts_diff);
938-
} while (ts_diff.tv_sec < 1);
939-
940-
hwtimer_stop = get_raw_counter();
941-
942-
hwtimer_diff = hwtimer_stop - hwtimer_start;
943-
944-
printf("hwtimer_diff = %lu\n", hwtimer_diff);
945-
946-
hwtimer_average += hwtimer_diff;
947-
}
948-
949-
hwtimer_average /= (double) n;
950-
951-
// printf("hwtimer_average = %lu\n", hwtimer_average);
928+
unsigned long hwtimer_average = estimate_hwclock_freq(n, 1, duration);
952929

953930
return hwtimer_average;
954931
}
@@ -1016,4 +993,82 @@ static int get_next_available_cpu (cpu_set_t * p_avail_cpus, int num_cores, int
1016993
}
1017994

1018995

996+
/*
 * estimate_hwclock_freq() - estimate the hardware counter frequency.
 *
 * Takes n + 2 samples.  For each sample the raw hardware counter
 * (get_raw_counter()) is read before and after a busy-wait on gettimeofday()
 * that lasts about target_measurement_duration, and the counter delta is
 * divided by the elapsed wall-clock time.  The highest and the lowest sample
 * are then discarded and the remaining n samples are averaged.
 *
 * n        - number of samples that contribute to the average; must not be 0.
 * verbose  - if nonzero, print every sample and the two dropped samples.
 * target_measurement_duration - wall-clock length of one sample.  Durations
 *            shorter than 1 microsecond (including zero) are measured as
 *            1 microsecond so that the elapsed time can never be zero.
 *
 * Returns the averaged frequency in counter ticks per second.
 */
unsigned long estimate_hwclock_freq(size_t n, int verbose, struct timeval target_measurement_duration) {

    const long long usec_per_sec = 1000000LL;

    // A sample is retaken if the busy-wait ran this much past its target.
    const long long max_overshoot_us = 100;

    // n + 2 samples are taken and two are dropped, so n == 1 is fine; n == 0
    // would leave nothing to average (and divide by zero below).
    assert(n != 0);

    long long target_us = target_measurement_duration.tv_sec * usec_per_sec
                        + target_measurement_duration.tv_usec;

    // Never accept a zero-length interval: it would make the frequency
    // computation divide by zero.
    if (target_us < 1) {
        target_us = 1;
    }

    unsigned long hwcounter_sum = 0;

    size_t high_i = 0, low_i = 0;
    unsigned long hwcounter_freq_high = 0;
    unsigned long hwcounter_freq_low = (unsigned long) -1;  // largest value, any sample is lower

    for (size_t i = 0; i < n + 2; i++) {

        unsigned long hwcounter_start, hwcounter_stop;
        long long elapsed_us;

        do {
            struct timeval ts_a, ts_b;

            hwcounter_start = get_raw_counter();
            gettimeofday(&ts_a, NULL);

            // Busy-wait until the target duration has elapsed, reading the
            // counter right after every wall-clock read so that the final
            // pair of reads belongs together.
            do {
                gettimeofday(&ts_b, NULL);
                hwcounter_stop = get_raw_counter();

                elapsed_us = (ts_b.tv_sec - ts_a.tv_sec) * usec_per_sec
                           + (ts_b.tv_usec - ts_a.tv_usec);
            } while (elapsed_us < target_us);

        } while (elapsed_us - target_us > max_overshoot_us);

        const unsigned long hwcounter_diff = hwcounter_stop - hwcounter_start;
        const double elapsed_sec = (double) elapsed_us / (double) usec_per_sec;
        const unsigned long hwcounter_freq = (unsigned long) (hwcounter_diff / elapsed_sec);

        if (verbose) {
            printf("sample %zu, hwcounter_diff = %lu, freq = %lu\n",
                    i, hwcounter_diff, hwcounter_freq);
        }

        hwcounter_sum += hwcounter_freq;

        if (hwcounter_freq > hwcounter_freq_high) {
            hwcounter_freq_high = hwcounter_freq;
            high_i = i;
        }

        if (hwcounter_freq < hwcounter_freq_low) {
            hwcounter_freq_low = hwcounter_freq;
            low_i = i;
        }
    }

    if (verbose) {
        printf("dropped sample %zu, hwcounter_freq_low = %lu\n", low_i, hwcounter_freq_low);
        printf("dropped sample %zu, hwcounter_freq_high = %lu\n", high_i, hwcounter_freq_high);
    }

    // Remove the two outliers, then average the n samples that remain.
    hwcounter_sum -= hwcounter_freq_low;
    hwcounter_sum -= hwcounter_freq_high;

    return hwcounter_sum / n;
}
1071+
1072+
1073+
10191074
/* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */

0 commit comments

Comments
 (0)