Skip to content

Commit 5205c3d

Browse files
james-c-linaro authored and acmel committed
perf tests: Don't retest sections in "Object code reading"
We already only test each kcore map once, but on slow systems (particularly with network filesystems) even the non-kcore maps are slow. The test can end up testing the same objdump output over and over, which only wastes time.

Generalize the skipping mechanism to track all DSOs and addresses so that each section is only tested once.

On a fully loaded ARM Juno (simulating a parallel 'perf test' run) with a network filesystem, the original runtime is:

  real 1m51.126s
  user 0m19.445s
  sys  1m15.431s

And the new runtime is:

  real 0m48.873s
  user 0m8.031s
  sys  0m32.353s

Committer testing:

  # perf test "code read"
  22: Object code reading : Ok
  #

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: James Clark <james.clark@linaro.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent 0a75ba3 commit 5205c3d

File tree

1 file changed

+85
-34
lines changed

1 file changed

+85
-34
lines changed

tools/perf/tests/code-reading.c

Lines changed: 85 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <errno.h>
33
#include <linux/kconfig.h>
44
#include <linux/kernel.h>
5+
#include <linux/rbtree.h>
56
#include <linux/types.h>
67
#include <inttypes.h>
78
#include <stdlib.h>
@@ -39,11 +40,64 @@
3940
#define BUFSZ 1024
4041
#define READLEN 128
4142

42-
struct state {
43-
u64 done[1024];
44-
size_t done_cnt;
43+
struct tested_section {
44+
struct rb_node rb_node;
45+
u64 addr;
46+
char path[PATH_MAX];
4547
};
4648

49+
/*
 * Look up the (path, addr) pair in the @tested_sections rbtree and insert it
 * if it is not there yet.
 *
 * Entries are ordered by strcmp() on the path first and by address second.
 *
 * Returns true when the pair is already present, and also when a new entry
 * cannot be allocated; returns false once a new entry has been inserted.
 *
 * NOTE(review): the stored path is bounded by sizeof(entry->path) via
 * strlcpy(), while lookups strcmp() against the full @path, so a path that
 * does not fit in the buffer would never match its stored copy - confirm
 * callers never pass such paths.
 */
static bool tested_code_insert_or_exists(const char *path, u64 addr,
					 struct rb_root *tested_sections)
{
	struct rb_node **link = &tested_sections->rb_node;
	struct rb_node *parent = NULL;
	struct tested_section *entry;

	/* Descend to either a matching entry or the empty slot for a new one. */
	while (*link) {
		int order;

		parent = *link;
		entry = rb_entry(parent, struct tested_section, rb_node);

		order = strcmp(path, entry->path);
		if (order == 0) {
			if (addr == entry->addr)
				return true; /* already tested */
			/* Same path: fall back to ordering by address. */
			order = addr < entry->addr ? -1 : 1;
		}

		link = order < 0 ? &parent->rb_left : &parent->rb_right;
	}

	entry = zalloc(sizeof(*entry));
	if (entry == NULL)
		return true;

	strlcpy(entry->path, path, sizeof(entry->path));
	entry->addr = addr;
	rb_link_node(&entry->rb_node, parent, link);
	rb_insert_color(&entry->rb_node, tested_sections);

	return false;
}
87+
88+
static void tested_sections__free(struct rb_root *root)
89+
{
90+
while (!RB_EMPTY_ROOT(root)) {
91+
struct rb_node *node = rb_first(root);
92+
struct tested_section *ts = rb_entry(node,
93+
struct tested_section,
94+
rb_node);
95+
96+
rb_erase(node, root);
97+
free(ts);
98+
}
99+
}
100+
47101
static size_t read_objdump_chunk(const char **line, unsigned char **buf,
48102
size_t *buf_len)
49103
{
@@ -316,13 +370,15 @@ static void dump_buf(unsigned char *buf, size_t len)
316370
}
317371

318372
static int read_object_code(u64 addr, size_t len, u8 cpumode,
319-
struct thread *thread, struct state *state)
373+
struct thread *thread,
374+
struct rb_root *tested_sections)
320375
{
321376
struct addr_location al;
322377
unsigned char buf1[BUFSZ] = {0};
323378
unsigned char buf2[BUFSZ] = {0};
324379
size_t ret_len;
325380
u64 objdump_addr;
381+
u64 skip_addr;
326382
const char *objdump_name;
327383
char decomp_name[KMOD_DECOMP_LEN];
328384
bool decomp = false;
@@ -350,6 +406,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
350406
goto out;
351407
}
352408

409+
/*
410+
* Don't retest the same addresses. objdump struggles with kcore - try
411+
* each map only once even if the address is different.
412+
*/
413+
skip_addr = dso__is_kcore(dso) ? map__start(al.map) : al.addr;
414+
if (tested_code_insert_or_exists(dso__long_name(dso), skip_addr,
415+
tested_sections)) {
416+
pr_debug("Already tested %s @ %#"PRIx64" - skipping\n",
417+
dso__long_name(dso), skip_addr);
418+
goto out;
419+
}
420+
353421
pr_debug("On file address is: %#"PRIx64"\n", al.addr);
354422

355423
if (len > BUFSZ)
@@ -387,24 +455,6 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
387455
goto out;
388456
}
389457

390-
/* objdump struggles with kcore - try each map only once */
391-
if (dso__is_kcore(dso)) {
392-
size_t d;
393-
394-
for (d = 0; d < state->done_cnt; d++) {
395-
if (state->done[d] == map__start(al.map)) {
396-
pr_debug("kcore map tested already");
397-
pr_debug(" - skipping\n");
398-
goto out;
399-
}
400-
}
401-
if (state->done_cnt >= ARRAY_SIZE(state->done)) {
402-
pr_debug("Too many kcore maps - skipping\n");
403-
goto out;
404-
}
405-
state->done[state->done_cnt++] = map__start(al.map);
406-
}
407-
408458
objdump_name = dso__long_name(dso);
409459
if (dso__needs_decompress(dso)) {
410460
if (dso__decompress_kmodule_path(dso, objdump_name,
@@ -471,9 +521,9 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
471521
return err;
472522
}
473523

474-
static int process_sample_event(struct machine *machine,
475-
struct evlist *evlist,
476-
union perf_event *event, struct state *state)
524+
static int process_sample_event(struct machine *machine, struct evlist *evlist,
525+
union perf_event *event,
526+
struct rb_root *tested_sections)
477527
{
478528
struct perf_sample sample;
479529
struct thread *thread;
@@ -494,18 +544,20 @@ static int process_sample_event(struct machine *machine,
494544
goto out;
495545
}
496546

497-
ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
547+
ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread,
548+
tested_sections);
498549
thread__put(thread);
499550
out:
500551
perf_sample__exit(&sample);
501552
return ret;
502553
}
503554

504555
static int process_event(struct machine *machine, struct evlist *evlist,
505-
union perf_event *event, struct state *state)
556+
union perf_event *event, struct rb_root *tested_sections)
506557
{
507558
if (event->header.type == PERF_RECORD_SAMPLE)
508-
return process_sample_event(machine, evlist, event, state);
559+
return process_sample_event(machine, evlist, event,
560+
tested_sections);
509561

510562
if (event->header.type == PERF_RECORD_THROTTLE ||
511563
event->header.type == PERF_RECORD_UNTHROTTLE)
@@ -525,7 +577,7 @@ static int process_event(struct machine *machine, struct evlist *evlist,
525577
}
526578

527579
static int process_events(struct machine *machine, struct evlist *evlist,
528-
struct state *state)
580+
struct rb_root *tested_sections)
529581
{
530582
union perf_event *event;
531583
struct mmap *md;
@@ -537,7 +589,7 @@ static int process_events(struct machine *machine, struct evlist *evlist,
537589
continue;
538590

539591
while ((event = perf_mmap__read_event(&md->core)) != NULL) {
540-
ret = process_event(machine, evlist, event, state);
592+
ret = process_event(machine, evlist, event, tested_sections);
541593
perf_mmap__consume(&md->core);
542594
if (ret < 0)
543595
return ret;
@@ -637,9 +689,7 @@ static int do_test_code_reading(bool try_kcore)
637689
.uses_mmap = true,
638690
},
639691
};
640-
struct state state = {
641-
.done_cnt = 0,
642-
};
692+
struct rb_root tested_sections = RB_ROOT;
643693
struct perf_thread_map *threads = NULL;
644694
struct perf_cpu_map *cpus = NULL;
645695
struct evlist *evlist = NULL;
@@ -773,7 +823,7 @@ static int do_test_code_reading(bool try_kcore)
773823

774824
evlist__disable(evlist);
775825

776-
ret = process_events(machine, evlist, &state);
826+
ret = process_events(machine, evlist, &tested_sections);
777827
if (ret < 0)
778828
goto out_put;
779829

@@ -793,6 +843,7 @@ static int do_test_code_reading(bool try_kcore)
793843
perf_thread_map__put(threads);
794844
machine__delete(machine);
795845
perf_env__exit(&host_env);
846+
tested_sections__free(&tested_sections);
796847

797848
return err;
798849
}

0 commit comments

Comments
 (0)