From 73534617dfa3c4cd95fe5ffaeff5315e9ffc2de6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2022 14:06:12 +0200 Subject: perf build: Fix btf__load_from_kernel_by_id() feature check The btf__load_from_kernel_by_id() only takes one arg, not two. Committer notes: I tested it just with an older libbpf, one where btf__load_from_kernel_by_id() wasn't introduced yet. A test with a newer dynamic libbpf would fail because the btf__load_from_kernel_by_id() is there, but takes just one arg. Fixes: 0ae065a5d265bc5a ("perf build: Fix check for btf__load_from_kernel_by_id() in libbpf") Signed-off-by: Jiri Olsa Cc: Heiko Carstens Cc: Ian Rogers Cc: Ilya Leoshkevich Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/linux-perf-users/YozLKby7ITEtchC9@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c index f7c084428735..a17647f7d5a4 100644 --- a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c +++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include int main(void) { - return btf__load_from_kernel_by_id(20151128, NULL); + btf__load_from_kernel_by_id(20151128); + return 0; } -- cgit v1.2.3 From 5c83eff38194ab2c69a7dc1a64a0a3683f0a3c3a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2022 13:04:43 +0200 Subject: perf build: Stop using __weak bpf_prog_load() to handle older libbpf versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By adding a feature test for bpf_prog_load() and providing a fallback if it isn't present in older versions of libbpf. 
Committer testing: $ rpm -q libbpf-devel libbpf-devel-0.4.0-2.fc35.x86_64 $ make -C tools/perf LIBBPF_DYNAMIC=1 O=/tmp/build/perf install-bin $ cat /tmp/build/perf/feature/test-libbpf-bpf_prog_load.make.output test-libbpf-bpf_prog_load.c: In function ‘main’: test-libbpf-bpf_prog_load.c:6:16: error: implicit declaration of function ‘bpf_prog_load’ [-Werror=implicit-function-declaration] 6 | return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */, | ^~~~~~~~~~~~~ cc1: all warnings being treated as errors $ $ objdump -dS /tmp/build/perf/perf | grep '<bpf_prog_load>:' -A20 00000000005b2d70 <bpf_prog_load>: { 5b2d70: 55 push %rbp 5b2d71: 48 89 ce mov %rcx,%rsi 5b2d74: 4c 89 c8 mov %r9,%rax 5b2d77: 49 89 d2 mov %rdx,%r10 5b2d7a: 4c 89 c2 mov %r8,%rdx 5b2d7d: 48 89 e5 mov %rsp,%rbp 5b2d80: 48 83 ec 18 sub $0x18,%rsp 5b2d84: 64 48 8b 0c 25 28 00 mov %fs:0x28,%rcx 5b2d8b: 00 00 5b2d8d: 48 89 4d f8 mov %rcx,-0x8(%rbp) 5b2d91: 31 c9 xor %ecx,%ecx return bpf_load_program(prog_type, insns, insn_cnt, license, 5b2d93: 41 8b 49 5c mov 0x5c(%r9),%ecx 5b2d97: 51 push %rcx 5b2d98: 4d 8b 49 60 mov 0x60(%r9),%r9 5b2d9c: 4c 89 d1 mov %r10,%rcx 5b2d9f: 44 8b 40 1c mov 0x1c(%rax),%r8d 5b2da3: e8 f8 aa e5 ff call 40d8a0 <bpf_load_program@plt> } $ Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Ian Rogers Cc: Ilya Leoshkevich Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/linux-perf-users/YozLKby7ITEtchC9@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libbpf-bpf_prog_load.c | 9 +++++++++ tools/perf/Makefile.config | 5 +++++ tools/perf/util/bpf-event.c | 12 +++++++----- 5 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 tools/build/feature/test-libbpf-bpf_prog_load.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c6a48d0ef9ff..fa5f7b7d722c 100644 --- 
a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -99,6 +99,7 @@ FEATURE_TESTS_EXTRA := \ clang \ libbpf \ libbpf-btf__load_from_kernel_by_id \ + libbpf-bpf_prog_load \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index cb4a2a4fa2e4..b3fdcc6c5dd7 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -58,6 +58,7 @@ FILES= \ test-bpf.bin \ test-libbpf.bin \ test-libbpf-btf__load_from_kernel_by_id.bin \ + test-libbpf-bpf_prog_load.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -291,6 +292,9 @@ $(OUTPUT)test-libbpf.bin: $(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-bpf_prog_load.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-bpf_prog_load.c b/tools/build/feature/test-libbpf-bpf_prog_load.c new file mode 100644 index 000000000000..47f516d63ebc --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_prog_load.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */, + NULL /* license */, NULL /* insns */, + 0 /* insn_cnt */, NULL /* opts */); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e0304e70f182..63c92fdb1df4 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -573,11 +573,16 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1) CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID endif + $(call feature_check,libbpf-bpf_prog_load) + ifeq ($(feature-libbpf-bpf_prog_load), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif else CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID + CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD endif endif diff --git a/tools/perf/util/bpf-event.c 
b/tools/perf/util/bpf-event.c index 8271ab764eb5..289bb55aede1 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -35,11 +35,12 @@ struct btf *btf__load_from_kernel_by_id(__u32 id) } #endif -int __weak bpf_prog_load(enum bpf_prog_type prog_type, - const char *prog_name __maybe_unused, - const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts) +#ifndef HAVE_LIBBPF_BPF_PROG_LOAD +int bpf_prog_load(enum bpf_prog_type prog_type, + const char *prog_name __maybe_unused, + const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts) { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -47,6 +48,7 @@ int __weak bpf_prog_load(enum bpf_prog_type prog_type, opts->kern_version, opts->log_buf, opts->log_size); #pragma GCC diagnostic pop } +#endif struct bpf_program * __weak bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) -- cgit v1.2.3 From 8916d72554e5f06df5ba17bfabc87c7977294ba4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2022 13:09:42 +0200 Subject: perf build: Stop using __weak bpf_object__next_program() to handle older libbpf versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By adding a feature test for bpf_object__next_program() and providing a fallback if it isn't present in older versions of libbpf. Committer testing: $ rpm -q libbpf-devel libbpf-devel-0.4.0-2.fc35.x86_64 $ make -C tools/perf LIBBPF_DYNAMIC=1 O=/tmp/build/perf install-bin $ cat /tmp/build/perf/feature/test-libbpf-bpf_object__next_program.make.output test-libbpf-bpf_object__next_program.c: In function ‘main’: test-libbpf-bpf_object__next_program.c:6:9: error: implicit declaration of function ‘bpf_object__next_program’; did you mean ‘bpf_object__unpin_programs’? 
[-Werror=implicit-function-declaration] 6 | bpf_object__next_program(NULL /* obj */, NULL /* prev */); | ^~~~~~~~~~~~~~~~~~~~~~~~ | bpf_object__unpin_programs cc1: all warnings being treated as errors $ $ objdump -dS /tmp/build/perf/perf | grep '<bpf_object__next_program>:' -A20 00000000005b2dc0 <bpf_object__next_program>: { 5b2dc0: 55 push %rbp 5b2dc1: 48 89 e5 mov %rsp,%rbp 5b2dc4: 48 83 ec 10 sub $0x10,%rsp 5b2dc8: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 5b2dcf: 00 00 5b2dd1: 48 89 45 f8 mov %rax,-0x8(%rbp) 5b2dd5: 31 c0 xor %eax,%eax return bpf_program__next(prev, obj); 5b2dd7: 48 8b 45 f8 mov -0x8(%rbp),%rax 5b2ddb: 64 48 2b 04 25 28 00 sub %fs:0x28,%rax 5b2de2: 00 00 5b2de4: 75 0f jne 5b2df5 <bpf_object__next_program+0x35> } 5b2de6: c9 leave 5b2de7: 49 89 f8 mov %rdi,%r8 5b2dea: 48 89 f7 mov %rsi,%rdi return bpf_program__next(prev, obj); 5b2ded: 4c 89 c6 mov %r8,%rsi 5b2df0: e9 3b b4 e5 ff jmp 40e230 <bpf_program__next@plt> $ Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Ian Rogers Cc: Ilya Leoshkevich Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/linux-perf-users/YozLKby7ITEtchC9@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libbpf-bpf_object__next_program.c | 8 ++++++++ tools/perf/Makefile.config | 5 +++++ tools/perf/util/bpf-event.c | 4 +++- 5 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-libbpf-bpf_object__next_program.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index fa5f7b7d722c..64f8cfa6c9af 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -100,6 +100,7 @@ FEATURE_TESTS_EXTRA := \ libbpf \ libbpf-btf__load_from_kernel_by_id \ libbpf-bpf_prog_load \ + libbpf-bpf_object__next_program \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b3fdcc6c5dd7..6eb829704cb9 100644 
--- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -59,6 +59,7 @@ FILES= \ test-libbpf.bin \ test-libbpf-btf__load_from_kernel_by_id.bin \ test-libbpf-bpf_prog_load.bin \ + test-libbpf-bpf_object__next_program.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -295,6 +296,9 @@ $(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: $(OUTPUT)test-libbpf-bpf_prog_load.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-bpf_object__next_program.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-bpf_object__next_program.c b/tools/build/feature/test-libbpf-bpf_object__next_program.c new file mode 100644 index 000000000000..8bf4fd26b545 --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_object__next_program.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + bpf_object__next_program(NULL /* obj */, NULL /* prev */); + return 0; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 63c92fdb1df4..046e10f65ae7 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -577,12 +577,17 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf-bpf_prog_load), 1) CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD endif + $(call feature_check,libbpf-bpf_object__next_program) + ifeq ($(feature-libbpf-bpf_object__next_program), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif else CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM endif endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 289bb55aede1..c68d88ca1ece 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -50,7 +50,8 @@ int bpf_prog_load(enum bpf_prog_type prog_type, } #endif -struct bpf_program * __weak +#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM 
+struct bpf_program * bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { #pragma GCC diagnostic push @@ -58,6 +59,7 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) return bpf_program__next(prev, obj); #pragma GCC diagnostic pop } +#endif struct bpf_map * __weak bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) -- cgit v1.2.3 From 739c9180cfa487cc92e2721e224564e4672c578e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2022 13:13:17 +0200 Subject: perf build: Stop using __weak bpf_object__next_map() to handle older libbpf versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By adding a feature test for bpf_object__next_map() and providing a fallback if it isn't present in older versions of libbpf. Committer testing: $ rpm -q libbpf-devel libbpf-devel-0.4.0-2.fc35.x86_64 $ make -C tools/perf LIBBPF_DYNAMIC=1 O=/tmp/build/perf install-bin $ cat /tmp/build/perf/feature/test-libbpf-bpf_object__next_map.make.output test-libbpf-bpf_object__next_map.c: In function ‘main’: test-libbpf-bpf_object__next_map.c:6:9: error: implicit declaration of function ‘bpf_object__next_map’; did you mean ‘bpf_object__next’? 
[-Werror=implicit-function-declaration] 6 | bpf_object__next_map(NULL /* obj */, NULL /* prev */); | ^~~~~~~~~~~~~~~~~~~~ | bpf_object__next cc1: all warnings being treated as errors $ $ objdump -dS /tmp/build/perf/perf | grep '<bpf_object__next_map>:' -A20 00000000005b2e00 <bpf_object__next_map>: { 5b2e00: 55 push %rbp 5b2e01: 48 89 e5 mov %rsp,%rbp 5b2e04: 48 83 ec 10 sub $0x10,%rsp 5b2e08: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 5b2e0f: 00 00 5b2e11: 48 89 45 f8 mov %rax,-0x8(%rbp) 5b2e15: 31 c0 xor %eax,%eax return bpf_map__next(prev, obj); 5b2e17: 48 8b 45 f8 mov -0x8(%rbp),%rax 5b2e1b: 64 48 2b 04 25 28 00 sub %fs:0x28,%rax 5b2e22: 00 00 5b2e24: 75 0f jne 5b2e35 <bpf_object__next_map+0x35> } 5b2e26: c9 leave 5b2e27: 49 89 f8 mov %rdi,%r8 5b2e2a: 48 89 f7 mov %rsi,%rdi return bpf_map__next(prev, obj); 5b2e2d: 4c 89 c6 mov %r8,%rsi 5b2e30: e9 cb b1 e5 ff jmp 40e000 <bpf_map__next@plt> $ Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Ian Rogers Cc: Ilya Leoshkevich Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/linux-perf-users/YozLKby7ITEtchC9@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libbpf-bpf_object__next_map.c | 8 ++++++++ tools/perf/Makefile.config | 5 +++++ tools/perf/util/bpf-event.c | 4 +++- 5 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-libbpf-bpf_object__next_map.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 64f8cfa6c9af..34cf2bff72ca 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -101,6 +101,7 @@ FEATURE_TESTS_EXTRA := \ libbpf-btf__load_from_kernel_by_id \ libbpf-bpf_prog_load \ libbpf-bpf_object__next_program \ + libbpf-bpf_object__next_map \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 6eb829704cb9..aecb6a28a770 100644 --- 
a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -60,6 +60,7 @@ FILES= \ test-libbpf-btf__load_from_kernel_by_id.bin \ test-libbpf-bpf_prog_load.bin \ test-libbpf-bpf_object__next_program.bin \ + test-libbpf-bpf_object__next_map.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -299,6 +300,9 @@ $(OUTPUT)test-libbpf-bpf_prog_load.bin: $(OUTPUT)test-libbpf-bpf_object__next_program.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-bpf_object__next_map.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-bpf_object__next_map.c b/tools/build/feature/test-libbpf-bpf_object__next_map.c new file mode 100644 index 000000000000..64adb519e97e --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_object__next_map.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + bpf_object__next_map(NULL /* obj */, NULL /* prev */); + return 0; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 046e10f65ae7..a7a80e2280bd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -581,6 +581,10 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf-bpf_object__next_program), 1) CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM endif + $(call feature_check,libbpf-bpf_object__next_map) + ifeq ($(feature-libbpf-bpf_object__next_map), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif @@ -588,6 +592,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP endif endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index c68d88ca1ece..91c4c5dcab7f 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -61,7 +61,8 @@ bpf_object__next_program(const struct bpf_object 
*obj, struct bpf_program *prev) } #endif -struct bpf_map * __weak +#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP +struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { #pragma GCC diagnostic push @@ -69,6 +70,7 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) return bpf_map__next(prev, obj); #pragma GCC diagnostic pop } +#endif const void * __weak btf__raw_data(const struct btf *btf_ro, __u32 *size) -- cgit v1.2.3 From 982be4775164d4780d9c6a2f38b365f5b5bd16d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2022 13:16:20 +0200 Subject: perf build: Stop using __weak btf__raw_data() to handle older libbpf versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By adding a feature test for btf__raw_data() and providing a fallback if it isn't present in older versions of libbpf. Committer testing: $ rpm -q libbpf-devel libbpf-devel-0.4.0-2.fc35.x86_64 $ make -C tools/perf LIBBPF_DYNAMIC=1 O=/tmp/build/perf install-bin $ cat /tmp/build/perf/feature/test-libbpf-btf__raw_data.make.output test-libbpf-btf__raw_data.c: In function ‘main’: test-libbpf-btf__raw_data.c:6:9: error: implicit declaration of function ‘btf__raw_data’; did you mean ‘btf__get_raw_data’? 
[-Werror=implicit-function-declaration] 6 | btf__raw_data(NULL /* btf_ro */, NULL /* size */); | ^~~~~~~~~~~~~ | btf__get_raw_data cc1: all warnings being treated as errors $ objdump -dS /tmp/build/perf/perf | grep '<btf__raw_data>:' -A20 00000000005b3050 <btf__raw_data>: { 5b3050: 55 push %rbp 5b3051: 48 89 e5 mov %rsp,%rbp 5b3054: 48 83 ec 10 sub $0x10,%rsp 5b3058: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 5b305f: 00 00 5b3061: 48 89 45 f8 mov %rax,-0x8(%rbp) 5b3065: 31 c0 xor %eax,%eax return btf__get_raw_data(btf_ro, size); 5b3067: 48 8b 45 f8 mov -0x8(%rbp),%rax 5b306b: 64 48 2b 04 25 28 00 sub %fs:0x28,%rax 5b3072: 00 00 5b3074: 75 06 jne 5b307c <btf__raw_data+0x2c> } 5b3076: c9 leave return btf__get_raw_data(btf_ro, size); 5b3077: e9 14 99 e5 ff jmp 40c990 <btf__get_raw_data@plt> 5b307c: e8 af a7 e5 ff call 40d830 <__stack_chk_fail@plt> 5b3081: 66 66 2e 0f 1f 84 00 data16 cs nopw 0x0(%rax,%rax,1) 5b3088: 00 00 00 00 $ Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Ian Rogers Cc: Ilya Leoshkevich Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/linux-perf-users/YozLKby7ITEtchC9@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libbpf-btf__raw_data.c | 8 ++++++++ tools/perf/Makefile.config | 5 +++++ tools/perf/util/bpf-event.c | 4 +++- 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-libbpf-btf__raw_data.c (limited to 'tools') diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index aecb6a28a770..5b31a6d063d7 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -61,6 +61,7 @@ FILES= \ test-libbpf-bpf_prog_load.bin \ test-libbpf-bpf_object__next_program.bin \ test-libbpf-bpf_object__next_map.bin \ + test-libbpf-btf__raw_data.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -303,6 +304,9 @@ $(OUTPUT)test-libbpf-bpf_object__next_program.bin: 
$(OUTPUT)test-libbpf-bpf_object__next_map.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-btf__raw_data.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD) diff --git a/tools/build/feature/test-libbpf-btf__raw_data.c b/tools/build/feature/test-libbpf-btf__raw_data.c new file mode 100644 index 000000000000..57da31dd7581 --- /dev/null +++ b/tools/build/feature/test-libbpf-btf__raw_data.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + btf__raw_data(NULL /* btf_ro */, NULL /* size */); + return 0; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a7a80e2280bd..c27fd00865c5 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -585,6 +585,10 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf-bpf_object__next_map), 1) CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP endif + $(call feature_check,libbpf-btf__raw_data) + ifeq ($(feature-libbpf-btf__raw_data), 1) + CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif @@ -593,6 +597,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP + CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA endif endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 91c4c5dcab7f..eee64ddb766d 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -72,7 +72,8 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) } #endif -const void * __weak +#ifndef HAVE_LIBBPF_BTF__RAW_DATA +const void * btf__raw_data(const struct btf *btf_ro, __u32 *size) { #pragma GCC diagnostic push @@ -80,6 +81,7 @@ btf__raw_data(const struct btf *btf_ro, __u32 *size) return btf__get_raw_data(btf_ro, size); #pragma GCC diagnostic pop } +#endif static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) { -- cgit v1.2.3 From 
df76e0038370d364d4b2154fa7ddbbccc29f629f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 26 May 2022 11:48:58 -0300 Subject: perf build: Stop using __weak bpf_map_create() to handle older libbpf versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By adding a feature test for bpf_map_create() and providing a fallback if it isn't present in older versions of libbpf. This also fixes the build with torvalds/master at this point: $ git log --oneline -5 torvalds/master babf0bb978e3c9fc (torvalds/master) Merge tag 'xfs-5.19-for-linus' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux e375780b631a5fc2 Merge tag 'fsnotify_for_v5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs 8b728edc5be16179 Merge tag 'fs_for_v5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs 3f306ea2e18568f6 Merge tag 'dma-mapping-5.19-2022-05-25' of git://git.infradead.org/users/hch/dma-mapping fbe86daca0ba878b Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi $ Coping with: $ git log --oneline -2 d16495a982324f75 d16495a982324f75 libbpf: remove bpf_create_map*() APIs e2371b1632b1c61c libbpf: start 1.0 development cycle $ As the __weak function fails to build as it calls the now removed bpf_create_map() API. Testing: $ rpm -q libbpf-devel libbpf-devel-0.4.0-2.fc35.x86_64 $ $ make -C tools/perf BUILD_BPF_SKEL=1 LIBBPF_DYNAMIC=1 O=/tmp/build/perf install-bin $ cat /tmp/build/perf/feature/test-libbpf-bpf_map_create.make.output test-libbpf-bpf_map_create.c: In function ‘main’: test-libbpf-bpf_map_create.c:6:16: error: implicit declaration of function ‘bpf_map_create’; did you mean ‘bpf_map_freeze’? 
[-Werror=implicit-function-declaration] 6 | return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0, /* key_size */, | ^~~~~~~~~~~~~~ | bpf_map_freeze test-libbpf-bpf_map_create.c:6:87: error: expected expression before ‘,’ token 6 | return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0, /* key_size */, | ^ cc1: all warnings being treated as errors $ $ objdump -dS /tmp/build/perf/perf | grep '<bpf_map_create>:' -A20 000000000058b290 <bpf_map_create>: { 58b290: 55 push %rbp 58b291: 48 89 e5 mov %rsp,%rbp 58b294: 48 83 ec 10 sub $0x10,%rsp 58b298: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax 58b29f: 00 00 58b2a1: 48 89 45 f8 mov %rax,-0x8(%rbp) 58b2a5: 31 c0 xor %eax,%eax return bpf_create_map(map_type, key_size, value_size, max_entries, 0); 58b2a7: 48 8b 45 f8 mov -0x8(%rbp),%rax 58b2ab: 64 48 2b 04 25 28 00 sub %fs:0x28,%rax 58b2b2: 00 00 58b2b4: 75 10 jne 58b2c6 <bpf_map_create+0x36> } 58b2b6: c9 leave 58b2b7: 89 d6 mov %edx,%esi 58b2b9: 89 ca mov %ecx,%edx 58b2bb: 44 89 c1 mov %r8d,%ecx return bpf_create_map(map_type, key_size, value_size, max_entries, 0); 58b2be: 45 31 c0 xor %r8d,%r8d $ Cc: Andrii Nakryiko Cc: Heiko Carstens Cc: Ian Rogers Cc: Ilya Leoshkevich Cc: Jiri Olsa Cc: Song Liu Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/linux-perf-users/Yo+XvQNKL4K5khl2@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libbpf-bpf_map_create.c | 8 ++++++++ tools/perf/Makefile.config | 5 +++++ tools/perf/util/bpf_counter.c | 6 +++++- 5 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 tools/build/feature/test-libbpf-bpf_map_create.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 34cf2bff72ca..888a0421d43b 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -102,6 +102,7 @@ FEATURE_TESTS_EXTRA := \ libbpf-bpf_prog_load \ libbpf-bpf_object__next_program \ 
libbpf-bpf_object__next_map \ + libbpf-bpf_create_map \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 5b31a6d063d7..7c2a17e23c30 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -59,6 +59,7 @@ FILES= \ test-libbpf.bin \ test-libbpf-btf__load_from_kernel_by_id.bin \ test-libbpf-bpf_prog_load.bin \ + test-libbpf-bpf_map_create.bin \ test-libbpf-bpf_object__next_program.bin \ test-libbpf-bpf_object__next_map.bin \ test-libbpf-btf__raw_data.bin \ @@ -298,6 +299,9 @@ $(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: $(OUTPUT)test-libbpf-bpf_prog_load.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-bpf_map_create.bin: + $(BUILD) -lbpf + $(OUTPUT)test-libbpf-bpf_object__next_program.bin: $(BUILD) -lbpf diff --git a/tools/build/feature/test-libbpf-bpf_map_create.c b/tools/build/feature/test-libbpf-bpf_map_create.c new file mode 100644 index 000000000000..b9f550e332c8 --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_map_create.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0, /* key_size */, + 0 /* value_size */, 0 /* max_entries */, NULL /* opts */); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c27fd00865c5..73e0762092fe 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -589,6 +589,10 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf-btf__raw_data), 1) CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA endif + $(call feature_check,libbpf-bpf_map_create) + ifeq ($(feature-libbpf-bpf_map_create), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif @@ -598,6 +602,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA + CFLAGS += 
-DHAVE_LIBBPF_BPF_MAP_CREATE endif endif diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index d4931f54e1dd..ef1c15e4aeba 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -312,7 +312,10 @@ static bool bperf_attr_map_compatible(int attr_map_fd) (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); } -int __weak +#ifndef HAVE_LIBBPF_BPF_MAP_CREATE +LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags); +int bpf_map_create(enum bpf_map_type map_type, const char *map_name __maybe_unused, __u32 key_size, @@ -325,6 +328,7 @@ bpf_map_create(enum bpf_map_type map_type, return bpf_create_map(map_type, key_size, value_size, max_entries, 0); #pragma GCC diagnostic pop } +#endif static int bperf_lock_attr_map(struct target *target) { -- cgit v1.2.3 From 1097b38fb7583789ac5e33f3fefb1404bd087f99 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:22 +0300 Subject: perf intel-pt: Add a test for system-wide side band Add a test for system-wide side band even when tracing selected CPUs. The test fails before the patches up to "perf tools: Allow system-wide events to keep their own CPUs" are applied, passes afterwards. 
Signed-off-by: Adrian Hunter Tested-by: Ian Rogers Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_intel_pt.sh | 71 +++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100755 tools/perf/tests/shell/test_intel_pt.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh new file mode 100755 index 000000000000..a3298643884d --- /dev/null +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# Miscellaneous Intel PT testing +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# Skip if no Intel PT +perf list | grep -q 'intel_pt//' || exit 2 + +skip_cnt=0 +ok_cnt=0 +err_cnt=0 + +tmpfile=`mktemp` +perfdatafile=`mktemp` + +can_cpu_wide() +{ + perf record -o ${tmpfile} -B -N --no-bpf-event -e dummy:u -C $1 true 2>&1 >/dev/null || return 2 + return 0 +} + +test_system_wide_side_band() +{ + # Need CPU 0 and CPU 1 + can_cpu_wide 0 || return $? + can_cpu_wide 1 || return $? + + # Record on CPU 0 a task running on CPU 1 + perf record -B -N --no-bpf-event -o ${perfdatafile} -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname + + # Should get MMAP events from CPU 1 because they can be needed to decode + mmap_cnt=`perf script -i ${perfdatafile} --no-itrace --show-mmap-events -C 1 2>/dev/null | grep MMAP | wc -l` + + if [ ${mmap_cnt} -gt 0 ] ; then + return 0 + fi + + echo "Failed to record MMAP events on CPU 1 when tracing CPU 0" + return 1 +} + +count_result() +{ + if [ $1 -eq 2 ] ; then + skip_cnt=`expr ${skip_cnt} \+ 1` + return + fi + if [ $1 -eq 0 ] ; then + ok_cnt=`expr ${ok_cnt} \+ 1` + return + fi + err_cnt=`expr ${err_cnt} \+ 1` +} + +test_system_wide_side_band + +count_result $? 
+ +rm -f ${tmpfile} +rm -f ${perfdatafile} + +if [ ${err_cnt} -gt 0 ] ; then + exit 1 +fi + +if [ ${ok_cnt} -gt 0 ] ; then + exit 0 +fi + +exit 2 -- cgit v1.2.3 From d01508f2df21d6793f1642b20d4c1fe2f1c7fcba Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:23 +0300 Subject: perf auxtrace: Add mmap_needed to auxtrace_mmap_params Add mmap_needed to auxtrace_mmap_params. Currently an auxtrace mmap is always attempted even if the event is not an auxtrace event. That works because, when AUX area tracing, there is always an auxtrace event first for every mmap. Prepare for that not being the case, which it won't be when sideband tracking events are allowed on all CPUs even when auxtrace is limited to selected CPUs. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Ian Rogers Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 10 ++++++++-- tools/perf/util/auxtrace.h | 11 +++++++++-- tools/perf/util/evlist.c | 5 +++-- tools/perf/util/mmap.c | 1 + 4 files changed, 21 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index b11549ae39df..b446cfa66469 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -125,7 +125,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, mm->tid = mp->tid; mm->cpu = mp->cpu.cpu; - if (!mp->len) { + if (!mp->len || !mp->mmap_needed) { mm->base = NULL; return 0; } @@ -168,9 +168,15 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, } void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, int idx, + struct evlist *evlist, + struct evsel *evsel, int idx, bool per_cpu) { + mp->mmap_needed = evsel->needs_auxtrace_mmap; + + if (!mp->mmap_needed) + return; + mp->idx = idx; if (per_cpu) { diff --git 
a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index dc38b6f57232..695591b73ae1 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -344,6 +344,10 @@ struct auxtrace_mmap { * @idx: index of this mmap * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu * mmap) otherwise %0 + * @mmap_needed: set to %false for non-auxtrace events. This is needed because + * auxtrace mmapping is done in the same code path as non-auxtrace + * mmapping but not every evsel that needs non-auxtrace mmapping + * also needs auxtrace mmapping. * @cpu: cpu number for a per-cpu mmap otherwise %-1 */ struct auxtrace_mmap_params { @@ -353,6 +357,7 @@ struct auxtrace_mmap_params { int prot; int idx; pid_t tid; + bool mmap_needed; struct perf_cpu cpu; }; @@ -490,7 +495,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, unsigned int auxtrace_pages, bool auxtrace_overwrite); void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, int idx, + struct evlist *evlist, + struct evsel *evsel, int idx, bool per_cpu); typedef int (*process_auxtrace_t)(struct perf_tool *tool, @@ -863,7 +869,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, unsigned int auxtrace_pages, bool auxtrace_overwrite); void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, int idx, + struct evlist *evlist, + struct evsel *evsel, int idx, bool per_cpu); #define ITRACE_HELP "" diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7f9f588e88c6..9e0fabfb096d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -747,15 +747,16 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, static void perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, - struct perf_evsel *_evsel __maybe_unused, + struct perf_evsel *_evsel, struct perf_mmap_param *_mp, int idx) { struct evlist *evlist = container_of(_evlist, struct evlist, core); 
struct mmap_params *mp = container_of(_mp, struct mmap_params, core); bool per_cpu = !perf_cpu_map__empty(_evlist->user_requested_cpus); + struct evsel *evsel = container_of(_evsel, struct evsel, core); - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx, per_cpu); } static struct perf_mmap* diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 50502b4a7ca4..de59c4da852b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -62,6 +62,7 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, struct evlist *evlist __maybe_unused, + struct evsel *evsel __maybe_unused, int idx __maybe_unused, bool per_cpu __maybe_unused) { -- cgit v1.2.3 From 84bd5aba88af7b6ec46ea88e01588f93c6aa782f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:24 +0300 Subject: perf auxtrace: Remove auxtrace_mmap_params__set_idx() per_cpu parameter Remove auxtrace_mmap_params__set_idx() per_cpu parameter because it isn't needed. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.c | 5 +++-- tools/perf/util/auxtrace.h | 6 ++---- tools/perf/util/evlist.c | 3 +-- tools/perf/util/mmap.c | 3 +-- 4 files changed, 7 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index b446cfa66469..ac4e4660932d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -169,9 +169,10 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, - struct evsel *evsel, int idx, - bool per_cpu) + struct evsel *evsel, int idx) { + bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + mp->mmap_needed = evsel->needs_auxtrace_mmap; if (!mp->mmap_needed) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 695591b73ae1..cd0d25c2751c 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -496,8 +496,7 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, bool auxtrace_overwrite); void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, - struct evsel *evsel, int idx, - bool per_cpu); + struct evsel *evsel, int idx); typedef int (*process_auxtrace_t)(struct perf_tool *tool, struct mmap *map, @@ -870,8 +869,7 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, bool auxtrace_overwrite); void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, - struct evsel *evsel, int idx, - bool per_cpu); + struct evsel *evsel, int idx); #define ITRACE_HELP "" diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9e0fabfb096d..157867bc337a 100644 --- 
a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -753,10 +753,9 @@ perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, { struct evlist *evlist = container_of(_evlist, struct evlist, core); struct mmap_params *mp = container_of(_mp, struct mmap_params, core); - bool per_cpu = !perf_cpu_map__empty(_evlist->user_requested_cpus); struct evsel *evsel = container_of(_evsel, struct evsel, core); - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx, per_cpu); + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx); } static struct perf_mmap* diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index de59c4da852b..a4dff881be39 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -63,8 +63,7 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, struct evlist *evlist __maybe_unused, struct evsel *evsel __maybe_unused, - int idx __maybe_unused, - bool per_cpu __maybe_unused) + int idx __maybe_unused) { } -- cgit v1.2.3 From 82944899149d2ea77c920333364dd0a6de0015ba Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:25 +0300 Subject: perf evlist: Factor out evlist__dummy_event() Factor out evlist__dummy_event() so it can be reused. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 157867bc337a..efad0e691045 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -242,14 +242,20 @@ int __evlist__add_default(struct evlist *evlist, bool precise) return 0; } -int evlist__add_dummy(struct evlist *evlist) +static struct evsel *evlist__dummy_event(struct evlist *evlist) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries); + + return evsel__new_idx(&attr, evlist->core.nr_entries); +} + +int evlist__add_dummy(struct evlist *evlist) +{ + struct evsel *evsel = evlist__dummy_event(evlist); if (evsel == NULL) return -ENOMEM; -- cgit v1.2.3 From 126d68fdcabed8c2ca5ffaba785add93ef722da8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:26 +0300 Subject: perf evlist: Add evlist__add_dummy_on_all_cpus() Add evlist__add_dummy_on_all_cpus() to enable creating a system-wide dummy event that sets up the system-wide maps before map propagation. For convenience, add evlist__add_aux_dummy() so that the logic can be used whether or not the event needs to be system-wide. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Ian Rogers Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evlist.h | 5 +++++ 2 files changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index efad0e691045..48af7d379d82 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -264,6 +264,51 @@ int evlist__add_dummy(struct evlist *evlist) return 0; } +static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel) +{ + evsel->core.system_wide = true; + + /* + * All CPUs. + * + * Note perf_event_open() does not accept CPUs that are not online, so + * in fact this CPU list will include only all online CPUs. + */ + perf_cpu_map__put(evsel->core.own_cpus); + evsel->core.own_cpus = perf_cpu_map__new(NULL); + perf_cpu_map__put(evsel->core.cpus); + evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); + + /* No threads */ + perf_thread_map__put(evsel->core.threads); + evsel->core.threads = perf_thread_map__new_dummy(); + + evlist__add(evlist, evsel); +} + +struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) +{ + struct evsel *evsel = evlist__dummy_event(evlist); + + if (!evsel) + return NULL; + + evsel->core.attr.exclude_kernel = 1; + evsel->core.attr.exclude_guest = 1; + evsel->core.attr.exclude_hv = 1; + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->no_aux_samples = true; + evsel->name = strdup("dummy:u"); + + if (system_wide) + evlist__add_on_all_cpus(evlist, evsel); + else + evlist__add(evlist, evsel); + + return evsel; +} + static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { struct evsel *evsel, *n; diff --git 
a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4062f5aebfc1..1bde9ccf4e7d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -114,6 +114,11 @@ int arch_evlist__add_default_attrs(struct evlist *evlist); struct evsel *arch_evlist__leader(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); +struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide); +static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist) +{ + return evlist__add_aux_dummy(evlist, true); +} int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, evsel__sb_cb_t cb, void *data); -- cgit v1.2.3 From 921e3be5a5648f483f80c9ba21ca2942d82d581c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:27 +0300 Subject: perf record: Use evlist__add_dummy_on_all_cpus() in record__config_text_poke() Use evlist__add_dummy_on_all_cpus() in record__config_text_poke() in preparation for allowing system-wide events on all CPUs while the user requested events are on only user requested CPUs. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a5cf6a99d67f..c8a79f3a8dff 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -869,7 +869,6 @@ static int record__auxtrace_init(struct record *rec __maybe_unused) static int record__config_text_poke(struct evlist *evlist) { struct evsel *evsel; - int err; /* Nothing to do if text poke is already configured */ evlist__for_each_entry(evlist, evsel) { @@ -877,27 +876,13 @@ static int record__config_text_poke(struct evlist *evlist) return 0; } - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; - - evsel = evlist__last(evlist); + evsel = evlist__add_dummy_on_all_cpus(evlist); + if (!evsel) + return -ENOMEM; - evsel->core.attr.freq = 0; - evsel->core.attr.sample_period = 1; evsel->core.attr.text_poke = 1; evsel->core.attr.ksymbol = 1; - - evsel->core.system_wide = true; - evsel->no_aux_samples = true; evsel->immediate = true; - - /* Text poke must be collected on all CPUs */ - perf_cpu_map__put(evsel->core.own_cpus); - evsel->core.own_cpus = perf_cpu_map__new(NULL); - perf_cpu_map__put(evsel->core.cpus); - evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); - evsel__set_sample_bit(evsel, TIME); return 0; -- cgit v1.2.3 From e665c82a769164a976add4d793e91079ec3d1bae Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:28 +0300 Subject: perf intel-pt: Use evlist__add_dummy_on_all_cpus() for switch tracking Use evlist__add_dummy_on_all_cpus() for switch tracking in preparation for allowing system-wide events on all CPUs while the user requested events are on only 
user requested CPUs. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 2eaac4638aab..0ee93894a0da 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -811,18 +811,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (!cpu_wide && perf_can_record_cpu_wide()) { struct evsel *switch_evsel; - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; + switch_evsel = evlist__add_dummy_on_all_cpus(evlist); + if (!switch_evsel) + return -ENOMEM; - switch_evsel = evlist__last(evlist); - - switch_evsel->core.attr.freq = 0; - switch_evsel->core.attr.sample_period = 1; switch_evsel->core.attr.context_switch = 1; - - switch_evsel->core.system_wide = true; - switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; evsel__set_sample_bit(switch_evsel, TID); -- cgit v1.2.3 From 7d189cadbeebc778fe19c245447d155458e6f1e3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:29 +0300 Subject: perf intel-pt: Track sideband system-wide when needed User space tasks can migrate between CPUs, so when tracing selected CPUs, sideband for all CPUs is still needed. This is in preparation for allowing system-wide events on all CPUs while the user requested events are on only user requested CPUs. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/intel-pt.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 0ee93894a0da..06c2cdfd8f2f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -864,20 +864,22 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { + bool need_system_wide_tracking; struct evsel *tracking_evsel; - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; + /* + * User space tasks can migrate between CPUs, so when tracing + * selected CPUs, sideband for all CPUs is still needed. + */ + need_system_wide_tracking = evlist->core.has_user_cpus && + !intel_pt_evsel->core.attr.exclude_user; - tracking_evsel = evlist__last(evlist); + tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking); + if (!tracking_evsel) + return -ENOMEM; evlist__set_tracking_event(evlist, tracking_evsel); - tracking_evsel->core.attr.freq = 0; - tracking_evsel->core.attr.sample_period = 1; - - tracking_evsel->no_aux_samples = true; if (need_immediate) tracking_evsel->immediate = true; -- cgit v1.2.3 From 7be1fedd2a0a5b8f20952a675c611815254b74b6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:30 +0300 Subject: perf tools: Allow all_cpus to be a superset of user_requested_cpus To support collection of system-wide events with user requested CPUs, all_cpus must be a superset of user_requested_cpus. 
For all_cpus to be a superset of user_requested_cpus, code that deals with the CPUs of all events, rather than only the CPUs of the requested events, must use all_cpus instead of user_requested_cpus. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 12 ++++++------ tools/perf/builtin-record.c | 18 ++++++++++++------ tools/perf/util/auxtrace.c | 2 +- 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index ed66f2e38464..ec0e4b5da874 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -298,7 +298,7 @@ add: int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int nr_threads = perf_thread_map__nr(evlist->threads); int nfds = 0; struct perf_evsel *evsel; @@ -430,7 +430,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { - struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; int revent; @@ -540,7 +540,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { int nr_threads = perf_thread_map__nr(evlist->threads); - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int cpu, thread; for (cpu = 0; cpu < nr_cpus; cpu++) { @@ -565,8 +565,8 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; - nr_mmaps =
perf_cpu_map__nr(evlist->user_requested_cpus); - if (perf_cpu_map__empty(evlist->user_requested_cpus)) + nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); + if (perf_cpu_map__empty(evlist->all_cpus)) nr_mmaps = perf_thread_map__nr(evlist->threads); return nr_mmaps; @@ -577,7 +577,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_mmap_param *mp) { struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->user_requested_cpus; + const struct perf_cpu_map *cpus = evlist->all_cpus; if (!ops || !ops->get || !ops->mmap) return -EINVAL; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c8a79f3a8dff..cf9a7ce429df 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -967,14 +967,20 @@ static void record__thread_data_close_pipes(struct record_thread *thread_data) } } +static bool evlist__per_thread(struct evlist *evlist) +{ + return cpu_map__is_dummy(evlist->core.user_requested_cpus); +} + static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) { int m, tm, nr_mmaps = evlist->core.nr_mmaps; struct mmap *mmap = evlist->mmap; struct mmap *overwrite_mmap = evlist->overwrite_mmap; - struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; + struct perf_cpu_map *cpus = evlist->core.all_cpus; + bool per_thread = evlist__per_thread(evlist); - if (cpu_map__is_dummy(cpus)) + if (per_thread) thread_data->nr_mmaps = nr_mmaps; else thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, @@ -995,7 +1001,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { - if (cpu_map__is_dummy(cpus) || + if (per_thread || test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { if (thread_data->maps) { thread_data->maps[tm] = &mmap[m]; @@ -1870,7 +1876,7 @@ static int 
record__synthesize(struct record *rec, bool tail) return err; } - err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus, + err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, process_synthesized_event, NULL); if (err < 0) { pr_err("Couldn't synthesize cpu map.\n"); @@ -3668,12 +3674,12 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu static int record__init_thread_masks(struct record *rec) { int ret = 0; - struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus; + struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; if (!record__threads_enabled(rec)) return record__init_thread_default_masks(rec, cpus); - if (cpu_map__is_dummy(cpus)) { + if (evlist__per_thread(rec->evlist)) { pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); return -EINVAL; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ac4e4660932d..511dd3caa1bc 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -181,7 +181,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, mp->idx = idx; if (per_cpu) { - mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx); + mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); if (evlist->core.threads) mp->tid = perf_thread_map__pid(evlist->core.threads, 0); else -- cgit v1.2.3 From ae4f8ae16a07896403c90305d4b9be27f657c1fc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:31 +0300 Subject: libperf evlist: Allow mixing per-thread and per-cpu mmaps mmap_per_evsel() will skip events that do not match the CPU, so all CPUs can be iterated in any case. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-11-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index ec0e4b5da874..eae1f6179dad 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -512,29 +512,6 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, return 0; } -static int -mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp) -{ - int thread; - int nr_threads = perf_thread_map__nr(evlist->threads); - - for (thread = 0; thread < nr_threads; thread++) { - int output = -1; - int output_overwrite = -1; - - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, - &output, &output_overwrite)) - goto out_unmap; - } - - return 0; - -out_unmap: - perf_evlist__munmap(evlist); - return -1; -} - static int mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) @@ -565,9 +542,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; + /* One for each CPU */ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); - if (perf_cpu_map__empty(evlist->all_cpus)) - nr_mmaps = perf_thread_map__nr(evlist->threads); + if (perf_cpu_map__empty(evlist->all_cpus)) { + /* Plus one for each thread */ + nr_mmaps += perf_thread_map__nr(evlist->threads); + /* Minus the per-thread CPU (-1) */ + nr_mmaps -= 1; + } return nr_mmaps; } @@ -577,7 +559,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_mmap_param *mp) { struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->all_cpus; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -596,9 +577,6 @@ int 
perf_evlist__mmap_ops(struct perf_evlist *evlist, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) - return mmap_per_thread(evlist, ops, mp); - return mmap_per_cpu(evlist, ops, mp); } -- cgit v1.2.3 From 4ce47d842d4c16c07b135b8a7975b8f0672bcc0e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:32 +0300 Subject: libperf evlist: Check nr_mmaps is correct Print an error message if the predetermined number of mmaps is incorrect. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-12-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index eae1f6179dad..f51fdb899d19 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -23,6 +23,7 @@ #include #include #include +#include "internal.h" void perf_evlist__init(struct perf_evlist *evlist) { @@ -428,7 +429,7 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_ static int mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) + int thread, int *_output, int *_output_overwrite, int *nr_mmaps) { struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; @@ -484,6 +485,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; + *nr_mmaps += 1; + if (!idx) perf_evlist__set_mmap_first(evlist, map, overwrite); } else { @@ -518,6 +521,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, { int nr_threads = 
perf_thread_map__nr(evlist->threads); int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int nr_mmaps = 0; int cpu, thread; for (cpu = 0; cpu < nr_cpus; cpu++) { @@ -526,11 +530,14 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, - thread, &output, &output_overwrite)) + thread, &output, &output_overwrite, &nr_mmaps)) goto out_unmap; } } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: -- cgit v1.2.3 From d3345fecf9e5f63be7946a1e5bf1f5695c67b445 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:33 +0300 Subject: perf stat: Add requires_cpu flag for uncore Uncore events require a CPU i.e. it cannot be -1. The evsel system_wide flag is intended for events that should be on every CPU, which does not make sense for uncore events because uncore events do not map one-to-one with CPUs. These 2 requirements are not exactly the same, so introduce a new flag 'requires_cpu' for the uncore case. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-13-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 4 +++- tools/lib/perf/include/internal/evsel.h | 1 + tools/perf/builtin-stat.c | 5 +---- tools/perf/util/evsel.c | 1 + tools/perf/util/parse-events.c | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index f51fdb899d19..1c801f8da44f 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -43,7 +43,9 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, if (!evsel->own_cpus || evlist->has_user_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) { + } else if (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__empty(evlist->user_requested_cpus)) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index cfc9ebd7968e..77fbb8b97e5c 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -50,6 +50,7 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; bool system_wide; + bool requires_cpu; int idx; }; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7e6cc8bdf061..4ce87a8eb7d7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -382,9 +382,6 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ if (!counter->supported) return -ENOENT; - if (counter->core.system_wide) - nthreads = 1; - for (thread = 0; thread < nthreads; 
thread++) { struct perf_counts_values *count; @@ -2261,7 +2258,7 @@ static void setup_system_wide(int forks) struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->core.system_wide && + if (!counter->core.requires_cpu && strcmp(counter->name, "duration_time")) { return; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ef169ad15236..050b1c69a738 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -409,6 +409,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->core.threads = perf_thread_map__get(orig->core.threads); evsel->core.nr_members = orig->core.nr_members; evsel->core.system_wide = orig->core.system_wide; + evsel->core.requires_cpu = orig->core.requires_cpu; if (orig->name) { evsel->name = strdup(orig->name); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 30a9d915853d..7ed235740431 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -365,7 +365,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->core.system_wide = pmu ? pmu->is_uncore : false; + evsel->core.requires_cpu = pmu ? 
pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) -- cgit v1.2.3 From f5fb6d4efe15a2f0d2c0c175c3827ac594023996 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:34 +0300 Subject: libperf evsel: Add comments for booleans Add comments for 'system_wide' and 'requires_cpu' booleans Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Ian Rogers Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-14-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/internal/evsel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 77fbb8b97e5c..2a912a1f1989 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -49,7 +49,17 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; + /* + * system_wide is for events that need to be on every CPU, irrespective + * of user requested CPUs or threads. Map propagation will set cpus to + * this event's own_cpus, whereby they will contribute to evlist + * all_cpus. + */ bool system_wide; + /* + * Some events, for example uncore events, require a CPU. + * i.e. it cannot be the 'any CPU' value of -1. + */ bool requires_cpu; int idx; }; -- cgit v1.2.3 From 298613b8e3f68a1aef2370cd6a9dad462b6c0457 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:35 +0300 Subject: perf tools: Allow system-wide events to keep their own CPUs Currently, user_requested_cpus supplants system-wide CPUs when the evlist has_user_cpus. Change that so that system-wide events retain their own CPUs and they are added to all_cpus. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-15-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 1c801f8da44f..9a6801b53274 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -40,12 +40,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (!evsel->own_cpus || evlist->has_user_cpus) { - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (!evsel->system_wide && - !evsel->requires_cpu && - perf_cpu_map__empty(evlist->user_requested_cpus)) { + if (!evsel->own_cpus || + (!evsel->system_wide && evlist->has_user_cpus) || + (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__empty(evlist->user_requested_cpus))) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { -- cgit v1.2.3 From a41e24f6c3ffdd001f976f9bd76634f2163715f5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:36 +0300 Subject: perf tools: Allow system-wide events to keep their own threads System-wide events do not have threads, so do not propagate threads to them. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-16-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 9a6801b53274..e6c98a6e3908 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -52,8 +52,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, evsel->cpus = perf_cpu_map__get(evsel->own_cpus); } - perf_thread_map__put(evsel->threads); - evsel->threads = perf_thread_map__get(evlist->threads); + if (!evsel->system_wide) { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__get(evlist->threads); + } + evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); } -- cgit v1.2.3 From 7473ee56dbc91c9803e7a80768e71de02ab0b54d Mon Sep 17 00:00:00 2001 From: Claire Jensen Date: Tue, 24 May 2022 22:38:12 -0700 Subject: perf test: Add checking for perf stat CSV output. Counts expected fields for various commands. No testing added for summary mode since it is broken. An example of the summary output is: summary,263831,,instructions:u,1435072,100.0,0.46,insn per cycle ,,,,,1.37,stalled cycles per insn This should be: summary,263831,,instructions:u,1435072,100.0,0.46,insn per cycle summary,,,,,,1.37,stalled cycles per insn The output has 7 fields when it should have 8. Additionally, the newline spacing is wrong, so it was excluded from testing until a fix is made. 
Committer testing: $ perf test "perf stat CSV output" 88: perf stat CSV output linter : Ok $ $ perf test -v "perf stat CSV output" Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc 88: perf stat CSV output linter : --- start --- test child forked, pid 2622839 Checking CSV output: no args [Success] Checking CSV output: system wide [Skip] paranoid and not root Checking CSV output: system wide [Skip] paranoid and not root Checking CSV output: interval [Success] Checking CSV output: event [Success] Checking CSV output: per core [Skip] paranoid and not root Checking CSV output: per thread [Skip] paranoid and not root Checking CSV output: per die [Skip] paranoid and not root Checking CSV output: per node [Skip] paranoid and not root Checking CSV output: per socket [Skip] paranoid and not root test child finished with 0 ---- end ---- perf stat CSV output linter: Ok $ I did a s/parnoia/paranoid/g on the [Skip] lines. Signed-off-by: Claire Jensen Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Alyssa Ross Cc: Claire Jensen Cc: Florian Fischer Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Like Xu Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Stephane Eranian Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20220525053814.3265216-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/perf_csv_output_lint.py | 48 +++++++ tools/perf/tests/shell/stat+csv_output.sh | 147 +++++++++++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 tools/perf/tests/shell/lib/perf_csv_output_lint.py create mode 100755 tools/perf/tests/shell/stat+csv_output.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/lib/perf_csv_output_lint.py b/tools/perf/tests/shell/lib/perf_csv_output_lint.py new file mode 100644 index 000000000000..714f283cfb1b --- /dev/null +++ b/tools/perf/tests/shell/lib/perf_csv_output_lint.py @@ -0,0 +1,48 @@ +#!/usr/bin/python 
+# SPDX-License-Identifier: GPL-2.0 + +import argparse +import sys + +# Basic sanity check of perf CSV output as specified in the man page. +# Currently just checks the number of fields per line in output. + +ap = argparse.ArgumentParser() +ap.add_argument('--no-args', action='store_true') +ap.add_argument('--interval', action='store_true') +ap.add_argument('--system-wide-no-aggr', action='store_true') +ap.add_argument('--system-wide', action='store_true') +ap.add_argument('--event', action='store_true') +ap.add_argument('--per-core', action='store_true') +ap.add_argument('--per-thread', action='store_true') +ap.add_argument('--per-die', action='store_true') +ap.add_argument('--per-node', action='store_true') +ap.add_argument('--per-socket', action='store_true') +ap.add_argument('--separator', default=',', nargs='?') +args = ap.parse_args() + +Lines = sys.stdin.readlines() + +def check_csv_output(exp): + for line in Lines: + if 'failed' not in line: + count = line.count(args.separator) + if count != exp: + sys.stdout.write(''.join(Lines)) + raise RuntimeError(f'wrong number of fields. 
expected {exp} in {line}') + +try: + if args.no_args or args.system_wide or args.event: + expected_items = 6 + elif args.interval or args.per_thread or args.system_wide_no_aggr: + expected_items = 7 + elif args.per_core or args.per_socket or args.per_node or args.per_die: + expected_items = 8 + else: + ap.print_help() + raise RuntimeError('No checking option specified') + check_csv_output(expected_items) + +except: + sys.stdout.write('Test failed for input: ' + ''.join(Lines)) + raise diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh new file mode 100755 index 000000000000..983220ef3cb4 --- /dev/null +++ b/tools/perf/tests/shell/stat+csv_output.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# perf stat CSV output linter +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Tests various perf stat CSV output commands for the +# correct number of fields and the CSV separator set to ','. + +set -e + +pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py +if [ "x$PYTHON" == "x" ] +then + if which python3 > /dev/null + then + PYTHON=python3 + elif which python > /dev/null + then + PYTHON=python + else + echo Skipping test, python not detected please set environment variable PYTHON. + exit 2 + fi +fi + +# Return true if perf_event_paranoid is > $1 and not running as root. 
+function ParanoidAndNotRoot() +{ + [ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ] +} + +check_no_args() +{ + echo -n "Checking CSV output: no args " + perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args + echo "[Success]" +} + +check_system_wide() +{ + echo -n "Checking CSV output: system wide " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide + echo "[Success]" +} + +check_system_wide_no_aggr() +{ + echo -n "Checking CSV output: system wide " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + echo -n "Checking CSV output: system wide no aggregation " + perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr + echo "[Success]" +} + +check_interval() +{ + echo -n "Checking CSV output: interval " + perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval + echo "[Success]" +} + + +check_event() +{ + echo -n "Checking CSV output: event " + perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event + echo "[Success]" +} + +check_per_core() +{ + echo -n "Checking CSV output: per core " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core + echo "[Success]" +} + +check_per_thread() +{ + echo -n "Checking CSV output: per thread " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread + echo "[Success]" +} + +check_per_die() +{ + echo -n "Checking CSV output: per die " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die + echo "[Success]" +} + +check_per_node() +{ + echo -n "Checking CSV output: per node " + if 
ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node + echo "[Success]" +} + +check_per_socket() +{ + echo -n "Checking CSV output: per socket " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket + echo "[Success]" +} + +check_no_args +check_system_wide +check_system_wide_no_aggr +check_interval +check_event +check_per_core +check_per_thread +check_per_die +check_per_node +check_per_socket +exit 0 -- cgit v1.2.3 From 303ead45c4459df9e38d4f2bd38ef6238c5898de Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 May 2022 15:47:20 -0700 Subject: perf report: Do not extend sample type of bpf-output event Currently evsel__new_idx() sets more sample_type bits when it finds a BPF-output event. But it should honor what's recorded in the perf data file rather than blindly setting the bits. Otherwise it could lead to a parse error when the data was recorded with a modified sample_type. 
Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Blake Jones Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220518224725.742882-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 050b1c69a738..a0d5753e363e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -296,8 +296,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) return NULL; evsel__init(evsel, attr, idx); - if (evsel__is_bpf_output(evsel)) { - evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + if (evsel__is_bpf_output(evsel) && !attr->sample_type) { + evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), evsel->core.attr.sample_period = 1; } -- cgit v1.2.3 From edc41a1099c2d08ccfd4ed7d59688501e3749015 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 May 2022 15:47:21 -0700 Subject: perf record: Enable off-cpu analysis with BPF Add --off-cpu option to enable the off-cpu profiling with BPF. It'd use a bpf_output event and rename it to "offcpu-time". Samples will be synthesized at the end of the record session using data from a BPF map which contains the aggregated off-cpu time at context switches. So it needs root privilege to get the off-cpu profiling. Each sample will have a separate user stacktrace so it will skip kernel threads. The sample ip will be set from the stacktrace and other sample data will be updated accordingly. Currently it only handles some basic sample types. The sample timestamp is set to a dummy value just not to bother with other events during the sorting. So it has a very big initial value, which is increased on processing each sample. 
The good thing is that it can be used together with regular profiling like cpu cycles. If you don't want that, you can use a dummy event to enable off-cpu profiling only. Example output: $ sudo perf record --off-cpu perf bench sched messaging -l 1000 $ sudo perf report --stdio --call-graph=no # Total Lost Samples: 0 # # Samples: 41K of event 'cycles' # Event count (approx.): 42137343851 ... # Samples: 1K of event 'offcpu-time' # Event count (approx.): 587990831640 # # Children Self Command Shared Object Symbol # ........ ........ ............... .................. ......................... # 81.66% 0.00% sched-messaging libc-2.33.so [.] __libc_start_main 81.66% 0.00% sched-messaging perf [.] cmd_bench 81.66% 0.00% sched-messaging perf [.] main 81.66% 0.00% sched-messaging perf [.] run_builtin 81.43% 0.00% sched-messaging perf [.] bench_sched_messaging 40.86% 40.86% sched-messaging libpthread-2.33.so [.] __read 37.66% 37.66% sched-messaging libpthread-2.33.so [.] __write 2.91% 2.91% sched-messaging libc-2.33.so [.] __poll ... As you can see it spent most of off-cpu time in read and write in bench_sched_messaging(). The --call-graph=no was added just to make the output concise here. It uses the perf hooks facility to control the BPF program during the record session rather than adding new BPF/off-cpu specific calls. 
Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Blake Jones Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220518224725.742882-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 10 ++ tools/perf/Makefile.perf | 1 + tools/perf/builtin-record.c | 25 ++++ tools/perf/util/Build | 1 + tools/perf/util/bpf_off_cpu.c | 204 +++++++++++++++++++++++++++++++ tools/perf/util/bpf_skel/off_cpu.bpf.c | 139 +++++++++++++++++++++ tools/perf/util/off_cpu.h | 24 ++++ 7 files changed, 404 insertions(+) create mode 100644 tools/perf/util/bpf_off_cpu.c create mode 100644 tools/perf/util/bpf_skel/off_cpu.bpf.c create mode 100644 tools/perf/util/off_cpu.h (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 465be4e62a17..b4e9ef7edfef 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -758,6 +758,16 @@ include::intel-hybrid.txt[] If the URLs is not specified, the value of DEBUGINFOD_URLS system environment variable is used. +--off-cpu:: + Enable off-cpu profiling with BPF. The BPF program will collect + task scheduling information with (user) stacktrace and save them + as sample data of a software event named "offcpu-time". The + sample period will have the time the task slept in nanoseconds. + + Note that BPF can collect stack traces using frame pointer ("fp") + only, as of now. So the applications built without the frame + pointer might see bogus addresses. 
+ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6e5aded855cc..8f738e11356d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1038,6 +1038,7 @@ SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h +SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): $(Q)$(MKDIR) -p $@ diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index cf9a7ce429df..b76f57e3ec73 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,6 +49,7 @@ #include "util/clockid.h" #include "util/pmu-hybrid.h" #include "util/evlist-hybrid.h" +#include "util/off_cpu.h" #include "asm/bug.h" #include "perf.h" #include "cputopo.h" @@ -162,6 +163,7 @@ struct record { bool buildid_mmap; bool timestamp_filename; bool timestamp_boundary; + bool off_cpu; struct switch_output switch_output; unsigned long long samples; unsigned long output_max_size; /* = 0: unlimited */ @@ -888,6 +890,11 @@ static int record__config_text_poke(struct evlist *evlist) return 0; } +static int record__config_off_cpu(struct record *rec) +{ + return off_cpu_prepare(rec->evlist); +} + static bool record__kcore_readable(struct machine *machine) { char kcore[PATH_MAX]; @@ -2591,6 +2598,9 @@ out_free_threads: } else status = err; + if (rec->off_cpu) + rec->bytes_written += off_cpu_write(rec->session); + record__synthesize(rec, true); /* this will be recalculated during process_buildids() */ rec->samples = 0; @@ -3315,6 +3325,7 @@ static struct option __record_options[] = { OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", "write collected trace data into several data files using parallel threads", record__parse_threads), + OPT_BOOLEAN(0, 
"off-cpu", &record.off_cpu, "Enable off-cpu analysis"), OPT_END() }; @@ -3734,6 +3745,12 @@ int cmd_record(int argc, const char **argv) set_nobuild('\0', "vmlinux", true); # undef set_nobuild # undef REASON +#endif + +#ifndef HAVE_BPF_SKEL +# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) + set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); +# undef set_nobuild #endif rec->opts.affinity = PERF_AFFINITY_SYS; @@ -3972,6 +3989,14 @@ int cmd_record(int argc, const char **argv) } } + if (rec->off_cpu) { + err = record__config_off_cpu(rec); + if (err) { + pr_err("record__config_off_cpu failed, error %d\n", err); + goto out; + } + } + if (record_opts__config(&rec->opts)) { err = -EINVAL; goto out; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 9a7209a99e16..a51267d88ca9 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -147,6 +147,7 @@ perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c new file mode 100644 index 000000000000..9ed7aca3f4ac --- /dev/null +++ b/tools/perf/util/bpf_off_cpu.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/bpf_counter.h" +#include "util/debug.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/off_cpu.h" +#include "util/perf-hooks.h" +#include "util/session.h" +#include + +#include "bpf_skel/off_cpu.skel.h" + +#define MAX_STACKS 32 +/* we don't need actual timestamp, just want to put the samples at last */ +#define OFF_CPU_TIMESTAMP (~0ull << 32) + +static struct off_cpu_bpf *skel; + +struct off_cpu_key { + u32 pid; + u32 tgid; + u32 stack_id; + u32 state; +}; + 
+union off_cpu_data { + struct perf_event_header hdr; + u64 array[1024 / sizeof(u64)]; +}; + +static int off_cpu_config(struct evlist *evlist) +{ + struct evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + .size = sizeof(attr), /* to capture ABI version */ + }; + char *evname = strdup(OFFCPU_EVENT); + + if (evname == NULL) + return -ENOMEM; + + evsel = evsel__new(&attr); + if (!evsel) { + free(evname); + return -ENOMEM; + } + + evsel->core.attr.freq = 1; + evsel->core.attr.sample_period = 1; + /* off-cpu analysis depends on stack trace */ + evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; + + evlist__add(evlist, evsel); + + free(evsel->name); + evsel->name = evname; + + return 0; +} + +static void off_cpu_start(void *arg __maybe_unused) +{ + skel->bss->enabled = 1; +} + +static void off_cpu_finish(void *arg __maybe_unused) +{ + skel->bss->enabled = 0; + off_cpu_bpf__destroy(skel); +} + +int off_cpu_prepare(struct evlist *evlist) +{ + int err; + + if (off_cpu_config(evlist) < 0) { + pr_err("Failed to config off-cpu BPF event\n"); + return -1; + } + + set_max_rlimit(); + + skel = off_cpu_bpf__open_and_load(); + if (!skel) { + pr_err("Failed to open off-cpu BPF skeleton\n"); + return -1; + } + + err = off_cpu_bpf__attach(skel); + if (err) { + pr_err("Failed to attach off-cpu BPF skeleton\n"); + goto out; + } + + if (perf_hooks__set_hook("record_start", off_cpu_start, NULL) || + perf_hooks__set_hook("record_end", off_cpu_finish, NULL)) { + pr_err("Failed to attach off-cpu skeleton\n"); + goto out; + } + + return 0; + +out: + off_cpu_bpf__destroy(skel); + return -1; +} + +int off_cpu_write(struct perf_session *session) +{ + int bytes = 0, size; + int fd, stack; + u64 sample_type, val, sid = 0; + struct evsel *evsel; + struct perf_data_file *file = &session->data->file; + struct off_cpu_key prev, key; + union off_cpu_data data = { + .hdr = { + .type = PERF_RECORD_SAMPLE, + .misc = 
PERF_RECORD_MISC_USER, + }, + }; + u64 tstamp = OFF_CPU_TIMESTAMP; + + skel->bss->enabled = 0; + + evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT); + if (evsel == NULL) { + pr_err("%s evsel not found\n", OFFCPU_EVENT); + return 0; + } + + sample_type = evsel->core.attr.sample_type; + + if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) { + if (evsel->core.id) + sid = evsel->core.id[0]; + } + + fd = bpf_map__fd(skel->maps.off_cpu); + stack = bpf_map__fd(skel->maps.stacks); + memset(&prev, 0, sizeof(prev)); + + while (!bpf_map_get_next_key(fd, &prev, &key)) { + int n = 1; /* start from perf_event_header */ + int ip_pos = -1; + + bpf_map_lookup_elem(fd, &key, &val); + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + data.array[n++] = sid; + if (sample_type & PERF_SAMPLE_IP) { + ip_pos = n; + data.array[n++] = 0; /* will be updated */ + } + if (sample_type & PERF_SAMPLE_TID) + data.array[n++] = (u64)key.pid << 32 | key.tgid; + if (sample_type & PERF_SAMPLE_TIME) + data.array[n++] = tstamp; + if (sample_type & PERF_SAMPLE_ID) + data.array[n++] = sid; + if (sample_type & PERF_SAMPLE_CPU) + data.array[n++] = 0; + if (sample_type & PERF_SAMPLE_PERIOD) + data.array[n++] = val; + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + int len = 0; + + /* data.array[n] is callchain->nr (updated later) */ + data.array[n + 1] = PERF_CONTEXT_USER; + data.array[n + 2] = 0; + + bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); + while (data.array[n + 2 + len]) + len++; + + /* update length of callchain */ + data.array[n] = len + 1; + + /* update sample ip with the first callchain entry */ + if (ip_pos >= 0) + data.array[ip_pos] = data.array[n + 2]; + + /* calculate sample callchain data array length */ + n += len + 2; + } + /* TODO: handle more sample types */ + + size = n * sizeof(u64); + data.hdr.size = size; + bytes += size; + + if (perf_data_file__write(file, &data, size) < 0) { + pr_err("failed to write perf data, error: %m\n"); + return bytes; + } + 
+ prev = key; + /* increase dummy timestamp to sort later samples */ + tstamp++; + } + return bytes; +} diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c new file mode 100644 index 000000000000..5173ed882fdf --- /dev/null +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2022 Google +#include "vmlinux.h" +#include +#include +#include + +/* task->flags for off-cpu analysis */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ + +/* task->state for off-cpu analysis */ +#define TASK_INTERRUPTIBLE 0x0001 +#define TASK_UNINTERRUPTIBLE 0x0002 + +#define MAX_STACKS 32 +#define MAX_ENTRIES 102400 + +struct tstamp_data { + __u32 stack_id; + __u32 state; + __u64 timestamp; +}; + +struct offcpu_key { + __u32 pid; + __u32 tgid; + __u32 stack_id; + __u32 state; +}; + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(__u32)); + __uint(value_size, MAX_STACKS * sizeof(__u64)); + __uint(max_entries, MAX_ENTRIES); +} stacks SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tstamp_data); +} tstamp SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct offcpu_key)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, MAX_ENTRIES); +} off_cpu SEC(".maps"); + +/* old kernel task_struct definition */ +struct task_struct___old { + long state; +} __attribute__((preserve_access_index)); + +int enabled = 0; + +/* + * Old kernel used to call it task_struct->state and now it's '__state'. 
+ * Use BPF CO-RE "ignored suffix rule" to deal with it like below: + * + * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes + */ +static inline int get_task_state(struct task_struct *t) +{ + if (bpf_core_field_exists(t->__state)) + return BPF_CORE_READ(t, __state); + + /* recast pointer to capture task_struct___old type for compiler */ + struct task_struct___old *t_old = (void *)t; + + /* now use old "state" name of the field */ + return BPF_CORE_READ(t_old, state); +} + +SEC("tp_btf/sched_switch") +int on_switch(u64 *ctx) +{ + __u64 ts; + int state; + __u32 stack_id; + struct task_struct *prev, *next; + struct tstamp_data *pelem; + + if (!enabled) + return 0; + + prev = (struct task_struct *)ctx[1]; + next = (struct task_struct *)ctx[2]; + state = get_task_state(prev); + + ts = bpf_ktime_get_ns(); + + if (prev->flags & PF_KTHREAD) + goto next; + if (state != TASK_INTERRUPTIBLE && + state != TASK_UNINTERRUPTIBLE) + goto next; + + stack_id = bpf_get_stackid(ctx, &stacks, + BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK); + + pelem = bpf_task_storage_get(&tstamp, prev, NULL, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!pelem) + goto next; + + pelem->timestamp = ts; + pelem->state = state; + pelem->stack_id = stack_id; + +next: + pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); + + if (pelem && pelem->timestamp) { + struct offcpu_key key = { + .pid = next->pid, + .tgid = next->tgid, + .stack_id = pelem->stack_id, + .state = pelem->state, + }; + __u64 delta = ts - pelem->timestamp; + __u64 *total; + + total = bpf_map_lookup_elem(&off_cpu, &key); + if (total) + *total += delta; + else + bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + + /* prevent to reuse the timestamp later */ + pelem->timestamp = 0; + } + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h new file mode 100644 index 000000000000..375d03c424ea --- /dev/null +++ 
b/tools/perf/util/off_cpu.h @@ -0,0 +1,24 @@ +#ifndef PERF_UTIL_OFF_CPU_H +#define PERF_UTIL_OFF_CPU_H + +struct evlist; +struct perf_session; + +#define OFFCPU_EVENT "offcpu-time" + +#ifdef HAVE_BPF_SKEL +int off_cpu_prepare(struct evlist *evlist); +int off_cpu_write(struct perf_session *session); +#else +static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused) +{ + return -1; +} + +static inline int off_cpu_write(struct perf_session *session __maybe_unused) +{ + return -1; +} +#endif + +#endif /* PERF_UTIL_OFF_CPU_H */ -- cgit v1.2.3 From 10742d0c0771d9fb0329d03bb7c7620c8738f065 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 May 2022 15:47:22 -0700 Subject: perf record: Implement basic filtering for off-cpu It should honor cpu and task filtering with -a, -C or -p, -t options. Committer testing: # perf record --off-cpu --cpu 1 perf bench sched messaging -l 1000 # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 1.722 [sec] [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 1.446 MB perf.data (7248 samples) ] # # perf script | head -20 perf 97164 [001] 38287.696761: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97164 [001] 38287.696764: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97164 [001] 38287.696765: 9 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97164 [001] 38287.696767: 212 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97164 [001] 38287.696768: 5130 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97164 [001] 38287.696770: 123063 cycles: ffffffffb6e0011e syscall_return_via_sysret+0x38 (vmlinux) perf 97164 [001] 38287.696803: 2292748 cycles: ffffffffb636c82d __fput+0xad (vmlinux) swapper 0 [001] 38287.702852: 1927474 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) :97513 97513 [001] 38287.767207: 1172536 cycles: 
ffffffffb612ff65 newidle_balance+0x5 (vmlinux) swapper 0 [001] 38287.769567: 1073081 cycles: ffffffffb618216d ktime_get_mono_fast_ns+0xd (vmlinux) :97533 97533 [001] 38287.770962: 984460 cycles: ffffffffb65b2900 selinux_socket_sendmsg+0x0 (vmlinux) :97540 97540 [001] 38287.772242: 883462 cycles: ffffffffb6d0bf59 irqentry_exit_to_user_mode+0x9 (vmlinux) swapper 0 [001] 38287.773633: 741963 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) :97552 97552 [001] 38287.774539: 606680 cycles: ffffffffb62eda0a page_add_file_rmap+0x7a (vmlinux) :97556 97556 [001] 38287.775333: 502254 cycles: ffffffffb634f964 get_obj_cgroup_from_current+0xc4 (vmlinux) :97561 97561 [001] 38287.776163: 427891 cycles: ffffffffb61b1522 cgroup_rstat_updated+0x22 (vmlinux) swapper 0 [001] 38287.776854: 359030 cycles: ffffffffb612fc5e load_balance+0x9ce (vmlinux) :97567 97567 [001] 38287.777312: 330371 cycles: ffffffffb6a8d8d0 skb_set_owner_w+0x0 (vmlinux) :97566 97566 [001] 38287.777589: 311622 cycles: ffffffffb614a7a8 native_queued_spin_lock_slowpath+0x148 (vmlinux) :97512 97512 [001] 38287.777671: 307851 cycles: ffffffffb62e0f35 find_vma+0x55 (vmlinux) # # perf record --off-cpu --cpu 4 perf bench sched messaging -l 1000 # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 1.613 [sec] [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 1.415 MB perf.data (6729 samples) ] # perf script | head -20 perf 97650 [004] 38323.728036: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97650 [004] 38323.728040: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97650 [004] 38323.728041: 9 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux) perf 97650 [004] 38323.728042: 208 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97650 [004] 38323.728044: 5026 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux) perf 97650 [004] 38323.728046: 
119970 cycles: ffffffffb6d0bebc syscall_exit_to_user_mode+0x1c (vmlinux) perf 97650 [004] 38323.728078: 2190103 cycles: 54b756 perf_tool__process_synth_event+0x16 (/home/acme/bin/perf) swapper 0 [004] 38323.783357: 1593139 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.785352: 1593139 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.797330: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.802350: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) swapper 0 [004] 38323.806333: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux) :97996 97996 [004] 38323.807145: 1418936 cycles: 7f5db9be6917 [unknown] ([unknown]) :97959 97959 [004] 38323.807730: 1445074 cycles: ffffffffb6329d36 memcg_slab_post_alloc_hook+0x146 (vmlinux) :97959 97959 [004] 38323.808103: 1341584 cycles: ffffffffb62fd90f get_page_from_freelist+0x112f (vmlinux) :97959 97959 [004] 38323.808451: 1227537 cycles: ffffffffb65b2905 selinux_socket_sendmsg+0x5 (vmlinux) :97959 97959 [004] 38323.808768: 1184321 cycles: ffffffffb6d1ba35 _raw_spin_lock_irqsave+0x15 (vmlinux) :97959 97959 [004] 38323.809073: 1153017 cycles: ffffffffb6a8d92d skb_set_owner_w+0x5d (vmlinux) :97959 97959 [004] 38323.809402: 1126875 cycles: ffffffffb6329c64 memcg_slab_post_alloc_hook+0x74 (vmlinux) :97959 97959 [004] 38323.809695: 1073248 cycles: ffffffffb6e0001d entry_SYSCALL_64+0x1d (vmlinux) # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Blake Jones Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220518224725.742882-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/bpf_off_cpu.c | 78 
++++++++++++++++++++++++++++++---- tools/perf/util/bpf_skel/off_cpu.bpf.c | 52 +++++++++++++++++++++-- tools/perf/util/off_cpu.h | 6 ++- 4 files changed, 123 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b76f57e3ec73..96014387f553 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -892,7 +892,7 @@ static int record__config_text_poke(struct evlist *evlist) static int record__config_off_cpu(struct record *rec) { - return off_cpu_prepare(rec->evlist); + return off_cpu_prepare(rec->evlist, &rec->opts.target); } static bool record__kcore_readable(struct machine *machine) diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 9ed7aca3f4ac..b5e2d038da50 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -6,6 +6,9 @@ #include "util/off_cpu.h" #include "util/perf-hooks.h" #include "util/session.h" +#include "util/target.h" +#include "util/cpumap.h" +#include "util/thread_map.h" #include #include "bpf_skel/off_cpu.skel.h" @@ -60,8 +63,23 @@ static int off_cpu_config(struct evlist *evlist) return 0; } -static void off_cpu_start(void *arg __maybe_unused) +static void off_cpu_start(void *arg) { + struct evlist *evlist = arg; + + /* update task filter for the given workload */ + if (!skel->bss->has_cpu && !skel->bss->has_task && + perf_thread_map__pid(evlist->core.threads, 0) != -1) { + int fd; + u32 pid; + u8 val = 1; + + skel->bss->has_task = 1; + fd = bpf_map__fd(skel->maps.task_filter); + pid = perf_thread_map__pid(evlist->core.threads, 0); + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); + } + skel->bss->enabled = 1; } @@ -71,31 +89,75 @@ static void off_cpu_finish(void *arg __maybe_unused) off_cpu_bpf__destroy(skel); } -int off_cpu_prepare(struct evlist *evlist) +int off_cpu_prepare(struct evlist *evlist, struct target *target) { - int err; + int err, fd, i; + int ncpus = 1, ntasks = 1; if 
(off_cpu_config(evlist) < 0) { pr_err("Failed to config off-cpu BPF event\n"); return -1; } - set_max_rlimit(); - - skel = off_cpu_bpf__open_and_load(); + skel = off_cpu_bpf__open(); if (!skel) { pr_err("Failed to open off-cpu BPF skeleton\n"); return -1; } + /* don't need to set cpu filter for system-wide mode */ + if (target->cpu_list) { + ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + } + + if (target__has_task(target)) { + ntasks = perf_thread_map__nr(evlist->core.threads); + bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + } + + set_max_rlimit(); + + err = off_cpu_bpf__load(skel); + if (err) { + pr_err("Failed to load off-cpu skeleton\n"); + goto out; + } + + if (target->cpu_list) { + u32 cpu; + u8 val = 1; + + skel->bss->has_cpu = 1; + fd = bpf_map__fd(skel->maps.cpu_filter); + + for (i = 0; i < ncpus; i++) { + cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu; + bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); + } + } + + if (target__has_task(target)) { + u32 pid; + u8 val = 1; + + skel->bss->has_task = 1; + fd = bpf_map__fd(skel->maps.task_filter); + + for (i = 0; i < ntasks; i++) { + pid = perf_thread_map__pid(evlist->core.threads, i); + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); + } + } + err = off_cpu_bpf__attach(skel); if (err) { pr_err("Failed to attach off-cpu BPF skeleton\n"); goto out; } - if (perf_hooks__set_hook("record_start", off_cpu_start, NULL) || - perf_hooks__set_hook("record_end", off_cpu_finish, NULL)) { + if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) || + perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) { pr_err("Failed to attach off-cpu skeleton\n"); goto out; } diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index 5173ed882fdf..78cdcc8ff863 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -49,12 +49,28 @@ struct { 
__uint(max_entries, MAX_ENTRIES); } off_cpu SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cpu_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} task_filter SEC(".maps"); + /* old kernel task_struct definition */ struct task_struct___old { long state; } __attribute__((preserve_access_index)); int enabled = 0; +int has_cpu = 0; +int has_task = 0; /* * Old kernel used to call it task_struct->state and now it's '__state'. @@ -74,6 +90,37 @@ static inline int get_task_state(struct task_struct *t) return BPF_CORE_READ(t_old, state); } +static inline int can_record(struct task_struct *t, int state) +{ + /* kernel threads don't have user stack */ + if (t->flags & PF_KTHREAD) + return 0; + + if (state != TASK_INTERRUPTIBLE && + state != TASK_UNINTERRUPTIBLE) + return 0; + + if (has_cpu) { + __u32 cpu = bpf_get_smp_processor_id(); + __u8 *ok; + + ok = bpf_map_lookup_elem(&cpu_filter, &cpu); + if (!ok) + return 0; + } + + if (has_task) { + __u8 *ok; + __u32 pid = t->pid; + + ok = bpf_map_lookup_elem(&task_filter, &pid); + if (!ok) + return 0; + } + + return 1; +} + SEC("tp_btf/sched_switch") int on_switch(u64 *ctx) { @@ -92,10 +139,7 @@ int on_switch(u64 *ctx) ts = bpf_ktime_get_ns(); - if (prev->flags & PF_KTHREAD) - goto next; - if (state != TASK_INTERRUPTIBLE && - state != TASK_UNINTERRUPTIBLE) + if (!can_record(prev, state)) goto next; stack_id = bpf_get_stackid(ctx, &stacks, diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h index 375d03c424ea..f47af0232e55 100644 --- a/tools/perf/util/off_cpu.h +++ b/tools/perf/util/off_cpu.h @@ -2,15 +2,17 @@ #define PERF_UTIL_OFF_CPU_H struct evlist; +struct target; struct perf_session; #define OFFCPU_EVENT "offcpu-time" #ifdef HAVE_BPF_SKEL -int off_cpu_prepare(struct evlist *evlist); +int 
off_cpu_prepare(struct evlist *evlist, struct target *target); int off_cpu_write(struct perf_session *session); #else -static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused) +static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused, + struct target *target __maybe_unused) { return -1; } -- cgit v1.2.3 From b36888f71c8542cd49ecaf29cd1ba874c733b5fe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 May 2022 15:47:23 -0700 Subject: perf record: Handle argument change in sched_switch Recently sched_switch tracepoint added a new argument for prev_state, but it's hard to handle the change in a BPF program. Instead, we can check the function prototype in BTF before loading the program. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Andi Kleen Cc: Blake Jones Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220518224725.742882-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_off_cpu.c | 28 +++++++++++++++++++++++++++ tools/perf/util/bpf_skel/off_cpu.bpf.c | 35 +++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index b5e2d038da50..874856c55101 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -89,6 +89,33 @@ static void off_cpu_finish(void *arg __maybe_unused) off_cpu_bpf__destroy(skel); } +/* v5.18 kernel added prev_state arg, so it needs to check the signature */ +static void check_sched_switch_args(void) +{ + const struct btf *btf = bpf_object__btf(skel->obj); + const struct btf_type *t1, *t2, *t3; + u32 type_id; + + type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch", + BTF_KIND_TYPEDEF); + if ((s32)type_id < 0) + return; + + t1 = btf__type_by_id(btf, type_id); + if (t1 == NULL) + return; + + t2 = btf__type_by_id(btf, 
t1->type); + if (t2 == NULL || !btf_is_ptr(t2)) + return; + + t3 = btf__type_by_id(btf, t2->type); + if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) { + /* new format: pass prev_state as 4th arg */ + skel->rodata->has_prev_state = true; + } +} + int off_cpu_prepare(struct evlist *evlist, struct target *target) { int err, fd, i; @@ -117,6 +144,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target) } set_max_rlimit(); + check_sched_switch_args(); err = off_cpu_bpf__load(skel); if (err) { diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index 78cdcc8ff863..986d7db6e75d 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -72,6 +72,8 @@ int enabled = 0; int has_cpu = 0; int has_task = 0; +const volatile bool has_prev_state = false; + /* * Old kernel used to call it task_struct->state and now it's '__state'. * Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -121,22 +123,13 @@ static inline int can_record(struct task_struct *t, int state) return 1; } -SEC("tp_btf/sched_switch") -int on_switch(u64 *ctx) +static int off_cpu_stat(u64 *ctx, struct task_struct *prev, + struct task_struct *next, int state) { __u64 ts; - int state; __u32 stack_id; - struct task_struct *prev, *next; struct tstamp_data *pelem; - if (!enabled) - return 0; - - prev = (struct task_struct *)ctx[1]; - next = (struct task_struct *)ctx[2]; - state = get_task_state(prev); - ts = bpf_ktime_get_ns(); if (!can_record(prev, state)) @@ -180,4 +173,24 @@ next: return 0; } +SEC("tp_btf/sched_switch") +int on_switch(u64 *ctx) +{ + struct task_struct *prev, *next; + int prev_state; + + if (!enabled) + return 0; + + prev = (struct task_struct *)ctx[1]; + next = (struct task_struct *)ctx[2]; + + if (has_prev_state) + prev_state = (int)ctx[3]; + else + prev_state = get_task_state(prev); + + return off_cpu_stat(ctx, prev, next, prev_state); +} + char LICENSE[] SEC("license") = "Dual 
BSD/GPL"; -- cgit v1.2.3 From 685439a7a037d8677e3d1acf0302624002ee6a6d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 May 2022 15:47:24 -0700 Subject: perf record: Add cgroup support for off-cpu profiling This covers two different use cases. The first one is cgroup filtering given by -G/--cgroup option which controls the off-cpu profiling for tasks in the given cgroups only. The other use case is cgroup sampling which is enabled by --all-cgroups option and it adds PERF_SAMPLE_CGROUP to the sample_type to set the cgroup id of the task in the sample data. Example output. $ sudo perf record -a --off-cpu --all-cgroups sleep 1 $ sudo perf report --stdio -s comm,cgroup --call-graph=no ... # Samples: 144 of event 'offcpu-time' # Event count (approx.): 48452045427 # # Children Self Command Cgroup # ........ ........ ............... .......................................... # 61.57% 5.60% Chrome_ChildIOT /user.slice/user-657345.slice/user@657345.service/app.slice/... 29.51% 7.38% Web Content /user.slice/user-657345.slice/user@657345.service/app.slice/... 17.48% 1.59% Chrome_IOThread /user.slice/user-657345.slice/user@657345.service/app.slice/... 16.48% 4.12% pipewire-pulse /user.slice/user-657345.slice/user@657345.service/session.slice/... 14.48% 2.07% perf /user.slice/user-657345.slice/user@657345.service/app.slice/... 14.30% 7.15% CompositorTileW /user.slice/user-657345.slice/user@657345.service/app.slice/... 13.33% 6.67% Timer /user.slice/user-657345.slice/user@657345.service/app.slice/... ... 
Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Andi Kleen Cc: Blake Jones Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220518224725.742882-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/bpf_off_cpu.c | 48 ++++++++++++++++++++++++++++++++-- tools/perf/util/bpf_skel/off_cpu.bpf.c | 33 +++++++++++++++++++++++ tools/perf/util/off_cpu.h | 7 +++-- 4 files changed, 85 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 96014387f553..9a71f0330137 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -892,7 +892,7 @@ static int record__config_text_poke(struct evlist *evlist) static int record__config_off_cpu(struct record *rec) { - return off_cpu_prepare(rec->evlist, &rec->opts.target); + return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); } static bool record__kcore_readable(struct machine *machine) diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 874856c55101..b73e84a02264 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -5,10 +5,12 @@ #include "util/evlist.h" #include "util/off_cpu.h" #include "util/perf-hooks.h" +#include "util/record.h" #include "util/session.h" #include "util/target.h" #include "util/cpumap.h" #include "util/thread_map.h" +#include "util/cgroup.h" #include #include "bpf_skel/off_cpu.skel.h" @@ -24,6 +26,7 @@ struct off_cpu_key { u32 tgid; u32 stack_id; u32 state; + u64 cgroup_id; }; union off_cpu_data { @@ -116,10 +119,11 @@ static void check_sched_switch_args(void) } } -int off_cpu_prepare(struct evlist *evlist, struct target *target) +int off_cpu_prepare(struct evlist *evlist, struct target *target, + struct record_opts *opts) { int err, fd, i; - int ncpus = 
1, ntasks = 1; + int ncpus = 1, ntasks = 1, ncgrps = 1; if (off_cpu_config(evlist) < 0) { pr_err("Failed to config off-cpu BPF event\n"); @@ -143,6 +147,21 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target) bpf_map__set_max_entries(skel->maps.task_filter, ntasks); } + if (evlist__first(evlist)->cgrp) { + ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */ + bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); + + if (!cgroup_is_v2("perf_event")) + skel->rodata->uses_cgroup_v1 = true; + } + + if (opts->record_cgroup) { + skel->rodata->needs_cgroup = true; + + if (!cgroup_is_v2("perf_event")) + skel->rodata->uses_cgroup_v1 = true; + } + set_max_rlimit(); check_sched_switch_args(); @@ -178,6 +197,29 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target) } } + if (evlist__first(evlist)->cgrp) { + struct evsel *evsel; + u8 val = 1; + + skel->bss->has_cgroup = 1; + fd = bpf_map__fd(skel->maps.cgroup_filter); + + evlist__for_each_entry(evlist, evsel) { + struct cgroup *cgrp = evsel->cgrp; + + if (cgrp == NULL) + continue; + + if (!cgrp->id && read_cgroup_id(cgrp) < 0) { + pr_err("Failed to read cgroup id of %s\n", + cgrp->name); + goto out; + } + + bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY); + } + } + err = off_cpu_bpf__attach(skel); if (err) { pr_err("Failed to attach off-cpu BPF skeleton\n"); @@ -275,6 +317,8 @@ int off_cpu_write(struct perf_session *session) /* calculate sample callchain data array length */ n += len + 2; } + if (sample_type & PERF_SAMPLE_CGROUP) + data.array[n++] = key.cgroup_id; /* TODO: handle more sample types */ size = n * sizeof(u64); diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index 986d7db6e75d..792ae2847080 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -26,6 +26,7 @@ struct offcpu_key { __u32 tgid; __u32 stack_id; __u32 state; + __u64 cgroup_id; }; struct { @@ -63,6 +64,13 @@ struct { 
__uint(max_entries, 1); } task_filter SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cgroup_filter SEC(".maps"); + /* old kernel task_struct definition */ struct task_struct___old { long state; @@ -71,8 +79,11 @@ struct task_struct___old { int enabled = 0; int has_cpu = 0; int has_task = 0; +int has_cgroup = 0; const volatile bool has_prev_state = false; +const volatile bool needs_cgroup = false; +const volatile bool uses_cgroup_v1 = false; /* * Old kernel used to call it task_struct->state and now it's '__state'. @@ -92,6 +103,18 @@ static inline int get_task_state(struct task_struct *t) return BPF_CORE_READ(t_old, state); } +static inline __u64 get_cgroup_id(struct task_struct *t) +{ + struct cgroup *cgrp; + + if (uses_cgroup_v1) + cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup); + else + cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp); + + return BPF_CORE_READ(cgrp, kn, id); +} + static inline int can_record(struct task_struct *t, int state) { /* kernel threads don't have user stack */ @@ -120,6 +143,15 @@ static inline int can_record(struct task_struct *t, int state) return 0; } + if (has_cgroup) { + __u8 *ok; + __u64 cgrp_id = get_cgroup_id(t); + + ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id); + if (!ok) + return 0; + } + return 1; } @@ -156,6 +188,7 @@ next: .tgid = next->tgid, .stack_id = pelem->stack_id, .state = pelem->state, + .cgroup_id = needs_cgroup ? 
get_cgroup_id(next) : 0, }; __u64 delta = ts - pelem->timestamp; __u64 *total; diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h index f47af0232e55..548008f74d42 100644 --- a/tools/perf/util/off_cpu.h +++ b/tools/perf/util/off_cpu.h @@ -4,15 +4,18 @@ struct evlist; struct target; struct perf_session; +struct record_opts; #define OFFCPU_EVENT "offcpu-time" #ifdef HAVE_BPF_SKEL -int off_cpu_prepare(struct evlist *evlist, struct target *target); +int off_cpu_prepare(struct evlist *evlist, struct target *target, + struct record_opts *opts); int off_cpu_write(struct perf_session *session); #else static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused, - struct target *target __maybe_unused) + struct target *target __maybe_unused, + struct record_opts *opts __maybe_unused) { return -1; } -- cgit v1.2.3 From 831d06c8d1b37b722d110579d52b1c661e618302 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 18 May 2022 15:47:25 -0700 Subject: perf test: Add a basic offcpu profiling test $ sudo ./perf test -v offcpu 88: perf record offcpu profiling tests : --- start --- test child forked, pid 685966 Basic off-cpu test Basic off-cpu test [Success] test child finished with 0 ---- end ---- perf record offcpu profiling tests: Ok Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Andi Kleen Cc: Blake Jones Cc: Hao Luo Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220518224725.742882-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record_offcpu.sh | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 tools/perf/tests/shell/record_offcpu.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh new file mode 100755 index 000000000000..96e0739f7478 --- /dev/null +++ 
b/tools/perf/tests/shell/record_offcpu.sh @@ -0,0 +1,60 @@ +#!/bin/sh +# perf record offcpu profiling tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +err=0 +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) + +cleanup() { + rm -f ${perfdata} + rm -f ${perfdata}.old + trap - exit term int +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup exit term int + +test_offcpu() { + echo "Basic off-cpu test" + if [ `id -u` != 0 ] + then + echo "Basic off-cpu test [Skipped permission]" + err=2 + return + fi + if perf record --off-cpu -o ${perfdata} --quiet true 2>&1 | grep BUILD_BPF_SKEL + then + echo "Basic off-cpu test [Skipped missing BPF support]" + err=2 + return + fi + if ! perf record --off-cpu -e dummy -o ${perfdata} sleep 1 2> /dev/null + then + echo "Basic off-cpu test [Failed record]" + err=1 + return + fi + if ! perf evlist -i ${perfdata} | grep -q "offcpu-time" + then + echo "Basic off-cpu test [Failed record]" + err=1 + return + fi + if ! perf report -i ${perfdata} -q --percent-limit=90 | egrep -q sleep + then + echo "Basic off-cpu test [Failed missing output]" + err=1 + return + fi + echo "Basic off-cpu test [Success]" +} + +test_offcpu + +cleanup +exit $err -- cgit v1.2.3 From c4040212bc97d16040712a410335f93bc94d2262 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 26 May 2022 22:54:00 +0800 Subject: perf c2c: Use stdio interface if slang is not supported If the slang lib is not installed on the system, the perf c2c tool disables TUI mode and falls back to stdio mode; but the flag 'c2c.use_stdio' is not set to true in that case, and thus the UI quirks are wrongly applied in the function ui_quirks(). This commit forces the use of the stdio interface if slang is not supported, which avoids applying the UI quirks and shows the correct metric header. 
Before: ================================================= Shared Cache Line Distribution Pareto ================================================= ------------------------------------------------------------------------------- 0 0 0 99 0 0 0 0xaaaac17d6000 ------------------------------------------------------------------------------- 0.00% 0.00% 6.06% 0.00% 0.00% 0.00% 0x20 N/A 0 0xaaaac17c25ac 0 0 43 375 18469 2 [.] 0x00000000000025ac memstress memstress[25ac] 0 0.00% 0.00% 93.94% 0.00% 0.00% 0.00% 0x29 N/A 0 0xaaaac17c3e88 0 0 173 180 135 2 [.] 0x0000000000003e88 memstress memstress[3e88] 0 After: ================================================= Shared Cache Line Distribution Pareto ================================================= ------------------------------------------------------------------------------- 0 0 0 99 0 0 0 0xaaaac17d6000 ------------------------------------------------------------------------------- 0.00% 0.00% 6.06% 0.00% 0.00% 0.00% 0x20 N/A 0 0xaaaac17c25ac 0 0 43 375 18469 2 [.] 0x00000000000025ac memstress memstress[25ac] 0 0.00% 0.00% 93.94% 0.00% 0.00% 0.00% 0x29 N/A 0 0xaaaac17c3e88 0 0 173 180 135 2 [.] 
0x0000000000003e88 memstress memstress[3e88] 0 Fixes: 5a1a99cd2e4e1557 ("perf c2c report: Add main TUI browser") Reported-by: Joe Mario Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20220526145400.611249-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c8230c48125f..80b525c065ed 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2801,9 +2801,7 @@ static int perf_c2c__report(int argc, const char **argv) "the input file to process"), OPT_INCR('N', "node-info", &c2c.node_info, "show extra node info in report (repeat for more info)"), -#ifdef HAVE_SLANG_SUPPORT OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), -#endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Display only statistic tables (implies --stdio)"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, @@ -2833,6 +2831,10 @@ static int perf_c2c__report(int argc, const char **argv) if (argc) usage_with_options(report_c2c_usage, options); +#ifndef HAVE_SLANG_SUPPORT + c2c.use_stdio = true; +#endif + if (c2c.stats_only) c2c.use_stdio = true; -- cgit v1.2.3 From 8803880f7d1cf85cbc110e47400165a6b85e13df Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 25 May 2022 16:41:10 +0100 Subject: perf unwind arm64: Use perf's copy of kernel headers Fix this include path to use perf's copy of the kernel header rather than the one from the root of the repo. This fixes build errors when only applying the perf tools part of a patchset rather than both sides. 
Reported-by: German Gomez Signed-off-by: James Clark Tested-by: German Gomez Cc: Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220525154114.718321-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/libunwind/arm64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 15f60fd09424..014d82159656 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -24,7 +24,7 @@ #include "unwind.h" #include "libunwind-aarch64.h" #define perf_event_arm_regs perf_event_arm64_regs -#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#include <../../../arch/arm64/include/uapi/asm/perf_regs.h> #undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" -- cgit v1.2.3 From f450f11b2d3f48e7eb0c9ca34ee5c873521e0b7b Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 25 May 2022 16:41:11 +0100 Subject: perf tools arm64: Copy perf_regs.h from the kernel Get the updated header for the newly added VG register. 
Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Cc: Alexander Shishkin Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220525154114.718321-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/perf_regs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h index d54daafa89e3..fd157f46727e 100644 --- a/tools/arch/arm64/include/uapi/asm/perf_regs.h +++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h @@ -36,6 +36,11 @@ enum perf_event_arm_regs { PERF_REG_ARM64_LR, PERF_REG_ARM64_SP, PERF_REG_ARM64_PC, - PERF_REG_ARM64_MAX, + + /* Extended/pseudo registers */ + PERF_REG_ARM64_VG = 46, // SVE Vector Granule + + PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1, + PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1 }; #endif /* _ASM_ARM64_PERF_REGS_H */ -- cgit v1.2.3 From 721052048bba2c8df1928d013963e12eca84f58c Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 25 May 2022 16:41:12 +0100 Subject: perf unwind: Use dynamic register set for DWARF unwind Architectures can detect availability of extra registers at runtime so use this more complete set for unwinding. This will include the VG register on arm64 in a later commit. If the function isn't implemented then PERF_REGS_MASK is returned and there is no change. Committer notes: Added util/perf_regs.c to tools/perf/util/python-ext-sources so that 'perf test python' passes, i.e. 
the perf python binding has all the symbols it needs, addressing: $ perf test -v python 19: 'import perf' in python : --- start --- test child forked, pid 2037817 python usage test: "echo "import sys ; sys.path.append('/tmp/build/perf/python'); import perf" | '/usr/bin/python3' " Traceback (most recent call last): File "", line 1, in ImportError: /tmp/build/perf/python/perf.cpython-310-x86_64-linux-gnu.so: undefined symbol: arch__user_reg_mask test child finished with -1 ---- end ---- 'import perf' in python: FAILED! $ Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Brown Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220525154114.718321-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- tools/perf/util/python-ext-sources | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a0d5753e363e..ce499c5da8d7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -897,7 +897,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o "specifying a subset with --user-regs may render DWARF unwinding unreliable, " "so the minimal registers set (IP, SP) is explicitly forced.\n"); } else { - attr->sample_regs_user |= PERF_REGS_MASK; + attr->sample_regs_user |= arch__user_reg_mask(); } attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index a685d20165f7..aa5156c2bcff 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -38,5 +38,6 @@ util/units.c util/affinity.c util/rwsem.c util/hashmap.c +util/perf_regs.c util/pmu-hybrid.c 
util/fncache.c -- cgit v1.2.3 From d511578b9d215e2ff27e10c1b9d5d414383018dc Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 25 May 2022 16:41:13 +0100 Subject: perf unwind arm64: Decouple Libunwind register names from Perf DWARF register numbers and real register numbers on aarch64 are equivalent. Remove the references to the register names from Libunwind so that new registers are supported without having to add build time feature checks for each new register. The unwinder won't ask for a register that it doesn't know about and Perf will already report an error for an unknown or unrecorded register in the perf_reg_value() function so extra validation isn't needed. After this change the new VG register can be read by libunwind. Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Brown Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220525154114.718321-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/unwind-libunwind.c | 73 +-------------------------- 1 file changed, 2 insertions(+), 71 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index 5aecf88e3de6..871af5992298 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -10,77 +10,8 @@ int LIBUNWIND__ARCH_REG_ID(int regnum) { - switch (regnum) { - case UNW_AARCH64_X0: - return PERF_REG_ARM64_X0; - case UNW_AARCH64_X1: - return PERF_REG_ARM64_X1; - case UNW_AARCH64_X2: - return PERF_REG_ARM64_X2; - case UNW_AARCH64_X3: - return PERF_REG_ARM64_X3; - case UNW_AARCH64_X4: - return PERF_REG_ARM64_X4; - case UNW_AARCH64_X5: - return PERF_REG_ARM64_X5; - case UNW_AARCH64_X6: - return PERF_REG_ARM64_X6; - case 
UNW_AARCH64_X7: - return PERF_REG_ARM64_X7; - case UNW_AARCH64_X8: - return PERF_REG_ARM64_X8; - case UNW_AARCH64_X9: - return PERF_REG_ARM64_X9; - case UNW_AARCH64_X10: - return PERF_REG_ARM64_X10; - case UNW_AARCH64_X11: - return PERF_REG_ARM64_X11; - case UNW_AARCH64_X12: - return PERF_REG_ARM64_X12; - case UNW_AARCH64_X13: - return PERF_REG_ARM64_X13; - case UNW_AARCH64_X14: - return PERF_REG_ARM64_X14; - case UNW_AARCH64_X15: - return PERF_REG_ARM64_X15; - case UNW_AARCH64_X16: - return PERF_REG_ARM64_X16; - case UNW_AARCH64_X17: - return PERF_REG_ARM64_X17; - case UNW_AARCH64_X18: - return PERF_REG_ARM64_X18; - case UNW_AARCH64_X19: - return PERF_REG_ARM64_X19; - case UNW_AARCH64_X20: - return PERF_REG_ARM64_X20; - case UNW_AARCH64_X21: - return PERF_REG_ARM64_X21; - case UNW_AARCH64_X22: - return PERF_REG_ARM64_X22; - case UNW_AARCH64_X23: - return PERF_REG_ARM64_X23; - case UNW_AARCH64_X24: - return PERF_REG_ARM64_X24; - case UNW_AARCH64_X25: - return PERF_REG_ARM64_X25; - case UNW_AARCH64_X26: - return PERF_REG_ARM64_X26; - case UNW_AARCH64_X27: - return PERF_REG_ARM64_X27; - case UNW_AARCH64_X28: - return PERF_REG_ARM64_X28; - case UNW_AARCH64_X29: - return PERF_REG_ARM64_X29; - case UNW_AARCH64_X30: - return PERF_REG_ARM64_LR; - case UNW_AARCH64_SP: - return PERF_REG_ARM64_SP; - case UNW_AARCH64_PC: - return PERF_REG_ARM64_PC; - default: - pr_err("unwind: invalid reg id %d\n", regnum); + if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX) return -EINVAL; - } - return -EINVAL; + return regnum; } -- cgit v1.2.3 From 2be00431c576f7fc2299673301134b0d190699a9 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 25 May 2022 16:41:14 +0100 Subject: perf tools arm64: Add support for VG register Add the name of the VG register so it can be used in --user-regs The event will fail to open if the register is requested but not available so only add it to the mask if the kernel supports sve and also if it supports that specific register. 
Committer notes: Add conditional definition of HWCAP_SVE, as suggested by Leo Yan, to build on older systems where this is not available in the system headers. Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Brown Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220525154114.718321-6-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/perf_regs.c | 38 ++++++++++++++++++++++++++++++++++ tools/perf/util/perf_regs.c | 2 ++ 2 files changed, 40 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 476b037eea1c..006692c9b040 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -2,13 +2,19 @@ #include #include #include +#include #include #include +#include "../../../perf-sys.h" #include "../../../util/debug.h" #include "../../../util/event.h" #include "../../../util/perf_regs.h" +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + const struct sample_reg sample_reg_masks[] = { SMPL_REG(x0, PERF_REG_ARM64_X0), SMPL_REG(x1, PERF_REG_ARM64_X1), @@ -43,6 +49,7 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(lr, PERF_REG_ARM64_LR), SMPL_REG(sp, PERF_REG_ARM64_SP), SMPL_REG(pc, PERF_REG_ARM64_PC), + SMPL_REG(vg, PERF_REG_ARM64_VG), SMPL_REG_END }; @@ -131,3 +138,34 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) return SDT_ARG_VALID; } + +uint64_t arch__user_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_USER, + .disabled = 1, + .exclude_kernel = 1, + .sample_period = 1, + .sample_regs_user = PERF_REGS_MASK + }; + int fd; + + if (getauxval(AT_HWCAP) & HWCAP_SVE) + 
attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG); + + /* + * Check if the pmu supports perf extended regs, before + * returning the register mask to sample. + */ + if (attr.sample_regs_user != PERF_REGS_MASK) { + event_attr_init(&attr); + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + return attr.sample_regs_user; + } + } + return PERF_REGS_MASK; +} diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a982e40ee5a9..872dd3d38782 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -103,6 +103,8 @@ static const char *__perf_reg_name_arm64(int id) return "lr"; case PERF_REG_ARM64_PC: return "pc"; + case PERF_REG_ARM64_VG: + return "vg"; default: return NULL; } -- cgit v1.2.3 From f4df0dbbe62ee8e4405a57b27ccd54393971c773 Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Wed, 25 May 2022 22:04:10 +0800 Subject: perf jevents: Fix event syntax error caused by ExtSel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the original code, when "ExtSel" is 1, the eventcode is changed by "eventcode |= 1 << 21". For the event "UNC_Q_RxL_CREDITS_CONSUMED_VN0.DRS", whose "ExtSel" is "1", the eventcode changes from 0x1E to 0x20001E, but the eventcode must be <= 0x1FF, so this causes the parse to fail: # perf stat -e "UNC_Q_RxL_CREDITS_CONSUMED_VN0.DRS" -a sleep 0.1 event syntax error: '.._RxL_CREDITS_CONSUMED_VN0.DRS' \___ value too big for format, maximum is 511 On the perf kernel side, the kernel assumes the valid bits are contiguous. It will adjust the 0x100 (bit 8 for the perf tool) to bit 21 in HW. DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); So the perf tool follows the kernel side and just sets bit 8 rather than bit 21.
Fixes: fedb2b518239cbc0 ("perf jevents: Add support for parsing uncore json files") Reviewed-by: Kan Liang Signed-off-by: Xing Zhengjun Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220525140410.1706851-1-zhengjun.xing@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index cee61c4ed59e..e597e4bac90f 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -605,7 +605,7 @@ static int json_events(const char *fn, } else if (json_streq(map, field, "ExtSel")) { char *code = NULL; addfield(map, &code, "", "", val); - eventcode |= strtoul(code, NULL, 0) << 21; + eventcode |= strtoul(code, NULL, 0) << 8; free(code); } else if (json_streq(map, field, "EventName")) { addfield(map, &je.name, "", "", val); -- cgit v1.2.3 From c4f462235c0f61a0eff2ca0f965a3fdceb80347d Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sat, 21 May 2022 21:04:45 +0800 Subject: perf scripting python: Expose dso and map information This change adds dso build_id and corresponding map's start and end address. The info of dso build_id can be used to find dso file path, and we can validate if a branch address falls into the range of map's start and end addresses. In addition, the map's start address can be used as an offset for disassembly. Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Al Grant Cc: Alexander Shishkin Cc: Eelco Chaudron Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephen Brennan Cc: Tanmay Jagdale Cc: coresight@lists.linaro.org Cc: zengshun . 
wu Link: https://lore.kernel.org/r/20220521130446.4163597-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/scripting-engines/trace-event-python.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 659eb4e4b34b..adba01b7d9dd 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -755,12 +755,22 @@ static void set_regs_in_dict(PyObject *dict, } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, - const char *dso_field, const char *sym_field, - const char *symoff_field) + const char *dso_field, const char *dso_bid_field, + const char *dso_map_start, const char *dso_map_end, + const char *sym_field, const char *symoff_field) { + char sbuild_id[SBUILD_ID_SIZE]; + if (al->map) { pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(al->map->dso->name)); + build_id__sprintf(&al->map->dso->bid, sbuild_id); + pydict_set_item_string_decref(dict, dso_bid_field, + _PyUnicode_FromString(sbuild_id)); + pydict_set_item_string_decref(dict, dso_map_start, + PyLong_FromUnsignedLong(al->map->start)); + pydict_set_item_string_decref(dict, dso_map_end, + PyLong_FromUnsignedLong(al->map->end)); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -840,7 +850,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); - set_sym_in_dict(dict, al, "dso", "symbol", "symoff"); + set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", + "symbol", "symoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -856,7 +867,9 @@ static PyObject *get_perf_sample_dict(struct perf_sample 
*sample, if (addr_al) { pydict_set_item_string_decref(dict_sample, "addr_correlates_sym", PyBool_FromLong(1)); - set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff"); + set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", + "addr_dso_map_start", "addr_dso_map_end", + "addr_symbol", "addr_symoff"); } if (sample->flags) -- cgit v1.2.3 From 12fdd6c009da0d029ae54cff67242be02ea42a7a Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sat, 21 May 2022 21:04:46 +0800 Subject: perf scripts python: Support Arm CoreSight trace data disassembly This commit adds a python script to parse CoreSight tracing events and print out source lines and disassembly; it generates a readable program execution flow for easier human inspection. The script receives CoreSight tracing packets with the format below: +------------+------------+------------+ packet(n): | addr | ip | cpu | +------------+------------+------------+ packet(n+1): | addr | ip | cpu | +------------+------------+------------+ packet::addr represents the start address of the coming branch sample, and packet::ip is the last address of the branch sample. Therefore, a code section between branches starts at packet(n)::addr and stops at packet(n+1)::ip. As a result, we combine the two consecutive packets to generate the address range for instructions: [ sample(n)::addr .. sample(n+1)::ip ] The script supports either objdump or llvm-objdump for disassembly, selected with option '-d'. If option '-d' is not specified, the script simply outputs source lines and symbols. The examples below show usage with llvm-objdump and objdump to output disassembly.
# perf script -s scripts/python/arm-cs-trace-disasm.py -- -d llvm-objdump-11 -k ./vmlinux ARM CoreSight Trace Data Assembler Dump ffff800008eb3198 : ffff800008eb3310: c0 38 00 35 cbnz w0, 0xffff800008eb3a28 ffff800008eb3314: 9f 3f 03 d5 dsb sy ffff800008eb3318: df 3f 03 d5 isb ffff800008eb331c: f5 5b 42 a9 ldp x21, x22, [sp, #32] ffff800008eb3320: fb 73 45 a9 ldp x27, x28, [sp, #80] ffff800008eb3324: e0 82 40 39 ldrb w0, [x23, #32] ffff800008eb3328: 60 00 00 34 cbz w0, 0xffff800008eb3334 ffff800008eb332c: e0 03 19 aa mov x0, x25 ffff800008eb3330: 8c fe ff 97 bl 0xffff800008eb2d60 main 6728/6728 [0004] 0.000000000 etm4_enable_hw+0x198 [kernel.kallsyms] ffff800008eb2d60 : ffff800008eb2d60: 1f 20 03 d5 nop ffff800008eb2d64: 1f 20 03 d5 nop ffff800008eb2d68: 3f 23 03 d5 hint #25 ffff800008eb2d6c: 00 00 40 f9 ldr x0, [x0] ffff800008eb2d70: 9f 3f 03 d5 dsb sy ffff800008eb2d74: 00 c0 3e 91 add x0, x0, #4016 ffff800008eb2d78: 1f 00 00 b9 str wzr, [x0] ffff800008eb2d7c: bf 23 03 d5 hint #29 ffff800008eb2d80: c0 03 5f d6 ret main 6728/6728 [0004] 0.000000000 etm4_cs_lock.isra.0.part.0+0x20 # perf script -s scripts/python/arm-cs-trace-disasm.py -- -d objdump -k ./vmlinux ARM CoreSight Trace Data Assembler Dump ffff800008eb3310 : ffff800008eb3310: 350038c0 cbnz w0, ffff800008eb3a28 ffff800008eb3314: d5033f9f dsb sy ffff800008eb3318: d5033fdf isb ffff800008eb331c: a9425bf5 ldp x21, x22, [sp, #32] ffff800008eb3320: a94573fb ldp x27, x28, [sp, #80] ffff800008eb3324: 394082e0 ldrb w0, [x23, #32] ffff800008eb3328: 34000060 cbz w0, ffff800008eb3334 ffff800008eb332c: aa1903e0 mov x0, x25 ffff800008eb3330: 97fffe8c bl ffff800008eb2d60 main 6728/6728 [0004] 0.000000000 etm4_enable_hw+0x198 [kernel.kallsyms] ffff800008eb2d60 : ffff800008eb2d60: d503201f nop ffff800008eb2d64: d503201f nop ffff800008eb2d68: d503233f paciasp ffff800008eb2d6c: f9400000 ldr x0, [x0] ffff800008eb2d70: d5033f9f dsb sy ffff800008eb2d74: 913ec000 add x0, x0, #0xfb0 ffff800008eb2d78: b900001f str wzr, [x0] 
ffff800008eb2d7c: d50323bf autiasp ffff800008eb2d80: d65f03c0 ret main 6728/6728 [0004] 0.000000000 etm4_cs_lock.isra.0.part.0+0x20 Signed-off-by: Leo Yan Co-authored-by: Al Grant Co-authored-by: Mathieu Poirier Co-authored-by: Tor Jeremiassen Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Eelco Chaudron Cc: German Gomez Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephen Brennan Cc: Tanmay Jagdale Cc: coresight@lists.linaro.org Cc: zengshun . wu Link: https://lore.kernel.org/r/20220521130446.4163597-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/arm-cs-trace-disasm.py | 272 +++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100755 tools/perf/scripts/python/arm-cs-trace-disasm.py (limited to 'tools') diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py new file mode 100755 index 000000000000..5f57d9829956 --- /dev/null +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -0,0 +1,272 @@ +# SPDX-License-Identifier: GPL-2.0 +# arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassember +# +# Author: Tor Jeremiassen +# Mathieu Poirier +# Leo Yan +# Al Grant + +from __future__ import print_function +import os +from os import path +import sys +import re +from subprocess import * +from optparse import OptionParser, make_option + +from perf_trace_context import perf_set_itrace_options, \ + perf_sample_insn, perf_sample_srccode + +# Below are some example commands for using this script. 
+# +# Output disassembly with objdump: +# perf script -s scripts/python/arm-cs-trace-disasm.py \ +# -- -d objdump -k path/to/vmlinux +# Output disassembly with llvm-objdump: +# perf script -s scripts/python/arm-cs-trace-disasm.py \ +# -- -d llvm-objdump-11 -k path/to/vmlinux +# Output only source line and symbols: +# perf script -s scripts/python/arm-cs-trace-disasm.py + +# Command line parsing. +option_list = [ + # formatting options for the bottom entry of the stack + make_option("-k", "--vmlinux", dest="vmlinux_name", + help="Set path to vmlinux file"), + make_option("-d", "--objdump", dest="objdump_name", + help="Set path to objdump executable file"), + make_option("-v", "--verbose", dest="verbose", + action="store_true", default=False, + help="Enable debugging log") +] + +parser = OptionParser(option_list=option_list) +(options, args) = parser.parse_args() + +# Initialize global dicts and regular expression +disasm_cache = dict() +cpu_data = dict() +disasm_re = re.compile("^\s*([0-9a-fA-F]+):") +disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") +cache_size = 64*1024 + +glb_source_file_name = None +glb_line_number = None +glb_dso = None + +def get_optional(perf_dict, field): + if field in perf_dict: + return perf_dict[field] + return "[unknown]" + +def get_offset(perf_dict, field): + if field in perf_dict: + return f"+0x{perf_dict[field]:x}" + return "" + +def get_dso_file_path(dso_name, dso_build_id): + if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"): + if (options.vmlinux_name): + return options.vmlinux_name; + else: + return dso_name + + if (dso_name == "[vdso]") : + append = "/vdso" + else: + append = "/elf" + + dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}" + # Replace duplicate slash chars to single slash char + dso_path = dso_path.replace('//', '/', 1) + return dso_path + +def read_disam(dso_fname, dso_start, start_addr, stop_addr): + addr_range = str(start_addr) + ":" + str(stop_addr) + ":" + 
dso_fname + + # Don't let the cache get too big, clear it when it hits max size + if (len(disasm_cache) > cache_size): + disasm_cache.clear(); + + if addr_range in disasm_cache: + disasm_output = disasm_cache[addr_range]; + else: + start_addr = start_addr - dso_start; + stop_addr = stop_addr - dso_start; + disasm = [ options.objdump_name, "-d", "-z", + f"--start-address=0x{start_addr:x}", + f"--stop-address=0x{stop_addr:x}" ] + disasm += [ dso_fname ] + disasm_output = check_output(disasm).decode('utf-8').split('\n') + disasm_cache[addr_range] = disasm_output + + return disasm_output + +def print_disam(dso_fname, dso_start, start_addr, stop_addr): + for line in read_disam(dso_fname, dso_start, start_addr, stop_addr): + m = disasm_func_re.search(line) + if m is None: + m = disasm_re.search(line) + if m is None: + continue + print(f"\t{line}") + +def print_sample(sample): + print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \ + f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \ + f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}") + +def trace_begin(): + print('ARM CoreSight Trace Data Assembler Dump') + +def trace_end(): + print('End') + +def trace_unhandled(event_name, context, event_fields_dict): + print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])) + +def common_start_str(comm, sample): + sec = int(sample["time"] / 1000000000) + ns = sample["time"] % 1000000000 + cpu = sample["cpu"] + pid = sample["pid"] + tid = sample["tid"] + return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} " + +# This code is copied from intel-pt-events.py for printing source code +# line and symbols. 
+def print_srccode(comm, param_dict, sample, symbol, dso): + ip = sample["ip"] + if symbol == "[unknown]": + start_str = common_start_str(comm, sample) + ("%x" % ip).rjust(16).ljust(40) + else: + offs = get_offset(param_dict, "symoff") + start_str = common_start_str(comm, sample) + (symbol + offs).ljust(40) + + global glb_source_file_name + global glb_line_number + global glb_dso + + source_file_name, line_number, source_line = perf_sample_srccode(perf_script_context) + if source_file_name: + if glb_line_number == line_number and glb_source_file_name == source_file_name: + src_str = "" + else: + if len(source_file_name) > 40: + src_file = ("..." + source_file_name[-37:]) + " " + else: + src_file = source_file_name.ljust(41) + + if source_line is None: + src_str = src_file + str(line_number).rjust(4) + " " + else: + src_str = src_file + str(line_number).rjust(4) + " " + source_line + glb_dso = None + elif dso == glb_dso: + src_str = "" + else: + src_str = dso + glb_dso = dso + + glb_line_number = line_number + glb_source_file_name = source_file_name + + print(f"{start_str}{src_str}") + +def process_event(param_dict): + global cache_size + global options + + sample = param_dict["sample"] + comm = param_dict["comm"] + + name = param_dict["ev_name"] + dso = get_optional(param_dict, "dso") + dso_bid = get_optional(param_dict, "dso_bid") + dso_start = get_optional(param_dict, "dso_map_start") + dso_end = get_optional(param_dict, "dso_map_end") + symbol = get_optional(param_dict, "symbol") + + if (options.verbose == True): + print(f"Event type: {name}") + print_sample(sample) + + # If cannot find dso so cannot dump assembler, bail out + if (dso == '[unknown]'): + return + + # Validate dso start and end addresses + if ((dso_start == '[unknown]') or (dso_end == '[unknown]')): + print(f"Failed to find valid dso map for dso {dso}") + return + + if (name[0:12] == "instructions"): + print_srccode(comm, param_dict, sample, symbol, dso) + return + + # Don't proceed if this event 
is not a branch sample. + if (name[0:8] != "branches"): + return + + cpu = sample["cpu"] + ip = sample["ip"] + addr = sample["addr"] + + # Initialize CPU data if it's empty, and directly return back + # if this is the first tracing event for this CPU. + if (cpu_data.get(str(cpu) + 'addr') == None): + cpu_data[str(cpu) + 'addr'] = addr + return + + # The format for packet is: + # + # +------------+------------+------------+ + # sample_prev: | addr | ip | cpu | + # +------------+------------+------------+ + # sample_next: | addr | ip | cpu | + # +------------+------------+------------+ + # + # We need to combine the two consecutive packets to get the instruction + # range for sample_prev::cpu: + # + # [ sample_prev::addr .. sample_next::ip ] + # + # For this purpose, sample_prev::addr is stored into cpu_data structure + # and read back for 'start_addr' when the new packet comes, and we need + # to use sample_next::ip to calculate 'stop_addr'; adding an extra 4 to + # 'stop_addr' is for the sake of objdump so the final assembler dump can + # include the last instruction for sample_next::ip. + start_addr = cpu_data[str(cpu) + 'addr'] + stop_addr = ip + 4 + + # Record for previous sample packet + cpu_data[str(cpu) + 'addr'] = addr + + # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 + if (start_addr == 0 and stop_addr == 4): + print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted") + return + + if (start_addr < int(dso_start) or start_addr > int(dso_end)): + print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") + return + + if (stop_addr < int(dso_start) or stop_addr > int(dso_end)): + print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") + return + + if (options.objdump_name != None): + # It doesn't need to decrease virtual memory offset for disassembly + # for kernel dso, so in this case we set vm_start to zero.
+ if (dso == "[kernel.kallsyms]"): + dso_vm_start = 0 + else: + dso_vm_start = int(dso_start) + + dso_fname = get_dso_file_path(dso, dso_bid) + if path.exists(dso_fname): + print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) + else: + print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]") + + print_srccode(comm, param_dict, sample, symbol, dso) -- cgit v1.2.3 From 9dde6cadb92b5670b23b97ec53091df0530ec38b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 7 Aug 2020 08:45:47 -0300 Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes in: db1af12929c99d15 ("x86/msr-index: Define INTEGRITY_CAPABILITIES MSR") 089be16d5992dd0b ("x86/msr: Add PerfCntrGlobal* registers") f52ba93190457aa2 ("tools/power turbostat: Add Power Limit4 support") Addressing these tools/perf build warnings: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' That makes the beautification scripts to pick some new entries: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2022-05-26 12:50:01.228612839 -0300 +++ after 2022-05-26 12:50:07.699776166 -0300 @@ -116,6 +116,7 @@ [0x0000026f] = "MTRRfix4K_F8000", [0x00000277] = "IA32_CR_PAT", [0x00000280] = "IA32_MC0_CTL2", + [0x000002d9] = "INTEGRITY_CAPS", [0x000002ff] = "MTRRdefType", [0x00000309] = "CORE_PERF_FIXED_CTR0", [0x0000030a] = "CORE_PERF_FIXED_CTR1", @@ -176,6 +177,7 @@ [0x00000586] = "IA32_RTIT_ADDR3_A", [0x00000587] = "IA32_RTIT_ADDR3_B", [0x00000600] = "IA32_DS_AREA", + [0x00000601] = "VR_CURRENT_CONFIG", [0x00000606] = "RAPL_POWER_UNIT", [0x0000060a] = "PKGC3_IRTL", [0x0000060b] = "PKGC6_IRTL", @@ 
-260,6 +262,10 @@ [0xc0000102 - x86_64_specific_MSRs_offset] = "KERNEL_GS_BASE", [0xc0000103 - x86_64_specific_MSRs_offset] = "TSC_AUX", [0xc0000104 - x86_64_specific_MSRs_offset] = "AMD64_TSC_RATIO", + [0xc000010f - x86_64_specific_MSRs_offset] = "AMD_DBG_EXTN_CFG", + [0xc0000300 - x86_64_specific_MSRs_offset] = "AMD64_PERF_CNTR_GLOBAL_STATUS", + [0xc0000301 - x86_64_specific_MSRs_offset] = "AMD64_PERF_CNTR_GLOBAL_CTL", + [0xc0000302 - x86_64_specific_MSRs_offset] = "AMD64_PERF_CNTR_GLOBAL_STATUS_CLR", }; #define x86_AMD_V_KVM_MSRs_offset 0xc0010000 @@ -318,4 +324,5 @@ [0xc00102b4 - x86_AMD_V_KVM_MSRs_offset] = "AMD_CPPC_STATUS", [0xc00102f0 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN_CTL", [0xc00102f1 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN", + [0xc0010300 - x86_AMD_V_KVM_MSRs_offset] = "AMD_SAMP_BR_FROM", }; $ Now one can trace systemwide asking to see backtraces to where those MSRs are being read/written, see this example with a previous update: # perf trace -e msr:*_msr/max-stack=32/ --filter="msr>=IA32_U_CET && msr<=IA32_INT_SSP_TAB" ^C# If we use -v (verbose mode) we can see what it does behind the scenes: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr>=IA32_U_CET && msr<=IA32_INT_SSP_TAB" Using CPUID AuthenticAMD-25-21-0 0x6a0 0x6a8 New filter for msr:read_msr: (msr>=0x6a0 && msr<=0x6a8) && (common_pid != 597499 && common_pid != 3313) 0x6a0 0x6a8 New filter for msr:write_msr: (msr>=0x6a0 && msr<=0x6a8) && (common_pid != 597499 && common_pid != 3313) mmap size 528384B ^C# Example with a frequent msr: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_SPEC_CTRL" --max-events 2 Using CPUID AuthenticAMD-25-21-0 0x48 New filter for msr:read_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) 0x48 New filter for msr:write_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) mmap size 528384B Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. 
Using /proc/kcore for kernel data Using /proc/kallsyms for symbols 0.000 Timer/2525383 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule ([kernel.kallsyms]) futex_wait_queue_me ([kernel.kallsyms]) futex_wait ([kernel.kallsyms]) do_futex ([kernel.kallsyms]) __x64_sys_futex ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __futex_abstimed_wait_common64 (/usr/lib64/libpthread-2.33.so) 0.030 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL, val: 2) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule_idle ([kernel.kallsyms]) do_idle ([kernel.kallsyms]) cpu_startup_entry ([kernel.kallsyms]) secondary_startup_64_no_verify ([kernel.kallsyms]) # Cc: Adrian Hunter Cc: Hans de Goede Cc: Ian Rogers Cc: Jiri Olsa Cc: Len Brown Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Sumeet Pawnikar Cc: Tony Luck Link: https://lore.kernel.org/lkml/Yo+i%252Fj5+UtE9dcix@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index ee15311b6be1..403e83b4adc8 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -76,6 +76,8 @@ /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT) #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT 
BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) @@ -154,6 +156,11 @@ #define MSR_IA32_POWER_CTL 0x000001fc #define MSR_IA32_POWER_CTL_BIT_EE 19 +/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ +#define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -312,6 +319,7 @@ /* Run Time Average Power Limiting (RAPL) Interface */ +#define MSR_VR_CURRENT_CONFIG 0x00000601 #define MSR_RAPL_POWER_UNIT 0x00000606 #define MSR_PKG_POWER_LIMIT 0x00000610 @@ -502,8 +510,10 @@ #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ES_ENABLED_BIT 1 +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) +#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f @@ -524,6 +534,11 @@ #define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) #define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* AMD Performance Counter Global Status and Control MSRs */ +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 +#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -688,6 +703,10 @@ #define MSR_IA32_PERF_CTL 0x00000199 #define INTEL_PERF_CTL_MASK 0xffff +/* AMD Branch Sampling configuration */ +#define MSR_AMD_DBG_EXTN_CFG 0xc000010f +#define MSR_AMD_SAMP_BR_FROM 0xc0010300 + #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 -- cgit v1.2.3