Age | Commit message (Collapse) | Author |
|
Avoid open-coding that simple expression each time by moving
BYTES_TO_BITS() from the probes code to <linux/bitops.h> to export
it to the rest of the kernel.
Simplify the macro while at it. `BITS_PER_LONG / sizeof(long)` always
equals to %BITS_PER_BYTE, regardless of the target architecture.
Do the same for the tools ecosystem as well (incl. its version of
bitops.h). The previous implementation had its implicit type of long,
while the new one is int, so adjust the format literal accordingly in
the perf code.
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Acked-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
Save 40 bytes and move from 8 to 7 cache lines. Make member dwfl
dependent on being a powerpc build. Squeeze bits of int/enum types
when appropriate. Remove holes/padding by reordering variables.
Before:
struct dso {
struct mutex lock; /* 0 40 */
struct list_head node; /* 40 16 */
struct rb_node rb_node __attribute__((__aligned__(8))); /* 56 24 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
struct rb_root * root; /* 80 8 */
struct rb_root_cached symbols; /* 88 16 */
struct symbol * * symbol_names; /* 104 8 */
size_t symbol_names_len; /* 112 8 */
struct rb_root_cached inlined_nodes; /* 120 16 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
struct rb_root_cached srclines; /* 136 16 */
struct {
u64 addr; /* 152 8 */
struct symbol * symbol; /* 160 8 */
} last_find_result; /* 152 16 */
void * a2l; /* 168 8 */
char * symsrc_filename; /* 176 8 */
unsigned int a2l_fails; /* 184 4 */
enum dso_space_type kernel; /* 188 4 */
/* --- cacheline 3 boundary (192 bytes) --- */
_Bool is_kmod; /* 192 1 */
/* XXX 3 bytes hole, try to pack */
enum dso_swap_type needs_swap; /* 196 4 */
enum dso_binary_type symtab_type; /* 200 4 */
enum dso_binary_type binary_type; /* 204 4 */
enum dso_load_errno load_errno; /* 208 4 */
u8 adjust_symbols:1; /* 212: 0 1 */
u8 has_build_id:1; /* 212: 1 1 */
u8 header_build_id:1; /* 212: 2 1 */
u8 has_srcline:1; /* 212: 3 1 */
u8 hit:1; /* 212: 4 1 */
u8 annotate_warned:1; /* 212: 5 1 */
u8 auxtrace_warned:1; /* 212: 6 1 */
u8 short_name_allocated:1; /* 212: 7 1 */
u8 long_name_allocated:1; /* 213: 0 1 */
u8 is_64_bit:1; /* 213: 1 1 */
/* XXX 6 bits hole, try to pack */
_Bool sorted_by_name; /* 214 1 */
_Bool loaded; /* 215 1 */
u8 rel; /* 216 1 */
/* XXX 7 bytes hole, try to pack */
struct build_id bid; /* 224 32 */
/* --- cacheline 4 boundary (256 bytes) --- */
u64 text_offset; /* 256 8 */
u64 text_end; /* 264 8 */
const char * short_name; /* 272 8 */
const char * long_name; /* 280 8 */
u16 long_name_len; /* 288 2 */
u16 short_name_len; /* 290 2 */
/* XXX 4 bytes hole, try to pack */
void * dwfl; /* 296 8 */
struct auxtrace_cache * auxtrace_cache; /* 304 8 */
int comp; /* 312 4 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 5 boundary (320 bytes) --- */
struct {
struct rb_root cache; /* 320 8 */
int fd; /* 328 4 */
int status; /* 332 4 */
u32 status_seen; /* 336 4 */
/* XXX 4 bytes hole, try to pack */
u64 file_size; /* 344 8 */
struct list_head open_entry; /* 352 16 */
u64 elf_base_addr; /* 368 8 */
u64 debug_frame_offset; /* 376 8 */
/* --- cacheline 6 boundary (384 bytes) --- */
u64 eh_frame_hdr_addr; /* 384 8 */
u64 eh_frame_hdr_offset; /* 392 8 */
} data; /* 320 80 */
struct {
u32 id; /* 400 4 */
u32 sub_id; /* 404 4 */
struct perf_env * env; /* 408 8 */
} bpf_prog; /* 400 16 */
union {
void * priv; /* 416 8 */
u64 db_id; /* 416 8 */
}; /* 416 8 */
struct nsinfo * nsinfo; /* 424 8 */
struct dso_id id; /* 432 24 */
/* --- cacheline 7 boundary (448 bytes) was 8 bytes ago --- */
refcount_t refcnt; /* 456 4 */
char name[]; /* 460 0 */
/* size: 464, cachelines: 8, members: 49 */
/* sum members: 440, holes: 4, sum holes: 18 */
/* sum bitfield members: 10 bits, bit holes: 1, sum bit holes: 6 bits */
/* padding: 4 */
/* forced alignments: 1 */
/* last cacheline: 16 bytes */
} __attribute__((__aligned__(8)));
After:
struct dso {
struct mutex lock; /* 0 40 */
struct list_head node; /* 40 16 */
struct rb_node rb_node __attribute__((__aligned__(8))); /* 56 24 */
/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
struct rb_root * root; /* 80 8 */
struct rb_root_cached symbols; /* 88 16 */
struct symbol * * symbol_names; /* 104 8 */
size_t symbol_names_len; /* 112 8 */
struct rb_root_cached inlined_nodes; /* 120 16 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
struct rb_root_cached srclines; /* 136 16 */
struct {
u64 addr; /* 152 8 */
struct symbol * symbol; /* 160 8 */
} last_find_result; /* 152 16 */
struct build_id bid; /* 168 32 */
/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
u64 text_offset; /* 200 8 */
u64 text_end; /* 208 8 */
const char * short_name; /* 216 8 */
const char * long_name; /* 224 8 */
void * a2l; /* 232 8 */
char * symsrc_filename; /* 240 8 */
struct nsinfo * nsinfo; /* 248 8 */
/* --- cacheline 4 boundary (256 bytes) --- */
struct auxtrace_cache * auxtrace_cache; /* 256 8 */
union {
void * priv; /* 264 8 */
u64 db_id; /* 264 8 */
}; /* 264 8 */
struct {
struct perf_env * env; /* 272 8 */
u32 id; /* 280 4 */
u32 sub_id; /* 284 4 */
} bpf_prog; /* 272 16 */
struct {
struct rb_root cache; /* 288 8 */
struct list_head open_entry; /* 296 16 */
u64 file_size; /* 312 8 */
/* --- cacheline 5 boundary (320 bytes) --- */
u64 elf_base_addr; /* 320 8 */
u64 debug_frame_offset; /* 328 8 */
u64 eh_frame_hdr_addr; /* 336 8 */
u64 eh_frame_hdr_offset; /* 344 8 */
int fd; /* 352 4 */
int status; /* 356 4 */
u32 status_seen; /* 360 4 */
} data; /* 288 80 */
/* XXX last struct has 4 bytes of padding */
struct dso_id id; /* 368 24 */
/* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
unsigned int a2l_fails; /* 392 4 */
int comp; /* 396 4 */
refcount_t refcnt; /* 400 4 */
enum dso_load_errno load_errno; /* 404 4 */
u16 long_name_len; /* 408 2 */
u16 short_name_len; /* 410 2 */
enum dso_binary_type symtab_type:8; /* 412: 0 4 */
enum dso_binary_type binary_type:8; /* 412: 8 4 */
enum dso_space_type kernel:2; /* 412:16 4 */
enum dso_swap_type needs_swap:2; /* 412:18 4 */
/* Bitfield combined with next fields */
_Bool is_kmod:1; /* 414: 4 1 */
u8 adjust_symbols:1; /* 414: 5 1 */
u8 has_build_id:1; /* 414: 6 1 */
u8 header_build_id:1; /* 414: 7 1 */
u8 has_srcline:1; /* 415: 0 1 */
u8 hit:1; /* 415: 1 1 */
u8 annotate_warned:1; /* 415: 2 1 */
u8 auxtrace_warned:1; /* 415: 3 1 */
u8 short_name_allocated:1; /* 415: 4 1 */
u8 long_name_allocated:1; /* 415: 5 1 */
u8 is_64_bit:1; /* 415: 6 1 */
/* XXX 1 bit hole, try to pack */
_Bool sorted_by_name; /* 416 1 */
_Bool loaded; /* 417 1 */
u8 rel; /* 418 1 */
char name[]; /* 419 0 */
/* size: 424, cachelines: 7, members: 48 */
/* sum members: 415 */
/* sum bitfield members: 31 bits, bit holes: 1, sum bit holes: 1 bits */
/* padding: 5 */
/* paddings: 1, sum paddings: 4 */
/* forced alignments: 1 */
/* last cacheline: 40 bytes */
} __attribute__((__aligned__(8)));
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ben Gainey <ben.gainey@arm.com>
Cc: Changbin Du <changbin.du@huawei.com>
Cc: Chengen Du <chengen.du@canonical.com>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linux.dev>
Cc: Li Dong <lidong@vivo.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Markus Elfring <Markus.Elfring@web.de>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paran Lee <p4ranlee@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Song Liu <song@kernel.org>
Cc: Sun Haiyong <sunhaiyong@loongson.cn>
Cc: Yanteng Si <siyanteng@loongson.cn>
Cc: zhaimingbing <zhaimingbing@cmss.chinamobile.com>
Link: https://lore.kernel.org/r/20240321160300.1635121-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The 'perf lock contention' program currently shows the caller of the locks
as __traceiter_contention_begin+0x??. This caller can be ignored, as it is
from the traceiter itself. Instead, it should show the real callers for
the locks.
When fiddling with the --stack-skip parameter, the actual callers for
the locks start to show up. However, just ignore the
__traceiter_contention_begin and the __traceiter_contention_end symbols
so the actual callers will show up.
Before this patch is applied:
sudo perf lock con -a -b -- sleep 3
contended total wait max wait avg wait type caller
8 2.33 s 2.28 s 291.18 ms rwlock:W __traceiter_contention_begin+0x44
4 2.33 s 2.28 s 582.35 ms rwlock:W __traceiter_contention_begin+0x44
7 140.30 ms 46.77 ms 20.04 ms rwlock:W __traceiter_contention_begin+0x44
2 63.35 ms 33.76 ms 31.68 ms mutex trace_contention_begin+0x84
2 46.74 ms 46.73 ms 23.37 ms rwlock:W __traceiter_contention_begin+0x44
1 13.54 us 13.54 us 13.54 us mutex trace_contention_begin+0x84
1 3.67 us 3.67 us 3.67 us rwsem:R __traceiter_contention_begin+0x44
Before this patch is applied - using --stack-skip 5
sudo perf lock con --stack-skip 5 -a -b -- sleep 3
contended total wait max wait avg wait type caller
2 2.24 s 2.24 s 1.12 s rwlock:W do_epoll_wait+0x5a0
4 1.65 s 824.21 ms 412.08 ms rwlock:W do_exit+0x338
2 824.35 ms 824.29 ms 412.17 ms spinlock get_signal+0x108
2 824.14 ms 824.14 ms 412.07 ms rwlock:W release_task+0x68
1 25.22 ms 25.22 ms 25.22 ms mutex cgroup_kn_lock_live+0x58
1 24.71 us 24.71 us 24.71 us spinlock do_exit+0x44
1 22.04 us 22.04 us 22.04 us rwsem:R lock_mm_and_find_vma+0xb0
After this patch is applied:
sudo ./perf lock con -a -b -- sleep 3
contended total wait max wait avg wait type caller
4 4.13 s 2.07 s 1.03 s rwlock:W release_task+0x68
2 2.07 s 2.07 s 1.03 s rwlock:R mm_update_next_owner+0x50
2 2.07 s 2.07 s 1.03 s rwlock:W do_exit+0x338
1 41.56 ms 41.56 ms 41.56 ms mutex cgroup_kn_lock_live+0x58
2 36.12 us 18.83 us 18.06 us rwlock:W do_exit+0x338
Signed-off-by: Anne Macedo <retpolanne@posteo.net>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240319143629.3422590-1-retpolanne@posteo.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
This prototype is obtained indirectly, by luck, from some other header
in probe-event.c in most systems, but recently exploded on alpine:edge:
8 13.39 alpine:edge : FAIL gcc version 13.2.1 20240309 (Alpine 13.2.1_git20240309)
util/probe-event.c: In function 'convert_exec_to_group':
util/probe-event.c:225:16: error: implicit declaration of function 'basename' [-Werror=implicit-function-declaration]
225 | ptr1 = basename(exec_copy);
| ^~~~~~~~
util/probe-event.c:225:14: error: assignment to 'char *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion]
225 | ptr1 = basename(exec_copy);
| ^
cc1: all warnings being treated as errors
make[3]: *** [/git/perf-6.8.0/tools/build/Makefile.build:158: util] Error 2
Fix it by adding the libgen.h header where basename() is prototyped.
Fixes: fb7345bbf7fad9bf ("perf probe: Support basic dwarf-based operations on uprobe events")
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The levenshtein penalty for deleting a character was far higher than
subsituting or inserting a character. Lower the penalty to match that
of inserting a character.
Before:
$ perf recccord
perf: 'recccord' is not a perf-command. See 'perf --help'.
$
After:
$ perf recccord
perf: 'recccord' is not a perf-command. See 'perf --help'.
Did you mean this?
record
$
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240301201306.2680986-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The existing unknown command code looks for perf scripts like
perf-archive.sh and perf-iostat.sh, however, inbuilt commands aren't
suggested. Add the inbuilt commands so they may be suggested too.
Before:
$ perf reccord
perf: 'reccord' is not a perf-command. See 'perf --help'.
$
After:
$ perf reccord
perf: 'reccord' is not a perf-command. See 'perf --help'.
Did you mean this?
record
$
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240301201306.2680986-1-irogers@google.com
[ Added some fixes from Ian to problems I noticed while testing ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Add perf_pmu__name_from_config that does a reverse lookup from a
config number to an alias name. The lookup is expensive as the config
is computed for every alias by filling in a perf_event_attr, but this
is only done when verbose output is enabled. The lookup also only
considers config, and not config1, config2 or config3.
An example of the output:
$ perf stat -vv -e data_read true
...
perf_event_attr:
type 24 (uncore_imc_free_running_0)
size 136
config 0x20ff (data_read)
sample_type IDENTIFIER
read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
disabled 1
inherit 1
exclude_guest 1
...
Committer notes:
Fix the python binding build by adding dummies for not strictly
needed perf_pmu__name_from_config() and perf_pmus__find_by_type().
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-7-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
When dumping a perf_event_attr, use pmus to find the PMU and its name
by the type number. This allows dynamically added PMUs to be described.
Before:
$ perf stat -vv -e data_read true
...
perf_event_attr:
type 24
size 136
config 0x20ff
sample_type IDENTIFIER
read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
disabled 1
inherit 1
exclude_guest 1
...
After:
$ perf stat -vv -e data_read true
...
perf_event_attr:
type 24 (uncore_imc_free_running_0)
size 136
config 0x20ff
sample_type IDENTIFIER
read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
disabled 1
inherit 1
exclude_guest 1
...
However, it also means that when we have a PMU name we prefer it to a
hard coded name:
Before:
$ perf stat -vv -e faults true
...
perf_event_attr:
type 1 (PERF_TYPE_SOFTWARE)
size 136
config 0x2 (PERF_COUNT_SW_PAGE_FAULTS)
sample_type IDENTIFIER
read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
disabled 1
inherit 1
enable_on_exec 1
exclude_guest 1
...
After:
$ perf stat -vv -e faults true
...
perf_event_attr:
type 1 (software)
size 136
config 0x2 (PERF_COUNT_SW_PAGE_FAULTS)
sample_type IDENTIFIER
read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
disabled 1
inherit 1
enable_on_exec 1
exclude_guest 1
...
It feels more consistent to do this, rather than only prefer a PMU
name when a hard coded name isn't available.
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
List all the PMUs, not just the first core one, and list real format
specifiers with value ranges.
Before:
$ perf list
...
rNNN [Raw hardware event descriptor]
cpu/t1=v1[,t2=v2,t3 ...]/modifier [Raw hardware event descriptor]
[(see 'man perf-list' on how to encode it)]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
After:
$ perf list
...
rNNN [Raw event descriptor]
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
breakpoint//modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
intel_bts//modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
kprobe/retprobe/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
power/event=0..255/modifier [Raw event descriptor]
software//modifier [Raw event descriptor]
tracepoint//modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
With '--details' provide more details on the formats encoding:
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
cpu/event=0..255,pc,edge,offcore_rsp=0..0xffffffffffffffff,ldlat=0..0xffff,inv,
umask=0..255,frontend=0..0xffffff,cmask=0..255,config=0..0xffffffffffffffff,
config1=0..0xffffffffffffffff,config2=0..0xffffffffffffffff,config3=0..0xffffffffffffffff,
name=string,period=number,freq=number,branch_type=(u|k|hv|any|...),time,
call-graph=(fp|dwarf|lbr),stack-size=number,max-stack=number,nr=number,inherit,no-inherit,
overwrite,no-overwrite,percore,aux-output,aux-sample-size=number/modifier
breakpoint//modifier [Raw event descriptor]
breakpoint//modifier
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier
intel_bts//modifier [Raw event descriptor]
intel_bts//modifier
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,pt,notnt,branch,tsc,pwr_evt,fup_on_ptw,cyc,noretcomp,
mtc,psb_period=0..15,mtc_period=0..15/modifier
kprobe/retprobe/modifier [Raw event descriptor]
kprobe/retprobe/modifier
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier
power/event=0..255/modifier [Raw event descriptor]
power/event=0..255/modifier
software//modifier [Raw event descriptor]
software//modifier
tracepoint//modifier [Raw event descriptor]
tracepoint//modifier
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier
Committer notes:
Address this build error in various distros:
55 58.44 ubuntu:24.04 : FAIL gcc version 13.2.0 (Ubuntu 13.2.0-17ubuntu2)
util/pmu.c:1638:70: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
1638 | _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6);
| ^
| , ""
1 error generated.
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
"default_core" is used by jevents.py for json events' PMU name when
none is specified. On x86 the "default_core" is typically the PMU
"cpu". When creating an alias see if the event's PMU name is
"default_core" in which case don't record it. This means in places
like "perf list" the PMU's name will be used in its place.
Before:
$ perf list --details
...
cache:
l1d.replacement
[Counts the number of cache lines replaced in L1 data cache]
default_core/event=0x51,period=0x186a3,umask=0x1/
...
After:
$ perf list --details
...
cache:
l1d.replacement
[Counts the number of cache lines replaced in L1 data cache. Unit: cpu]
cpu/event=0x51,period=0x186a3,umask=0x1/
...
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The tracepoint id holds the config value and is probed in determining
what an event is. Add reading of the id so that we can display the
event encoding as:
$ perf list --details
...
alarmtimer:alarmtimer_cancel [Tracepoint event]
tracepoint/config=0x18c/
...
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
In some cases, it was able to find a type or location info (for per-cpu
variable) but cannot match because of invalid offset or missing global
information. In those cases, it's meaningless to go to the outer scope
and retry because there will be no additional information.
Let's change the return type of find_matching_type() and bail out if it
returns -1 for the cases.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-24-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
They are often searched by many different places. Let's add a cache
for them to reduce the duplicate DWARF access.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-23-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
When the stack protector is enabled, compiler would generate code to
check stack overflow with a special value called 'stack carary' at
runtime. On x86_64, GCC hard-codes the stack canary as %gs:40.
While there's a definition of fixed_percpu_data in asm/processor.h,
it seems that the header is not included everywhere and many places
it cannot find the type info. As it's in the well-known location (at
%gs:40), let's add a pseudo stack canary type to handle it specially.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-22-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
There are different patterns for percpu variable access using a constant
value added to the base.
2aeb: mov -0x7da0f7e0(,%rax,8),%r14 # r14 = __per_cpu_offset[cpu]
2af3: mov $0x34740,%rax # rax = address of runqueues
* 2afa: add %rax,%r14 # r14 = &per_cpu(runqueues, cpu)
2bfd: cmpl $0x0,0x10(%r14) # cpu_rq(cpu)->has_blocked_load
2b03: je 0x2b36
At the first instruction, r14 has the __per_cpu_offset. And then rax
has an immediate value and then added to r14 to calculate the address of
a per-cpu variable. So it needs to track the immediate values and ADD
instructions.
Similar but a little different case is to use "this_cpu_off" instead of
"__per_cpu_offset" for the current CPU. This time the variable address
comes with PC-rel addressing.
89: mov $0x34740,%rax # rax = address of runqueues
* 90: add %gs:0x7f015f60(%rip),%rax # 19a78 <this_cpu_off>
98: incl 0xd8c(%rax) # cpu_rq(cpu)->sched_count
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-21-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
This is to support per-cpu variable access often without a matching
DWARF entry. For some reason, I cannot find debug info of per-cpu
variables sometimes. They have more complex pattern to calculate the
address of per-cpu variables like below.
2b7d: mov -0x1e0(%rbp),%rax ; rax = cpu
2b84: mov -0x7da0f7e0(,%rax,8),%rcx ; rcx = __per_cpu_offset[cpu]
* 2b8c: mov 0x34870(%rcx),%rax ; *(__per_cpu_offset[cpu] + 0x34870)
Let's assume the rax register has a number for a CPU at 2b7d. The next
instruction is to get the per-cpu offset' for that cpu. The offset
-0x7da0f7e0 is 0xffffffff825f0820 in u64 which is the address of the
'__per_cpu_offset' array in my system. So it'd get the actual offset
of that CPU's per-cpu region and save it to the rcx register.
Then, at 2b8c, accesses using rcx can be handled same as the global
variable access. To handle this case, it should check if the offset
of the instruction matches to the address of '__per_cpu_offset'.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-20-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Like global variables, this per-cpu variables should be tracked
correctly. Factor our get_global_var_type() to handle both global
and per-cpu (for this cpu) variables in the same manner.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-19-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
On x86, the kernel gets the current task using the current macro like
below:
#define current get_current()
static __always_inline struct task_struct *get_current(void)
{
return this_cpu_read_stable(pcpu_hot.current_task);
}
So it returns the current_task field of struct pcpu_hot which is the
first member. On my build, it's located at 0x32940.
$ nm vmlinux | grep pcpu_hot
0000000000032940 D pcpu_hot
And the current macro generates the instructions like below:
mov %gs:0x32940, %rcx
So the %gs segment register points to the beginning of the per-cpu
region of this cpu and it points the variable with a constant.
Let's update the instruction location info to have a segment register
and handle %gs in kernel to look up a global variable. Pretend it as
a global variable by changing the register number to DWARF_REG_PC.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-18-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Add a segment field in the struct annotated_insn_loc and save it for the
segment based addressing like %gs:0x28. For simplicity it now handles
%gs register only.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-17-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
As instruction tracking updates the type state for each register, check
the final type info for the target register at the given instruction.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-16-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
If it failed to find a variable for the location directly, it might be
due to a missing variable in the source code. For example, accessing
pointer variables in a chain can result in the case like below:
struct foo *foo = ...;
int i = foo->bar->baz;
The DWARF debug information is created for each variable so it'd have
one for 'foo'. But there's no variable for 'foo->bar' and then it
cannot know the type of 'bar' and 'baz'.
The above source code can be compiled to the follow x86 instructions:
mov 0x8(%rax), %rcx
mov 0x4(%rcx), %rdx <=== PMU sample
mov %rdx, -4(%rbp)
Let's say 'foo' is located in the %rax and it has a pointer to struct
foo. But perf sample is captured in the second instruction and there
is no variable or type info for the %rcx.
It'd be great if compiler could generate debug info for %rcx, but we
should handle it on our side. So this patch implements the logic to
iterate instructions and update the type table for each location.
As it already collected a list of scopes including the target
instruction, we can use it to construct the type table smartly.
+---------------- scope[0] subprogram
|
| +-------------- scope[1] lexical_block
| |
| | +------------ scope[2] inlined_subroutine
| | |
| | | +---------- scope[3] inlined_subroutine
| | | |
| | | | +-------- scope[4] lexical_block
| | | | |
| | | | | *** target instruction
...
Image the target instruction has 5 scopes, each scope will have its own
variables and parameters. Then it can start with the innermost scope
(4). So it'd search the shortest path from the start of scope[4] to
the target address and build a list of basic blocks. Then it iterates
the basic blocks with the variables in the scope and update the table.
If it finds a type at the target instruction, then returns it.
Otherwise, it moves to the upper scope[3]. Now it'd search the shortest
path from the start of scope[3] to the start of scope[4]. Then connect
it to the existing basic block list. Then it'd iterate the blocks with
variables for both scopes. It can repeat this until it finds a type at
the target instruction or reaches to the top scope[0].
As the basic blocks contain the shortest path, it won't worry about
branches and can update the table simply.
The final check will be done by find_matching_type() in the next patch.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-15-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
When updating instruction states, the call instruction should play a
role since it changes the register states. For simplicity, mark some
registers as caller-saved registers (should be arch-dependent), and
invalidate them all after a function call.
If the function returns something, the designated register (ret_reg)
will have the type info.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-14-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
When updating the instruction states, it also needs to handle global
variable accesses. Same as it does for PC-relative addressing, it can
look up the type by address (if it's defined in the same file), or by
name after finding the symbol by address (for declarations).
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-13-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Accessing global variable is common when it tracks execution later.
Factor out the common code into a function for later use.
It adds thread and cpumode to struct data_loc_info to find (global)
symbols if needed. Also remove var_name as it's retrieved in the
helper function.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The update_insn_state() function is to update the type state table after
processing each instruction. For now, it handles MOV (on x86) insn
to transfer type info from the source location to the target.
The location can be a register or a stack slot. Check carefully when
memory reference happens and fetch the type correctly. It basically
ignores write to a memory since it doesn't change the type info. One
exception is writes to (new) stack slots for register spilling.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-11-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
As it collected basic block and variable information in each scope, it
now can build a state table to find matching variable at the location.
The struct type_state is to keep the type info saved in each register
and stack slot. The update_var_state() updates the table when it finds
variables in the current address. It expects die_collect_vars() filled
a list of variables with type info and starting address.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-10-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Add a new debug option "type-profile" to enable the detailed info during
the type analysis especially for instruction tracking. You can use this
before the command name like 'report' or 'annotate'.
$ perf --debug type-profile annotate --data-type
Committer testing:
First get some memory events:
$ perf mem record ls
Then, without data-type profiling debug:
$ perf annotate --data-type | head
Annotate type: 'struct rtld_global' in /usr/lib64/ld-linux-x86-64.so.2 (1 samples):
============================================================================
samples offset size field
1 0 4336 struct rtld_global {
0 0 0 struct link_namespaces* _dl_ns;
0 2560 8 size_t _dl_nns;
0 2568 40 __rtld_lock_recursive_t _dl_load_lock {
0 2568 40 pthread_mutex_t mutex {
0 2568 40 struct __pthread_mutex_s __data {
0 2568 4 int __lock;
$
And with only data-type profiling:
$ perf --debug type-profile annotate --data-type | head
-----------------------------------------------------------
find_data_type_die [1e67] for reg13873052 (PC) offset=0x150e2 in dl_main
CU die offset: 0x29cd3
found PC-rel by addr=0x34020 offset=0x20
-----------------------------------------------------------
find_data_type_die [2e] for reg12 offset=0 in __GI___readdir64
CU die offset: 0x137a45
frame base: cfa=1 fbreg=-1
found "__futex" in scope=2/2 (die: 0x137ad5) 0(reg12) type=int (die:2a)
-----------------------------------------------------------
find_data_type_die [52] for reg5 offset=0 in __memmove_avx_unaligned_erms
CU die offset: 0x1124ed
no variable found
Annotate type: 'struct rtld_global' in /usr/lib64/ld-linux-x86-64.so.2 (1 samples):
============================================================================
samples offset size field
1 0 4336 struct rtld_global {
0 0 0 struct link_namespaces* _dl_ns;
0 2560 8 size_t _dl_nns;
0 2568 40 __rtld_lock_recursive_t _dl_load_lock {
0 2568 40 pthread_mutex_t mutex {
0 2568 40 struct __pthread_mutex_s __data {
0 2568 4 int __lock;
$
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-9-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The annotate_get_basic_blocks() is to find a list of basic blocks from
the source instruction to the destination instruction in a function.
It'll be used to find variables in a scope. Use BFS (Breadth First
Search) to find a shortest path to carry the variable/register state
minimally.
Also change find_disasm_line() to be used in annotate_get_basic_blocks()
and add 'allow_update' argument to control if it can update the IP.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The find_data_type() needs many information to describe the location of
the data. Add the new 'struct data_loc_info' to pass those information at
once.
No functional changes intended.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Sometimes we want to convert an address in objdump output to
map-relative address to match with a sample data. Let's add
map__objdump_2rip() for that.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The die_find_func_rettype() is to find a debug entry for the given
function name and sets the type information of the return value. By
convention, it'd return the pointer to the type die (should be the
same as the given mem_die argument) if found, or NULL otherwise.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
We want to track type states as instructions are executed. Each
instruction can access compound types like struct or union and load/
store its members to a different location.
The die_deref_ptr_type() is to find a type of memory access with a
pointer variable. If it points to a compound type like struct, the
target memory is a member in the struct. The access will happen with an
offset indicating which member it refers. Let's follow the DWARF info
to figure out the type of the pointer target.
For example, say we have the following code.
struct foo {
int a;
int b;
};
struct foo *p = malloc(sizeof(*p));
p->b = 0;
The last pointer access should produce x86 asm like below:
mov 0x0, 4(%rbx)
And we know %rbx register has a pointer to struct foo. Then offset 4
should return the debug info of member 'b'.
Also variables of compound types can be accessed directly without a
pointer. The die_get_member_type() is to handle a such case.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-4-namhyung@kernel.org
[ Check if die_get_real_type() returned NULL ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
The die_collect_vars() is to find all variable information in the scope
including function parameters. The struct die_var_type is to save the
type of the variable with the location (reg and offset) as well as where
it's defined in the code (addr).
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
It's not used, let's get rid of it.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240319055115.4063940-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Rather than manually iterating the CPU map, use
perf_cpu_map__for_each_cpu(). When possible tidy local variables.
Reviewed-by: James Clark <james.clark@arm.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: Andrew Jones <ajones@ventanamicro.com>
Cc: André Almeida <andrealmeid@igalia.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Atish Patra <atishp@rivosinc.com>
Cc: Changbin Du <changbin.du@huawei.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paran Lee <p4ranlee@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Steinar H. Gunderson <sesse@google.com>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Cc: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20240202234057.2085863-9-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Most uses of what was perf_cpu_map__empty but is now
perf_cpu_map__has_any_cpu_or_is_empty want to do something with the
CPU map if it contains CPUs. Replace uses of
perf_cpu_map__has_any_cpu_or_is_empty with other helpers so that CPUs
within the map can be handled.
Reviewed-by: James Clark <james.clark@arm.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Cc: Andrew Jones <ajones@ventanamicro.com>
Cc: André Almeida <andrealmeid@igalia.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Atish Patra <atishp@rivosinc.com>
Cc: Changbin Du <changbin.du@huawei.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paran Lee <p4ranlee@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Steinar H. Gunderson <sesse@google.com>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Cc: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/20240202234057.2085863-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
If the --itrace option is used more than once, the options are
combined, but "i" and "y" (sub-)options can be corrupted because
itrace_do_parse_synth_opts() incorrectly overwrites the period type and
period with default values.
For example, with:
--itrace=i0ns --itrace=e
The processing of "--itrace=e", resets the "i" period from 0 nanoseconds
to the default 100 microseconds.
Fix by performing the default setting of period type and period only if
"i" or "y" are present in the currently processed --itrace value.
Fixes: f6986c95af84ff2a ("perf session: Add instruction tracing options")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240315071334.3478-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
That is a 'struct timespec' passed from userspace to the kernel as we
can see with a system wide syscall tracing:
root@number:~# perf trace -e nanosleep
0.000 (10.102 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
38.924 (10.077 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
100.177 (10.107 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
139.171 (10.063 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
200.603 (10.105 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
239.399 (10.064 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
300.994 (10.096 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
339.584 (10.067 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
401.335 (10.057 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
439.758 (10.166 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
501.814 (10.110 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
539.983 (10.227 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
602.284 (10.199 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
640.208 (10.105 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
702.662 (10.163 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
740.440 (10.107 ms): podman/2195174 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
802.993 (10.159 ms): podman/9150 nanosleep(rqtp: { .tv_sec: 0, .tv_nsec: 10000000 }) = 0
^Croot@number:~# strace -p 9150 -e nanosleep
If we then use the ptrace method to look at that podman process:
root@number:~# strace -p 9150 -e nanosleep
strace: Process 9150 attached
nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
nanosleep({tv_sec=0, tv_nsec=10000000}, NULL) = 0
^Cstrace: Process 9150 detached
root@number:~#
With some changes we can get something closer to the strace output,
still in system wide mode:
root@number:~# perf config trace.show_arg_names=false
root@number:~# perf config trace.show_duration=false
root@number:~# perf config trace.show_timestamp=false
root@number:~# perf config trace.show_zeros=true
root@number:~# perf config trace.args_alignment=0
root@number:~# perf trace -e nanosleep --max-events=10
podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/2195174 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
podman/9150 nanosleep({ .tv_sec: 0, .tv_nsec: 10000000 }, NULL) = 0
root@number:~#
root@number:~# perf config
trace.show_arg_names=false
trace.show_duration=false
trace.show_timestamp=false
trace.show_zeros=true
trace.args_alignment=0
root@number:~# cat ~/.perfconfig
# this file is auto-generated.
[trace]
show_arg_names = false
show_duration = false
show_timestamp = false
show_zeros = true
args_alignment = 0
root@number:~#
This will not get reused by any other syscall as nanosleep is the only
one to have as its first argument a 'struct timespec" pointer argument
passed from userspace to the kernel:
root@number:~# grep timespec /sys/kernel/tracing/events/syscalls/sys_enter_*/format | grep offset:16
/sys/kernel/tracing/events/syscalls/sys_enter_nanosleep/format: field:struct __kernel_timespec * rqtp; offset:16; size:8; signed:0;
root@number:~#
BTF based pretty printing will simplify all this, but then lets just get
the low hanging fruits first.
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/Zbq72dJRpOlfRWnf@kernel.org/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
Reviewed-by: Ian Rogers <irogers@google.com>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240304230815.1440583-5-namhyung@kernel.org
|
|
It's not used anymore and the code is coverted to use a hash map. Now
sym_hist has a static size, so no need to have sizeof_sym_hist in the
struct annotated_source.
Reviewed-by: Ian Rogers <irogers@google.com>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240304230815.1440583-4-namhyung@kernel.org
|
|
Use annotated_source.samples hashmap instead of addr array in the
struct sym_hist.
Reviewed-by: Ian Rogers <irogers@google.com>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240304230815.1440583-3-namhyung@kernel.org
|
|
Now symbol histogram uses an array to save per-offset sample counts.
But it wastes a lot of memory if the symbol has a few samples only.
Add a hashmap to save values only for actual samples.
For now, it has duplicate histogram (one in the existing array and
another in the new hash map). Once it can convert to use the hash
in all places, we can get rid of the array later.
Reviewed-by: Ian Rogers <irogers@google.com>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240304230815.1440583-2-namhyung@kernel.org
|
|
The threads data structure is an array of hashmaps, previously
rbtrees. The two levels allows for a fixed outer array where access is
guarded by rw_semaphores. Commit 91e467bc568f ("perf machine: Use
hashtable for machine threads") sized the outer table at 256 entries
to avoid future scalability problems, however, this means the threads
struct is sized at 30,720 bytes. As the hashmaps allow O(1) access for
the common find/insert/remove operations, lower the number of entries
to 8. This reduces the size overhead to 960 bytes.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-8-irogers@google.com
|
|
The rbtree provides a sorting on entries but this is unused. Switch to
using hashmap for O(1) rather than O(log n) find/insert/remove
complexity.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-7-irogers@google.com
|
|
Move threads out of machine and into its own file.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-6-irogers@google.com
|
|
Move thread_rb_node into the machine.c file. This hides the
implementation of threads from the rest of the code allowing for it to
be refactored.
Locking discipline is tightened up in this change. As the lock is now
encapsulated in threads, the findnew function requires holding it (as
it already did in machine). Rather than do conditionals with locks
based on whether the thread should be created (which could potentially
be error prone with a read lock match with a write unlock), have a
separate threads__find that won't create the thread and only holds the
read lock. This effectively duplicates the findnew logic, with the
existing findnew logic only operating under a write lock assuming
creation is necessary as a previous find failed. The creation may
still fail with the write lock due to another thread. The duplication
is removed in a later next patch that delegates the implementation to
hashtable.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-5-irogers@google.com
|
|
Avoid exposing the threads data structure by switching to the callback
machine__for_each_thread approach. machine__fprintf is only used in
tests and verbose >3 output so don't turn to list and sort. Add
machine__threads_nr to be refactored later.
Note, all existing *_fprintf routines ignore fprintf errors.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-4-irogers@google.com
|
|
Commit 91e467bc568f ("perf machine: Use hashtable for machine
threads") made the iteration of thread tids unordered. The perf trace
--summary output sorts and prints each hash bucket, rather than all
threads globally. Change this behavior by turn all threads into a
list, sort the list by number of trace events then by tids, finally
print the list. This also allows the rbtree in threads to be not
accessed outside of machine.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-3-irogers@google.com
|
|
Commit 91e467bc568f ("perf machine: Use hashtable for machine
threads") made the iteration of thread tids unordered. The perf report
--tasks output now shows child threads in an order determined by the
hashing. For example, in this snippet tid 3 appears after tid 256 even
though they have the same ppid 2:
```
$ perf report --tasks
% pid tid ppid comm
0 0 -1 |swapper
2 2 0 | kthreadd
256 256 2 | kworker/12:1H-k
693761 693761 2 | kworker/10:1-mm
1301762 1301762 2 | kworker/1:1-mm_
1302530 1302530 2 | kworker/u32:0-k
3 3 2 | rcu_gp
...
```
The output is easier to read if threads appear numerically
increasing. To allow for this, read all threads into a list then sort
with a comparator that orders by the child task's of the first common
parent. The list creation and deletion are created as utilities on
machine. The indentation is possible by counting the number of
parents a child has.
With this change the output for the same data file is now like:
```
$ perf report --tasks
% pid tid ppid comm
0 0 -1 |swapper
1 1 0 | systemd
823 823 1 | systemd-journal
853 853 1 | systemd-udevd
3230 3230 1 | systemd-timesyn
3236 3236 1 | auditd
3239 3239 3236 | audisp-syslog
3321 3321 1 | accounts-daemon
...
```
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301053646.1449657-2-irogers@google.com
|
|
The find will get the map, ensure puts are done on all paths.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240229062048.558799-1-irogers@google.com
|