diff options
Diffstat (limited to 'tools')
224 files changed, 8691 insertions, 1465 deletions
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 264e266a85bf6..c3af3f324c5ad 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) +/* POWER10 registers */ +#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) +#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) +#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index f599064dd8dc8..bdf5f10f8b9f5 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -48,6 +48,24 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, PERF_REG_POWERPC_MMCRA, - PERF_REG_POWERPC_MAX, + /* Extended registers */ + PERF_REG_POWERPC_MMCR0, + PERF_REG_POWERPC_MMCR1, + PERF_REG_POWERPC_MMCR2, + PERF_REG_POWERPC_MMCR3, + PERF_REG_POWERPC_SIER2, + PERF_REG_POWERPC_SIER3, + /* Max regs without the extended regs */ + PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, }; + +#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) + +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */ +#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK) +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */ +#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK) + +#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1) +#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h index 0e2eeeb1fd27b..f506cca520b06 100644 --- a/tools/arch/riscv/include/uapi/asm/unistd.h +++ b/tools/arch/riscv/include/uapi/asm/unistd.h @@ -12,7 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * along with this program. If not, see <https://www.gnu.org/licenses/>. */ #ifdef __LP64__ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 02dabc9e77b05..2901d5df4366c 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -96,6 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ +/* free ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -107,6 +108,7 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +/* free ( 3*32+29) */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ @@ -365,7 +367,9 @@ #define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e8370e64a155c..2859ee4f39a83 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -149,6 +149,10 @@ #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 + +#define MSR_IA32_POWER_CTL 0x000001fc +#define MSR_IA32_POWER_CTL_BIT_EE 19 + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -158,7 +162,23 @@ #define LBR_INFO_MISPRED BIT_ULL(63) #define LBR_INFO_IN_TX BIT_ULL(62) #define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60) #define LBR_INFO_CYCLES 0xffff +#define LBR_INFO_BR_TYPE_OFFSET 56 +#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) + +#define MSR_ARCH_LBR_CTL 0x000014ce +#define ARCH_LBR_CTL_LBREN BIT(0) +#define ARCH_LBR_CTL_CPL_OFFSET 1 +#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET) +#define ARCH_LBR_CTL_STACK_OFFSET 3 +#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET) +#define ARCH_LBR_CTL_FILTER_OFFSET 16 +#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET) +#define MSR_ARCH_LBR_DEPTH 0x000014cf +#define MSR_ARCH_LBR_FROM_0 0x00001500 +#define MSR_ARCH_LBR_TO_0 0x00001600 +#define MSR_ARCH_LBR_INFO_0 0x00001200 #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_PEBS_DATA_CFG 0x000003f2 @@ -253,8 +273,6 @@ #define MSR_PEBS_FRONTEND 0x000003f7 -#define MSR_IA32_POWER_CTL 0x000001fc - #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 @@ -418,7 +436,6 @@ #define MSR_AMD64_PATCH_LEVEL 0x0000008b #define MSR_AMD64_TSC_RATIO 0xc0000104 #define MSR_AMD64_NB_CFG 0xc001001f -#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 @@ -427,6 +444,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_BU_CFG2 0xc001102a @@ -466,6 +484,8 @@ #define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ +#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a +#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b #define MSR_F15H_PERF_CTL 0xc0010200 #define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL #define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) diff --git a/tools/bootconfig/samples/bad-override.bconf b/tools/bootconfig/samples/bad-override.bconf new file mode 100644 index 0000000000000..fde6c561512e1 --- /dev/null +++ b/tools/bootconfig/samples/bad-override.bconf @@ -0,0 +1,3 @@ +key.subkey = value +# We can not override pre-defined subkeys with value +key := value diff --git a/tools/bootconfig/samples/bad-override2.bconf b/tools/bootconfig/samples/bad-override2.bconf new file mode 100644 index 0000000000000..688587cb023c3 --- /dev/null +++ b/tools/bootconfig/samples/bad-override2.bconf @@ -0,0 +1,3 @@ +key = value +# We can not override pre-defined value with subkey +key.subkey := value diff --git a/tools/bootconfig/samples/good-override.bconf b/tools/bootconfig/samples/good-override.bconf new file mode 100644 index 0000000000000..7d31d5f8fbd8c --- /dev/null +++ b/tools/bootconfig/samples/good-override.bconf @@ -0,0 +1,6 @@ +# Override the value +key.word = 1,2,4 +key.word := 2,3 + +# No pre-defined key +key.new.word := "new" diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index 3c2ab9e757303..56284b98d8f0b 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -117,6 +117,19 @@ xpass grep -q "bar" $OUTFILE xpass grep -q "baz" $OUTFILE xpass grep -q "qux" $OUTFILE +echo "Override same-key values" +cat > $TEMPCONF << EOF +key = bar, baz +key := qux +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xfail grep -q "bar" $OUTFILE +xfail grep -q "baz" $OUTFILE +xpass grep -q "qux" $OUTFILE + echo "Double/single quotes test" echo "key = '\"string\"';" > $TEMPCONF $BOOTCONF -a $TEMPCONF $INITRD diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index c9dba7543dba1..3b1aad7535dd8 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -11,6 +11,7 @@ static int do_pin(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); + union bpf_iter_link_info linfo; const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; @@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv) map_fd = map_parse_fd(&argc, &argv); if (map_fd < 0) return -1; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + iter_opts.link_info = &linfo; + iter_opts.link_info_len = sizeof(linfo); } } @@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv) goto close_obj; } - if (map_fd >= 0) - iter_opts.map_fd = map_fd; - link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 52d883325a236..4d9ecb9758622 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -566,6 +566,7 @@ static int sets_patch(struct object *obj) next = rb_next(next); } + return 0; } static int symbols_patch(struct object *obj) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index cb152370fdefd..774f0b0ca28ac 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -8,7 +8,7 @@ endif feature_check = $(eval $(feature_check_code)) define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC=$(CC) CXX=$(CXX) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) endef feature_set = $(eval $(feature_set_code)) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 88371f7f0369f..846ee1341a5cc 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -74,8 +74,6 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -CC ?= $(CROSS_COMPILE)gcc -CXX ?= $(CROSS_COMPILE)g++ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config LLVM_CONFIG ?= llvm-config CLANG ?= clang diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py new file mode 100644 index 0000000000000..c4225ed63565a --- /dev/null +++ b/tools/cgroup/memcg_slabinfo.py @@ -0,0 +1,226 @@ +#!/usr/bin/env drgn +# +# Copyright (C) 2020 Roman Gushchin <guro@fb.com> +# Copyright (C) 2020 Facebook + +from os import stat +import argparse +import sys + +from drgn.helpers.linux import list_for_each_entry, list_empty +from drgn.helpers.linux import for_each_page +from drgn.helpers.linux.cpumask import for_each_online_cpu +from drgn.helpers.linux.percpu import per_cpu_ptr +from drgn import container_of, FaultError, Object + + +DESC = """ +This is a drgn script to provide slab statistics for memory cgroups. +It supports cgroup v2 and v1 and can emulate memory.kmem.slabinfo +interface of cgroup v1. +For drgn, visit https://github.com/osandov/drgn. +""" + + +MEMCGS = {} + +OO_SHIFT = 16 +OO_MASK = ((1 << OO_SHIFT) - 1) + + +def err(s): + print('slabinfo.py: error: %s' % s, file=sys.stderr, flush=True) + sys.exit(1) + + +def find_memcg_ids(css=prog['root_mem_cgroup'].css, prefix=''): + if not list_empty(css.children.address_of_()): + for css in list_for_each_entry('struct cgroup_subsys_state', + css.children.address_of_(), + 'sibling'): + name = prefix + '/' + css.cgroup.kn.name.string_().decode('utf-8') + memcg = container_of(css, 'struct mem_cgroup', 'css') + MEMCGS[css.cgroup.kn.id.value_()] = memcg + find_memcg_ids(css, name) + + +def is_root_cache(s): + try: + return False if s.memcg_params.root_cache else True + except AttributeError: + return True + + +def cache_name(s): + if is_root_cache(s): + return s.name.string_().decode('utf-8') + else: + return s.memcg_params.root_cache.name.string_().decode('utf-8') + + +# SLUB + +def oo_order(s): + return s.oo.x >> OO_SHIFT + + +def oo_objects(s): + return s.oo.x & OO_MASK + + +def count_partial(n, fn): + nr_pages = 0 + for page in list_for_each_entry('struct page', n.partial.address_of_(), + 'lru'): + nr_pages += fn(page) + return nr_pages + + +def count_free(page): + return page.objects - page.inuse + + +def slub_get_slabinfo(s, cfg): + nr_slabs = 0 + nr_objs = 0 + nr_free = 0 + + for node in range(cfg['nr_nodes']): + n = s.node[node] + nr_slabs += n.nr_slabs.counter.value_() + nr_objs += n.total_objects.counter.value_() + nr_free += count_partial(n, count_free) + + return {'active_objs': nr_objs - nr_free, + 'num_objs': nr_objs, + 'active_slabs': nr_slabs, + 'num_slabs': nr_slabs, + 'objects_per_slab': oo_objects(s), + 'cache_order': oo_order(s), + 'limit': 0, + 'batchcount': 0, + 'shared': 0, + 'shared_avail': 0} + + +def cache_show(s, cfg, objs): + if cfg['allocator'] == 'SLUB': + sinfo = slub_get_slabinfo(s, cfg) + else: + err('SLAB isn\'t supported yet') + + if cfg['shared_slab_pages']: + sinfo['active_objs'] = objs + sinfo['num_objs'] = objs + + print('%-17s %6lu %6lu %6u %4u %4d' + ' : tunables %4u %4u %4u' + ' : slabdata %6lu %6lu %6lu' % ( + cache_name(s), sinfo['active_objs'], sinfo['num_objs'], + s.size, sinfo['objects_per_slab'], 1 << sinfo['cache_order'], + sinfo['limit'], sinfo['batchcount'], sinfo['shared'], + sinfo['active_slabs'], sinfo['num_slabs'], + sinfo['shared_avail'])) + + +def detect_kernel_config(): + cfg = {} + + cfg['nr_nodes'] = prog['nr_online_nodes'].value_() + + if prog.type('struct kmem_cache').members[1][1] == 'flags': + cfg['allocator'] = 'SLUB' + elif prog.type('struct kmem_cache').members[1][1] == 'batchcount': + cfg['allocator'] = 'SLAB' + else: + err('Can\'t determine the slab allocator') + + cfg['shared_slab_pages'] = False + try: + if prog.type('struct obj_cgroup'): + cfg['shared_slab_pages'] = True + except: + pass + + return cfg + + +def for_each_slab_page(prog): + PGSlab = 1 << prog.constant('PG_slab') + PGHead = 1 << prog.constant('PG_head') + + for page in for_each_page(prog): + try: + if page.flags.value_() & PGSlab: + yield page + except FaultError: + pass + + +def main(): + parser = argparse.ArgumentParser(description=DESC, + formatter_class= + argparse.RawTextHelpFormatter) + parser.add_argument('cgroup', metavar='CGROUP', + help='Target memory cgroup') + args = parser.parse_args() + + try: + cgroup_id = stat(args.cgroup).st_ino + find_memcg_ids() + memcg = MEMCGS[cgroup_id] + except KeyError: + err('Can\'t find the memory cgroup') + + cfg = detect_kernel_config() + + print('# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>' + ' : tunables <limit> <batchcount> <sharedfactor>' + ' : slabdata <active_slabs> <num_slabs> <sharedavail>') + + if cfg['shared_slab_pages']: + obj_cgroups = set() + stats = {} + caches = {} + + # find memcg pointers belonging to the specified cgroup + obj_cgroups.add(memcg.objcg.value_()) + for ptr in list_for_each_entry('struct obj_cgroup', + memcg.objcg_list.address_of_(), + 'list'): + obj_cgroups.add(ptr.value_()) + + # look over all slab pages, belonging to non-root memcgs + # and look for objects belonging to the given memory cgroup + for page in for_each_slab_page(prog): + objcg_vec_raw = page.obj_cgroups.value_() + if objcg_vec_raw == 0: + continue + cache = page.slab_cache + if not cache: + continue + addr = cache.value_() + caches[addr] = cache + # clear the lowest bit to get the true obj_cgroups + objcg_vec = Object(prog, page.obj_cgroups.type_, + value=objcg_vec_raw & ~1) + + if addr not in stats: + stats[addr] = 0 + + for i in range(oo_objects(cache)): + if objcg_vec[i].value_() in obj_cgroups: + stats[addr] += 1 + + for addr in caches: + if stats[addr] > 0: + cache_show(caches[addr], cfg, stats[addr]) + + else: + for s in list_for_each_entry('struct kmem_cache', + memcg.kmem_caches.address_of_(), + 'memcg_params.kmem_caches_node'): + cache_show(s, cfg, None) + + +main() diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h index 348c6f47e4cc3..af8d0fe1c6ce9 100644 --- a/tools/include/linux/jhash.h +++ b/tools/include/linux/jhash.h @@ -5,7 +5,7 @@ * * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) * - * http://burtleburtle.net/bob/hash/ + * https://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index c8c189a5f0a6b..995b36c2ea7d8 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -850,6 +850,8 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open) #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) #endif +#define __NR_close_range 436 +__SYSCALL(__NR_close_range, sys_close_range) #define __NR_openat2 437 __SYSCALL(__NR_openat2, sys_openat2) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 14b67cd6b54b7..00546062e0235 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -55,7 +55,7 @@ extern "C" { * cause the related events to not be seen. * * I915_RESET_UEVENT - Event is generated just before an attempt to reset the - * the GPU. The value supplied with the event is always 1. NOTE: Disable + * GPU. The value supplied with the event is always 1. NOTE: Disable * reset via module parameter will cause this event to not be seen. */ #define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR" @@ -1934,7 +1934,7 @@ enum drm_i915_perf_property_id { /** * The value specifies which set of OA unit metrics should be - * be configured, defining the contents of any OA unit reports. + * configured, defining the contents of any OA unit reports. * * This property is available in perf revision 1. */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b134e679e9dbe..0480f893facd2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 8533bf07450f0..3d0d8231dc196 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -123,6 +123,7 @@ struct in_addr { #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 #define IP_RECVFRAGSIZE 25 +#define IP_RECVERR_RFC4884 26 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 21a1edd08cbe0..077e7ee69e3d8 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -383,7 +383,8 @@ struct perf_event_attr { bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ - __reserved_1 : 31; + text_poke : 1, /* include text poke events */ + __reserved_1 : 30; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1041,12 +1042,35 @@ enum perf_event_type { */ PERF_RECORD_CGROUP = 19, + /* + * Records changes to kernel text i.e. self-modified code. 'old_len' is + * the number of old bytes, 'new_len' is the number of new bytes. Either + * 'old_len' or 'new_len' may be zero to indicate, for example, the + * addition or removal of a trampoline. 'bytes' contains the old bytes + * followed immediately by the new bytes. + * + * struct { + * struct perf_event_header header; + * u64 addr; + * u16 old_len; + * u16 new_len; + * u8 bytes[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_TEXT_POKE = 20, + PERF_RECORD_MAX, /* non-ABI */ }; enum perf_record_ksymbol_type { PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0, PERF_RECORD_KSYMBOL_TYPE_BPF = 1, + /* + * Out of line code such as kprobe-replaced instructions or optimized + * kprobes or ftrace trampolines. + */ + PERF_RECORD_KSYMBOL_TYPE_OOL = 2, PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */ }; diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c index 58d44d5eee31d..5e6cb9debe37e 100644 --- a/tools/lib/api/fd/array.c +++ b/tools/lib/api/fd/array.c @@ -8,6 +8,7 @@ #include <poll.h> #include <stdlib.h> #include <unistd.h> +#include <string.h> void fdarray__init(struct fdarray *fda, int nr_autogrow) { @@ -19,7 +20,7 @@ void fdarray__init(struct fdarray *fda, int nr_autogrow) int fdarray__grow(struct fdarray *fda, int nr) { - void *priv; + struct priv *priv; int nr_alloc = fda->nr_alloc + nr; size_t psize = sizeof(fda->priv[0]) * nr_alloc; size_t size = sizeof(struct pollfd) * nr_alloc; @@ -34,6 +35,9 @@ int fdarray__grow(struct fdarray *fda, int nr) return -ENOMEM; } + memset(&entries[fda->nr_alloc], 0, sizeof(struct pollfd) * nr); + memset(&priv[fda->nr_alloc], 0, sizeof(fda->priv[0]) * nr); + fda->nr_alloc = nr_alloc; fda->entries = entries; fda->priv = priv; @@ -69,7 +73,7 @@ void fdarray__delete(struct fdarray *fda) free(fda); } -int fdarray__add(struct fdarray *fda, int fd, short revents) +int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags) { int pos = fda->nr; @@ -79,6 +83,7 @@ int fdarray__add(struct fdarray *fda, int fd, short revents) fda->entries[fda->nr].fd = fd; fda->entries[fda->nr].events = revents; + fda->priv[fda->nr].flags = flags; fda->nr++; return pos; } @@ -93,22 +98,22 @@ int fdarray__filter(struct fdarray *fda, short revents, return 0; for (fd = 0; fd < fda->nr; ++fd) { + if (!fda->entries[fd].events) + continue; + if (fda->entries[fd].revents & revents) { if (entry_destructor) entry_destructor(fda, fd, arg); + fda->entries[fd].revents = fda->entries[fd].events = 0; continue; } - if (fd != nr) { - fda->entries[nr] = fda->entries[fd]; - fda->priv[nr] = fda->priv[fd]; - } - - ++nr; + if (!(fda->priv[fd].flags & fdarray_flag__nonfilterable)) + ++nr; } - return fda->nr = nr; + return nr; } int fdarray__poll(struct fdarray *fda, int timeout) diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h index b39557d1a88f9..7fcf21a33c0c6 100644 --- a/tools/lib/api/fd/array.h +++ b/tools/lib/api/fd/array.h @@ -21,19 +21,27 @@ struct fdarray { int nr_alloc; int nr_autogrow; struct pollfd *entries; - union { - int idx; - void *ptr; + struct priv { + union { + int idx; + void *ptr; + }; + unsigned int flags; } *priv; }; +enum fdarray_flags { + fdarray_flag__default = 0x00000000, + fdarray_flag__nonfilterable = 0x00000001 +}; + void fdarray__init(struct fdarray *fda, int nr_autogrow); void fdarray__exit(struct fdarray *fda); struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow); void fdarray__delete(struct fdarray *fda); -int fdarray__add(struct fdarray *fda, int fd, short revents); +int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags); int fdarray__poll(struct fdarray *fda, int timeout); int fdarray__filter(struct fdarray *fda, short revents, void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index eab14c97c15d0..0750681057c28 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; attr.link_create.flags = OPTS_GET(opts, flags, 0); + attr.link_create.iter_info = + ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); + attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 28855fd5b5f43..015d13f25fcc4 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; + union bpf_iter_link_info *iter_info; + __u32 iter_info_len; }; -#define bpf_link_create_opts__last_field flags +#define bpf_link_create_opts__last_field iter_info_len LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 856b09a045637..4843e44916f7c 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -564,8 +564,8 @@ done: struct btf *btf__parse_raw(const char *path) { + struct btf *btf = NULL; void *data = NULL; - struct btf *btf; FILE *f = NULL; __u16 magic; int err = 0; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7be04e45d29ca..0a06124f7999a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8306,10 +8306,8 @@ bpf_program__attach_iter(struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); - if (OPTS_HAS(opts, map_fd)) { - target_fd = opts->map_fd; - link_create_opts.flags = BPF_ITER_LINK_MAP_FD; - } + link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); + link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3ed1399bfbbc2..5ecb4069a9f02 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 map_fd; + union bpf_iter_link_info *link_info; + __u32 link_info_len; }; -#define bpf_iter_attach_opts__last_field map_fd +#define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 6a875a0f01bb0..2208444ecb448 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -305,9 +305,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) } int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, - void *ptr, short revent) + void *ptr, short revent, enum fdarray_flags flags) { - int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); + int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags); if (pos >= 0) { evlist->pollfd.priv[pos].ptr = ptr; @@ -488,7 +488,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, revent = !overwrite ? POLLIN : 0; if (!evsel->system_wide && - perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) { + perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) { perf_mmap__put(map); return -1; } diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 74dc8c3f0b667..2d0fa02b036f6 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -45,7 +45,7 @@ struct perf_evlist_mmap_ops { int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, - void *ptr, short revent); + void *ptr, short revent, enum fdarray_flags flags); int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 69b44d2cc0f50..842028858d66e 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -111,6 +111,14 @@ struct perf_record_cgroup { char path[PATH_MAX]; }; +struct perf_record_text_poke_event { + struct perf_event_header header; + __u64 addr; + __u16 old_len; + __u16 new_len; + __u8 bytes[]; +}; + struct perf_record_sample { struct perf_event_header header; __u64 array[]; @@ -367,6 +375,7 @@ union perf_event { struct perf_record_sample sample; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; + struct perf_record_text_poke_event text_poke; struct perf_record_header_attr attr; struct perf_record_event_update event_update; struct perf_record_header_event_type event_type; diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 06ac7bd2144b7..727396de6be54 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -13,7 +13,7 @@ #include <linux/export.h> /* - * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree * * 1) A node is either red or black * 2) The root is black diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt index 596032ade31fd..4d6394397d92e 100644 --- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt +++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt @@ -3,7 +3,7 @@ libtraceevent(3) NAME ---- -tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins. +tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins. SYNOPSIS -------- @@ -13,6 +13,12 @@ SYNOPSIS struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_); void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_); +void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_, + void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep, + const char pass:[*]path, + const char pass:[*]name, + void pass:[*]data), + void pass:[*]_data_); -- DESCRIPTION @@ -22,11 +28,13 @@ directories. The _tep_ argument is trace event parser context. The plugin directories are : [verse] -- + - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST - System's plugin directory, defined at the library compile time. It depends on the library installation prefix and usually is _(install_preffix)/lib/traceevent/plugins_ - Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_ - User's plugin directory, located at _~/.local/lib/traceevent/plugins_ + - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST -- Loading of plugins can be controlled by the _tep_flags_, using the _tep_set_flag()_ API: @@ -44,6 +52,12 @@ _tep_load_plugins()_. The _tep_ argument is trace event parser context. The _plugin_list_ is the list of loaded plugins, returned by the _tep_load_plugins()_ function. +The _tep_load_plugins_hook_ function walks through all directories with plugins +and calls user specified _load_plugin()_ hook for each plugin file. Only files +with given _suffix_ are considered to be plugins. The _data_ is a user specified +context, passed to _load_plugin()_. Directories and the walk order are the same +as in _tep_load_plugins()_ API. + RETURN VALUE ------------ The _tep_load_plugins()_ function returns a list of successfully loaded plugins, @@ -63,6 +77,15 @@ if (plugins == NULL) { } ... tep_unload_plugins(plugins, tep); +... +void print_plugin(struct tep_handle *tep, const char *path, + const char *name, void *data) +{ + pritnf("Found libtraceevent plugin %s/%s\n", path, name); +} +... +tep_load_plugins_hook(tep, ".so", print_plugin, NULL); +... -- FILES diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h index cee469803a34a..d805a920af6f6 100644 --- a/tools/lib/traceevent/event-parse-local.h +++ b/tools/lib/traceevent/event-parse-local.h @@ -13,6 +13,7 @@ struct func_map; struct func_list; struct event_handler; struct func_resolver; +struct tep_plugins_dir; struct tep_handle { int ref_count; @@ -47,7 +48,6 @@ struct tep_handle { struct printk_list *printklist; unsigned int printk_count; - struct tep_event **events; int nr_events; struct tep_event **sort_events; @@ -81,10 +81,30 @@ struct tep_handle { /* cache */ struct tep_event *last_event; + + struct tep_plugins_dir *plugins_dir; +}; + +enum tep_print_parse_type { + PRINT_FMT_STRING, + PRINT_FMT_ARG_DIGIT, + PRINT_FMT_ARG_POINTER, + PRINT_FMT_ARG_STRING, +}; + +struct tep_print_parse { + struct tep_print_parse *next; + + char *format; + int ls; + enum tep_print_parse_type type; + struct tep_print_arg *arg; + struct tep_print_arg *len_as_arg; }; void tep_free_event(struct tep_event *event); void tep_free_format_field(struct tep_format_field *field); +void tep_free_plugin_paths(struct tep_handle *tep); unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data); unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data); diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ba4f33804af15..3ba566de821c6 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4565,43 +4565,93 @@ get_bprint_format(void *data, int size __maybe_unused, return format; } -static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, - struct tep_event *event, struct tep_print_arg *arg) +static int print_mac_arg(struct trace_seq *s, const char *format, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg) { - unsigned char *buf; const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; + bool reverse = false; + unsigned char *buf; + int ret = 0; if (arg->type == TEP_PRINT_FUNC) { process_defined_func(s, data, size, event, arg); - return; + return 0; } if (arg->type != TEP_PRINT_FIELD) { trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return; + return 0; } - if (mac == 'm') + if (format[0] == 'm') { fmt = "%.2x%.2x%.2x%.2x%.2x%.2x"; + } else if (format[0] == 'M' && format[1] == 'F') { + fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x"; + ret++; + } + if (format[1] == 'R') { + reverse = true; + ret++; + } + if (!arg->field.field) { arg->field.field = tep_find_any_field(event, arg->field.name); if (!arg->field.field) { do_warning_event(event, "%s: field %s not found", __func__, arg->field.name); - return; + return ret; } } if (arg->field.field->size != 6) { trace_seq_printf(s, "INVALIDMAC"); - return; + return ret; } + buf = data + arg->field.field->offset; - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); + if (reverse) + trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]); + else + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); + + return ret; } -static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) +static int parse_ip4_print_args(struct tep_handle *tep, + const char *ptr, bool *reverse) +{ + int ret = 0; + + *reverse = false; + + /* hnbl */ + switch (*ptr) { + case 'h': + if (tep->file_bigendian) + *reverse = false; + else + *reverse = true; + ret++; + break; + case 'l': + *reverse = true; + ret++; + break; + case 'n': + case 'b': + ret++; + /* fall through */ + default: + *reverse = false; + break; + } + + return ret; +} + +static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf) { const char *fmt; @@ -4610,7 +4660,11 @@ static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) else fmt = "%d.%d.%d.%d"; - trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); + if (reverse) + trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]); + else + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); + } static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) @@ -4693,7 +4747,7 @@ static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) if (useIPv4) { if (needcolon) trace_seq_printf(s, ":"); - print_ip4_addr(s, 'I', &in6.s6_addr[12]); + print_ip4_addr(s, 'I', false, &in6.s6_addr[12]); } return; @@ -4722,16 +4776,20 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { + bool reverse = false; unsigned char *buf; + int ret; + + ret = parse_ip4_print_args(event->tep, ptr, &reverse); if (arg->type == TEP_PRINT_FUNC) { process_defined_func(s, data, size, event, arg); - return 0; + return ret; } if (arg->type != TEP_PRINT_FIELD) { trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); - return 0; + return ret; } if (!arg->field.field) { @@ -4740,7 +4798,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, if (!arg->field.field) { do_warning("%s: field %s not found", __func__, arg->field.name); - return 0; + return ret; } } @@ -4748,11 +4806,12 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, if (arg->field.field->size != 4) { trace_seq_printf(s, "INVALIDIPv4"); - return 0; + return ret; } - print_ip4_addr(s, i, buf); - return 0; + print_ip4_addr(s, i, reverse, buf); + return ret; + } static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, @@ -4812,7 +4871,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, char have_c = 0, have_p = 0; unsigned char *buf; struct sockaddr_storage *sa; + bool reverse = false; int rc = 0; + int ret; /* pISpc */ if (i == 'I') { @@ -4827,6 +4888,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, rc++; } } + ret = parse_ip4_print_args(event->tep, ptr, &reverse); + ptr += ret; + rc += ret; if (arg->type == TEP_PRINT_FUNC) { process_defined_func(s, data, size, event, arg); @@ -4858,7 +4922,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, return rc; } - print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr); + print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr); if (have_p) trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); @@ -4892,25 +4956,20 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr, struct tep_print_arg *arg) { char i = *ptr; /* 'i' or 'I' */ - char ver; - int rc = 0; + int rc = 1; + /* IP version */ ptr++; - rc++; - ver = *ptr; - ptr++; - rc++; - - switch (ver) { + switch (*ptr) { case '4': - rc += print_ipv4_arg(s, ptr, i, data, size, event, arg); + rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg); break; case '6': - rc += print_ipv6_arg(s, ptr, i, data, size, event, arg); + rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg); break; case 'S': - rc += print_ipsa_arg(s, ptr, i, data, size, event, arg); + rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg); break; default: return 0; @@ -4919,6 +4978,133 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr, return rc; } +static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; +static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + +static int print_uuid_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg) +{ + const int *index = uuid_index; + char *format = "%02x"; + int ret = 0; + char *buf; + int i; + + switch (*(ptr + 1)) { + case 'L': + format = "%02X"; + /* fall through */ + case 'l': + index = guid_index; + ret++; + break; + case 'B': + format = "%02X"; + /* fall through */ + case 'b': + ret++; + break; + } + + if (arg->type == TEP_PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return ret; + } + + if (arg->type != TEP_PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return ret; + } + + if (!arg->field.field) { + arg->field.field = + tep_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return ret; + } + } + + if (arg->field.field->size != 16) { + trace_seq_printf(s, "INVALIDUUID"); + return ret; + } + + buf = data + arg->field.field->offset; + + for (i = 0; i < 16; i++) { + trace_seq_printf(s, format, buf[index[i]] & 0xff); + switch (i) { + case 3: + case 5: + case 7: + case 9: + trace_seq_printf(s, "-"); + break; + } + } + + return ret; +} + +static int print_raw_buff_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg, int print_len) +{ + int plen = print_len; + char *delim = " "; + int ret = 0; + char *buf; + int i; + unsigned long offset; + int arr_len; + + switch (*(ptr + 1)) { + case 'C': + delim = ":"; + ret++; + break; + case 'D': + delim = "-"; + ret++; + break; + case 'N': + delim = ""; + ret++; + break; + } + + if (arg->type == TEP_PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return ret; + } + + if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return ret; + } + + offset = tep_read_number(event->tep, + data + arg->dynarray.field->offset, + arg->dynarray.field->size); + arr_len = (unsigned long long)(offset >> 16); + buf = data + (offset & 0xffff); + + if (arr_len < plen) + plen = arr_len; + + if (plen < 1) + return ret; + + trace_seq_printf(s, "%02x", buf[0] & 0xff); + for (i = 1; i < plen; i++) + trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff); + + return ret; +} + static int is_printable_array(char *p, unsigned int len) { unsigned int i; @@ -5007,264 +5193,567 @@ void tep_print_fields(struct trace_seq *s, void *data, } } -static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) +static int print_function(struct trace_seq *s, const char *format, + void *data, int size, struct tep_event *event, + struct tep_print_arg *arg) { - struct tep_handle *tep = event->tep; - struct tep_print_fmt *print_fmt = &event->print_fmt; - struct tep_print_arg *arg = print_fmt->args; - struct tep_print_arg *args = NULL; - const char *ptr = print_fmt->format; - unsigned long long val; struct func_map *func; - const char *saveptr; - struct trace_seq p; - char *bprint_fmt = NULL; - char format[32]; - int show_func; - int len_as_arg; - int len_arg = 0; - int len; - int ls; + unsigned long long val; - if (event->flags & TEP_EVENT_FL_FAILED) { - trace_seq_printf(s, "[FAILED TO PARSE]"); - tep_print_fields(s, data, size, event); - return; + val = eval_num_arg(data, size, event, arg); + func = find_func(event->tep, val); + if (func) { + trace_seq_puts(s, func->func); + if (*format == 'F' || *format == 'S') + trace_seq_printf(s, "+0x%llx", val - func->addr); + } else { + if (event->tep->long_size == 4) + trace_seq_printf(s, "0x%lx", (long)val); + else + trace_seq_printf(s, "0x%llx", (long long)val); } - if (event->flags & TEP_EVENT_FL_ISBPRINT) { - bprint_fmt = get_bprint_format(data, size, event); - args = make_bprint_args(bprint_fmt, data, size, event); - arg = args; - ptr = bprint_fmt; + return 0; +} + +static int print_arg_pointer(struct trace_seq *s, const char *format, int plen, + void *data, int size, + struct tep_event *event, struct tep_print_arg *arg) +{ + unsigned long long val; + int ret = 1; + + if (arg->type == TEP_PRINT_BSTRING) { + trace_seq_puts(s, arg->string.string); + return 0; + } + while (*format) { + if (*format == 'p') { + format++; + break; + } + format++; } - for (; *ptr; ptr++) { - ls = 0; - if (*ptr == '\\') { - ptr++; - switch (*ptr) { + switch (*format) { + case 'F': + case 'f': + case 'S': + case 's': + ret += print_function(s, format, data, size, event, arg); + break; + case 'M': + case 'm': + ret += print_mac_arg(s, format, data, size, event, arg); + break; + case 'I': + case 'i': + ret += print_ip_arg(s, format, data, size, event, arg); + break; + case 'U': + ret += print_uuid_arg(s, format, data, size, event, arg); + break; + case 'h': + ret += print_raw_buff_arg(s, format, data, size, event, arg, plen); + break; + default: + ret = 0; + val = eval_num_arg(data, size, event, arg); + trace_seq_printf(s, "%p", (void *)val); + break; + } + + return ret; + +} + +static int print_arg_number(struct trace_seq *s, const char *format, int plen, + void *data, int size, int ls, + struct tep_event *event, struct tep_print_arg *arg) +{ + unsigned long long val; + + val = eval_num_arg(data, size, event, arg); + + switch (ls) { + case -2: + if (plen >= 0) + trace_seq_printf(s, format, plen, (char)val); + else + trace_seq_printf(s, format, (char)val); + break; + case -1: + if (plen >= 0) + trace_seq_printf(s, format, plen, (short)val); + else + trace_seq_printf(s, format, (short)val); + break; + case 0: + if (plen >= 0) + trace_seq_printf(s, format, plen, (int)val); + else + trace_seq_printf(s, format, (int)val); + break; + case 1: + if (plen >= 0) + trace_seq_printf(s, format, plen, (long)val); + else + trace_seq_printf(s, format, (long)val); + break; + case 2: + if (plen >= 0) + trace_seq_printf(s, format, plen, (long long)val); + else + trace_seq_printf(s, format, (long long)val); + break; + default: + do_warning_event(event, "bad count (%d)", ls); + event->flags |= TEP_EVENT_FL_FAILED; + } + return 0; +} + + +static void print_arg_string(struct trace_seq *s, const char *format, int plen, + void *data, int size, + struct tep_event *event, struct tep_print_arg *arg) +{ + struct trace_seq p; + + /* Use helper trace_seq */ + trace_seq_init(&p); + print_str_arg(&p, data, size, event, + format, plen, arg); + trace_seq_terminate(&p); + trace_seq_puts(s, p.buffer); + trace_seq_destroy(&p); +} + +static int parse_arg_format_pointer(const char *format) +{ + int ret = 0; + int index; + int loop; + + switch (*format) { + case 'F': + case 'S': + case 'f': + case 's': + ret++; + break; + case 'M': + case 'm': + /* [mM]R , [mM]F */ + switch (format[1]) { + case 'R': + case 'F': + ret++; + break; + } + ret++; + break; + case 'I': + case 'i': + index = 2; + loop = 1; + switch (format[1]) { + case 'S': + /*[S][pfs]*/ + while (loop) { + switch (format[index]) { + case 'p': + case 'f': + case 's': + ret++; + index++; + break; + default: + loop = 0; + break; + } + } + /* fall through */ + case '4': + /* [4S][hnbl] */ + switch (format[index]) { + case 'h': case 'n': - trace_seq_putc(s, '\n'); - break; - case 't': - trace_seq_putc(s, '\t'); - break; - case 'r': - trace_seq_putc(s, '\r'); - break; - case '\\': - trace_seq_putc(s, '\\'); + case 'l': + case 'b': + ret++; + index++; break; - default: - trace_seq_putc(s, *ptr); + } + if (format[1] == '4') { + ret++; break; } + /* fall through */ + case '6': + /* [6S]c */ + if (format[index] == 'c') + ret++; + ret++; + break; + } + ret++; + break; + case 'U': + switch (format[1]) { + case 'L': + case 'l': + case 'B': + case 'b': + ret++; + break; + } + ret++; + break; + case 'h': + switch (format[1]) { + case 'C': + case 'D': + case 'N': + ret++; + break; + } + ret++; + break; + default: + break; + } - } else if (*ptr == '%') { - saveptr = ptr; - show_func = 0; - len_as_arg = 0; - cont_process: - ptr++; - switch (*ptr) { - case '%': - trace_seq_putc(s, '%'); - break; - case '#': - /* FIXME: need to handle properly */ - goto cont_process; - case 'h': - ls--; - goto cont_process; - case 'l': - ls++; - goto cont_process; - case 'L': - ls = 2; - goto cont_process; - case '*': - /* The argument is the length. */ - if (!arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } - len_arg = eval_num_arg(data, size, event, arg); - len_as_arg = 1; - arg = arg->next; - goto cont_process; - case '.': - case 'z': - case 'Z': - case '0' ... '9': - case '-': - goto cont_process; - case 'p': - if (tep->long_size == 4) - ls = 1; - else - ls = 2; + return ret; +} - if (isalnum(ptr[1])) - ptr++; +static void free_parse_args(struct tep_print_parse *arg) +{ + struct tep_print_parse *del; - if (arg->type == TEP_PRINT_BSTRING) { - trace_seq_puts(s, arg->string.string); - arg = arg->next; - break; - } + while (arg) { + del = arg; + arg = del->next; + free(del->format); + free(del); + } +} - if (*ptr == 'F' || *ptr == 'f' || - *ptr == 'S' || *ptr == 's') { - show_func = *ptr; - } else if (*ptr == 'M' || *ptr == 'm') { - print_mac_arg(s, *ptr, data, size, event, arg); - arg = arg->next; - break; - } else if (*ptr == 'I' || *ptr == 'i') { - int n; +static int parse_arg_add(struct tep_print_parse **parse, char *format, + enum tep_print_parse_type type, + struct tep_print_arg *arg, + struct tep_print_arg *len_as_arg, + int ls) +{ + struct tep_print_parse *parg = NULL; - n = print_ip_arg(s, ptr, data, size, event, arg); - if (n > 0) { - ptr += n - 1; - arg = arg->next; - break; - } - } + parg = calloc(1, sizeof(*parg)); + if (!parg) + goto error; + parg->format = strdup(format); + if (!parg->format) + goto error; + parg->type = type; + parg->arg = arg; + parg->len_as_arg = len_as_arg; + parg->ls = ls; + *parse = parg; + return 0; +error: + if (parg) { + free(parg->format); + free(parg); + } + return -1; +} - /* fall through */ - case 'd': - case 'u': - case 'i': - case 'x': - case 'X': - case 'o': - if (!arg) { - do_warning_event(event, "no argument match"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } +static int parse_arg_format(struct tep_print_parse **parse, + struct tep_event *event, + const char *format, struct tep_print_arg **arg) +{ + struct tep_print_arg *len_arg = NULL; + char print_format[32]; + const char *start = format; + int ret = 0; + int ls = 0; + int res; + int len; - len = ((unsigned long)ptr + 1) - - (unsigned long)saveptr; + format++; + ret++; + for (; *format; format++) { + switch (*format) { + case '#': + /* FIXME: need to handle properly */ + break; + case 'h': + ls--; + break; + case 'l': + ls++; + break; + case 'L': + ls = 2; + break; + case '.': + case 'z': + case 'Z': + case '0' ... '9': + case '-': + break; + case '*': + /* The argument is the length. */ + if (!*arg) { + do_warning_event(event, "no argument match"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } + if (len_arg) { + do_warning_event(event, "argument already matched"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } + len_arg = *arg; + *arg = (*arg)->next; + break; + case 'p': + if (!*arg) { + do_warning_event(event, "no argument match"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } + res = parse_arg_format_pointer(format + 1); + if (res > 0) { + format += res; + ret += res; + } + len = ((unsigned long)format + 1) - + (unsigned long)start; + /* should never happen */ + if (len > 31) { + do_warning_event(event, "bad format!"); + event->flags |= TEP_EVENT_FL_FAILED; + len = 31; + } + memcpy(print_format, start, len); + print_format[len] = 0; - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } + parse_arg_add(parse, print_format, + PRINT_FMT_ARG_POINTER, *arg, len_arg, ls); + *arg = (*arg)->next; + ret++; + return ret; + case 'd': + case 'u': + case 'i': + case 'x': + case 'X': + case 'o': + if (!*arg) { + do_warning_event(event, "no argument match"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } - memcpy(format, saveptr, len); - format[len] = 0; + len = ((unsigned long)format + 1) - + (unsigned long)start; - val = eval_num_arg(data, size, event, arg); - arg = arg->next; + /* should never happen */ + if (len > 30) { + do_warning_event(event, "bad format!"); + event->flags |= TEP_EVENT_FL_FAILED; + len = 31; + } + memcpy(print_format, start, len); + print_format[len] = 0; - if (show_func) { - func = find_func(tep, val); - if (func) { - trace_seq_puts(s, func->func); - if (show_func == 'F') - trace_seq_printf(s, - "+0x%llx", - val - func->addr); - break; - } - } - if (tep->long_size == 8 && ls == 1 && - sizeof(long) != 8) { - char *p; - - /* make %l into %ll */ - if (ls == 1 && (p = strchr(format, 'l'))) - memmove(p+1, p, strlen(p)+1); - else if (strcmp(format, "%p") == 0) - strcpy(format, "0x%llx"); - ls = 2; - } - switch (ls) { - case -2: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (char)val); - else - trace_seq_printf(s, format, (char)val); - break; - case -1: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (short)val); - else - trace_seq_printf(s, format, (short)val); - break; - case 0: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (int)val); - else - trace_seq_printf(s, format, (int)val); - break; - case 1: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, (long)val); - else - trace_seq_printf(s, format, (long)val); - break; - case 2: - if (len_as_arg) - trace_seq_printf(s, format, len_arg, - (long long)val); - else - trace_seq_printf(s, format, (long long)val); - break; - default: - do_warning_event(event, "bad count (%d)", ls); - event->flags |= TEP_EVENT_FL_FAILED; - } - break; - case 's': - if (!arg) { - do_warning_event(event, "no matching argument"); - event->flags |= TEP_EVENT_FL_FAILED; - goto out_failed; - } + if (event->tep->long_size == 8 && ls == 1 && + sizeof(long) != 8) { + char *p; + + /* make %l into %ll */ + if (ls == 1 && (p = strchr(print_format, 'l'))) + memmove(p+1, p, strlen(p)+1); + ls = 2; + } + if (ls < -2 || ls > 2) { + do_warning_event(event, "bad count (%d)", ls); + event->flags |= TEP_EVENT_FL_FAILED; + } + parse_arg_add(parse, print_format, + PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls); + *arg = (*arg)->next; + ret++; + return ret; + case 's': + if (!*arg) { + do_warning_event(event, "no matching argument"); + event->flags |= TEP_EVENT_FL_FAILED; + goto out_failed; + } - len = ((unsigned long)ptr + 1) - - (unsigned long)saveptr; + len = ((unsigned long)format + 1) - + (unsigned long)start; - /* should never happen */ - if (len > 31) { - do_warning_event(event, "bad format!"); - event->flags |= TEP_EVENT_FL_FAILED; - len = 31; - } + /* should never happen */ + if (len > 31) { + do_warning_event(event, "bad format!"); + event->flags |= TEP_EVENT_FL_FAILED; + len = 31; + } + + memcpy(print_format, start, len); + print_format[len] = 0; + + parse_arg_add(parse, print_format, + PRINT_FMT_ARG_STRING, *arg, len_arg, 0); + *arg = (*arg)->next; + ret++; + return ret; + default: + snprintf(print_format, 32, ">%c<", *format); + parse_arg_add(parse, print_format, + PRINT_FMT_STRING, NULL, NULL, 0); + ret++; + return ret; + } + ret++; + } + +out_failed: + return ret; - memcpy(format, saveptr, len); - format[len] = 0; - if (!len_as_arg) - len_arg = -1; - /* Use helper trace_seq */ - trace_seq_init(&p); - print_str_arg(&p, data, size, event, - format, len_arg, arg); - trace_seq_terminate(&p); - trace_seq_puts(s, p.buffer); - trace_seq_destroy(&p); - arg = arg->next; +} + +static int parse_arg_string(struct tep_print_parse **parse, const char *format) +{ + struct trace_seq s; + int ret = 0; + + trace_seq_init(&s); + for (; *format; format++) { + if (*format == '\\') { + format++; + ret++; + switch (*format) { + case 'n': + trace_seq_putc(&s, '\n'); + break; + case 't': + trace_seq_putc(&s, '\t'); + break; + case 'r': + trace_seq_putc(&s, '\r'); + break; + case '\\': + trace_seq_putc(&s, '\\'); break; default: - trace_seq_printf(s, ">%c<", *ptr); - + trace_seq_putc(&s, *format); + break; } + } else if (*format == '%') { + if (*(format + 1) == '%') { + trace_seq_putc(&s, '%'); + format++; + ret++; + } else + break; } else - trace_seq_putc(s, *ptr); + trace_seq_putc(&s, *format); + + ret++; + } + trace_seq_terminate(&s); + parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0); + trace_seq_destroy(&s); + + return ret; +} + +static struct tep_print_parse * +parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg) +{ + struct tep_print_parse *parse_ret = NULL; + struct tep_print_parse **parse = NULL; + int ret; + int len; + + len = strlen(format); + while (*format) { + if (!parse_ret) + parse = &parse_ret; + if (*format == '%' && *(format + 1) != '%') + ret = parse_arg_format(parse, event, format, &arg); + else + ret = parse_arg_string(parse, format); + if (*parse) + parse = &((*parse)->next); + + len -= ret; + if (len > 0) + format += ret; + else + break; + } + return parse_ret; +} + +static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s, + void *data, int size, struct tep_event *event) +{ + int len_arg; + + while (parse) { + if (parse->len_as_arg) + len_arg = eval_num_arg(data, size, event, parse->len_as_arg); + switch (parse->type) { + case PRINT_FMT_ARG_DIGIT: + print_arg_number(s, parse->format, + parse->len_as_arg ? len_arg : -1, data, + size, parse->ls, event, parse->arg); + break; + case PRINT_FMT_ARG_POINTER: + print_arg_pointer(s, parse->format, + parse->len_as_arg ? len_arg : 1, + data, size, event, parse->arg); + break; + case PRINT_FMT_ARG_STRING: + print_arg_string(s, parse->format, + parse->len_as_arg ? len_arg : -1, + data, size, event, parse->arg); + break; + case PRINT_FMT_STRING: + default: + trace_seq_printf(s, "%s", parse->format); + break; + } + parse = parse->next; } +} + +static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) +{ + struct tep_print_parse *parse = event->print_fmt.print_cache; + struct tep_print_arg *args = NULL; + char *bprint_fmt = NULL; if (event->flags & TEP_EVENT_FL_FAILED) { -out_failed: trace_seq_printf(s, "[FAILED TO PARSE]"); + tep_print_fields(s, data, size, event); + return; } - if (args) { + if (event->flags & TEP_EVENT_FL_ISBPRINT) { + bprint_fmt = get_bprint_format(data, size, event); + args = make_bprint_args(bprint_fmt, data, size, event); + parse = parse_args(event, bprint_fmt, args); + } + + print_event_cache(parse, s, data, size, event); + + if (event->flags & TEP_EVENT_FL_ISBPRINT) { + free_parse_args(parse); free_args(args); free(bprint_fmt); } @@ -6363,9 +6852,13 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, *list = arg; list = &arg->next; } - return 0; } + if (!(event->flags & TEP_EVENT_FL_ISBPRINT)) + event->print_fmt.print_cache = parse_args(event, + event->print_fmt.format, + event->print_fmt.args); + return 0; event_parse_failed: @@ -7032,7 +7525,7 @@ void tep_free_event(struct tep_event *event) free(event->print_fmt.format); free_args(event->print_fmt.args); - + free_parse_args(event->print_fmt.print_cache); free(event); } @@ -7120,6 +7613,7 @@ void tep_free(struct tep_handle *tep) free(tep->events); free(tep->sort_events); free(tep->func_resolver); + tep_free_plugin_paths(tep); free(tep); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index b77837f75a0d7..c29b693e31eee 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -1,21 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #ifndef _PARSE_EVENTS_H #define _PARSE_EVENTS_H @@ -272,9 +258,12 @@ struct tep_print_arg { }; }; +struct tep_print_parse; + struct tep_print_fmt { char *format; struct tep_print_arg *args; + struct tep_print_parse *print_cache; }; struct tep_event { @@ -379,7 +368,7 @@ enum tep_errno { * errno since SUS requires the errno has distinct positive values. * See 'Issue 6' in the link below. * - * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html */ __TEP_ERRNO__START = -100000, @@ -393,14 +382,29 @@ struct tep_plugin_list; #define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) +enum tep_plugin_load_priority { + TEP_PLUGIN_FIRST, + TEP_PLUGIN_LAST, +}; + +int tep_add_plugin_path(struct tep_handle *tep, char *path, + enum tep_plugin_load_priority prio); struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep); void tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep); +void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, + void (*load_plugin)(struct tep_handle *tep, + const char *path, + const char *name, + void *data), + void *data); char **tep_plugin_list_options(void); void tep_plugin_free_options_list(char **list); int tep_plugin_add_options(const char *name, struct tep_plugin_option *options); +int tep_plugin_add_option(const char *name, const char *val); void tep_plugin_remove_options(struct tep_plugin_option *options); +void tep_plugin_print_options(struct trace_seq *s); void tep_print_plugins(struct trace_seq *s, const char *prefix, const char *suffix, const struct tep_plugin_list *list); diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index e1f7ddd5a6cf0..e7c2acb8680fc 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -13,6 +13,7 @@ #include <sys/stat.h> #include <unistd.h> #include <dirent.h> +#include <errno.h> #include "event-parse.h" #include "event-parse-local.h" #include "event-utils.h" @@ -38,6 +39,12 @@ struct tep_plugin_list { void *handle; }; +struct tep_plugins_dir { + struct tep_plugins_dir *next; + char *path; + enum tep_plugin_load_priority prio; +}; + static void lower_case(char *str) { if (!str) @@ -247,6 +254,170 @@ void tep_plugin_remove_options(struct tep_plugin_option *options) } } +static int parse_option_name(char **option, char **plugin) +{ + char *p; + + *plugin = NULL; + + if ((p = strstr(*option, ":"))) { + *plugin = *option; + *p = '\0'; + *option = strdup(p + 1); + if (!*option) + return -1; + } + return 0; +} + +static struct tep_plugin_option * +find_registered_option(const char *plugin, const char *option) +{ + struct registered_plugin_options *reg; + struct tep_plugin_option *op; + const char *op_plugin; + + for (reg = registered_options; reg; reg = reg->next) { + for (op = reg->options; op->name; op++) { + if (op->plugin_alias) + op_plugin = op->plugin_alias; + else + op_plugin = op->file; + + if (plugin && strcmp(plugin, op_plugin) != 0) + continue; + if (strcmp(option, op->name) != 0) + continue; + + return op; + } + } + + return NULL; +} + +static int process_option(const char *plugin, const char *option, const char *val) +{ + struct tep_plugin_option *op; + + op = find_registered_option(plugin, option); + if (!op) + return 0; + + return update_option_value(op, val); +} + +/** + * tep_plugin_add_option - add an option/val pair to set plugin options + * @name: The name of the option (format: <plugin>:<option> or just <option>) + * @val: (optional) the value for the option + * + * Modify a plugin option. If @val is given than the value of the option + * is set (note, some options just take a boolean, so @val must be either + * "1" or "0" or "true" or "false"). + */ +int tep_plugin_add_option(const char *name, const char *val) +{ + struct trace_plugin_options *op; + char *option_str; + char *plugin; + + option_str = strdup(name); + if (!option_str) + return -ENOMEM; + + if (parse_option_name(&option_str, &plugin) < 0) + return -ENOMEM; + + /* If the option exists, update the val */ + for (op = trace_plugin_options; op; op = op->next) { + /* Both must be NULL or not NULL */ + if ((!plugin || !op->plugin) && plugin != op->plugin) + continue; + if (plugin && strcmp(plugin, op->plugin) != 0) + continue; + if (strcmp(op->option, option_str) != 0) + continue; + + /* update option */ + free(op->value); + if (val) { + op->value = strdup(val); + if (!op->value) + goto out_free; + } else + op->value = NULL; + + /* plugin and option_str don't get freed at the end */ + free(plugin); + free(option_str); + + plugin = op->plugin; + option_str = op->option; + break; + } + + /* If not found, create */ + if (!op) { + op = malloc(sizeof(*op)); + if (!op) + goto out_free; + memset(op, 0, sizeof(*op)); + op->plugin = plugin; + op->option = option_str; + if (val) { + op->value = strdup(val); + if (!op->value) { + free(op); + goto out_free; + } + } + op->next = trace_plugin_options; + trace_plugin_options = op; + } + + return process_option(plugin, option_str, val); + +out_free: + free(plugin); + free(option_str); + return -ENOMEM; +} + +static void print_op_data(struct trace_seq *s, const char *name, + const char *op) +{ + if (op) + trace_seq_printf(s, "%8s:\t%s\n", name, op); +} + +/** + * tep_plugin_print_options - print out the registered plugin options + * @s: The trace_seq descriptor to write the plugin options into + * + * Writes a list of options into trace_seq @s. + */ +void tep_plugin_print_options(struct trace_seq *s) +{ + struct registered_plugin_options *reg; + struct tep_plugin_option *op; + + for (reg = registered_options; reg; reg = reg->next) { + if (reg != registered_options) + trace_seq_printf(s, "============\n"); + for (op = reg->options; op->name; op++) { + if (op != reg->options) + trace_seq_printf(s, "------------\n"); + print_op_data(s, "file", op->file); + print_op_data(s, "plugin", op->plugin_alias); + print_op_data(s, "option", op->name); + print_op_data(s, "desc", op->description); + print_op_data(s, "value", op->value); + trace_seq_printf(s, "%8s:\t%d\n", "set", op->set); + } + } +} + /** * tep_print_plugins - print out the list of plugins loaded * @s: the trace_seq descripter to write to @@ -273,6 +444,7 @@ load_plugin(struct tep_handle *tep, const char *path, const char *file, void *data) { struct tep_plugin_list **plugin_list = data; + struct tep_plugin_option *options; tep_plugin_load_func func; struct tep_plugin_list *list; const char *alias; @@ -297,6 +469,16 @@ load_plugin(struct tep_handle *tep, const char *path, if (!alias) alias = file; + options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME); + if (options) { + while (options->name) { + ret = update_option(alias, options); + if (ret < 0) + goto out_free; + options++; + } + } + func = dlsym(handle, TEP_PLUGIN_LOADER_NAME); if (!func) { warning("could not find func '%s' in plugin '%s'\n%s\n", @@ -365,28 +547,53 @@ load_plugins_dir(struct tep_handle *tep, const char *suffix, closedir(dir); } -static void -load_plugins(struct tep_handle *tep, const char *suffix, - void (*load_plugin)(struct tep_handle *tep, - const char *path, - const char *name, - void *data), - void *data) +/** + * tep_load_plugins_hook - call a user specified callback to load a plugin + * @tep: handler to traceevent context + * @suffix: filter only plugin files with given suffix + * @load_plugin: user specified callback, called for each plugin file + * @data: custom context, passed to @load_plugin + * + * Searches for traceevent plugin files and calls @load_plugin for each + * The order of plugins search is: + * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST + * - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR + * - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR + * - In user's home: ~/.local/lib/traceevent/plugins/ + * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST + * + */ +void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix, + void (*load_plugin)(struct tep_handle *tep, + const char *path, + const char *name, + void *data), + void *data) { + struct tep_plugins_dir *dir = NULL; char *home; char *path; char *envdir; int ret; - if (tep->flags & TEP_DISABLE_PLUGINS) + if (tep && tep->flags & TEP_DISABLE_PLUGINS) return; + if (tep) + dir = tep->plugins_dir; + while (dir) { + if (dir->prio == TEP_PLUGIN_FIRST) + load_plugins_dir(tep, suffix, dir->path, + load_plugin, data); + dir = dir->next; + } + /* * If a system plugin directory was defined, * check that first. */ #ifdef PLUGIN_DIR - if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS)) + if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS)) load_plugins_dir(tep, suffix, PLUGIN_DIR, load_plugin, data); #endif @@ -415,6 +622,15 @@ load_plugins(struct tep_handle *tep, const char *suffix, load_plugins_dir(tep, suffix, path, load_plugin, data); + if (tep) + dir = tep->plugins_dir; + while (dir) { + if (dir->prio == TEP_PLUGIN_LAST) + load_plugins_dir(tep, suffix, dir->path, + load_plugin, data); + dir = dir->next; + } + free(path); } @@ -423,10 +639,59 @@ tep_load_plugins(struct tep_handle *tep) { struct tep_plugin_list *list = NULL; - load_plugins(tep, ".so", load_plugin, &list); + tep_load_plugins_hook(tep, ".so", load_plugin, &list); return list; } +/** + * tep_add_plugin_path - Add a new plugin directory. + * @tep: Trace event handler. + * @path: Path to a directory. All plugin files in that + * directory will be loaded. + *@prio: Load priority of the plugins in that directory. + * + * Returns -1 in case of an error, 0 otherwise. + */ +int tep_add_plugin_path(struct tep_handle *tep, char *path, + enum tep_plugin_load_priority prio) +{ + struct tep_plugins_dir *dir; + + if (!tep || !path) + return -1; + + dir = calloc(1, sizeof(*dir)); + if (!dir) + return -1; + + dir->path = strdup(path); + if (!dir->path) { + free(dir); + return -1; + } + dir->prio = prio; + dir->next = tep->plugins_dir; + tep->plugins_dir = dir; + + return 0; +} + +void tep_free_plugin_paths(struct tep_handle *tep) +{ + struct tep_plugins_dir *dir; + + if (!tep) + return; + + dir = tep->plugins_dir; + while (dir) { + tep->plugins_dir = tep->plugins_dir->next; + free(dir->path); + free(dir); + dir = tep->plugins_dir; + } +} + void tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep) { diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index 5fa8292e341b3..a2b522093cfdd 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -1,22 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #ifndef _KBUFFER_H #define _KBUFFER_H diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build index 210d269106130..dd4da823c38fb 100644 --- a/tools/lib/traceevent/plugins/Build +++ b/tools/lib/traceevent/plugins/Build @@ -5,6 +5,8 @@ plugin_kvm-y += plugin_kvm.o plugin_mac80211-y += plugin_mac80211.o plugin_sched_switch-y += plugin_sched_switch.o plugin_function-y += plugin_function.o +plugin_futex-y += plugin_futex.o plugin_xen-y += plugin_xen.o plugin_scsi-y += plugin_scsi.o plugin_cfg80211-y += plugin_cfg80211.o +plugin_tlb-y += plugin_tlb.o
\ No newline at end of file diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile index 680d883efe05b..47e8025532502 100644 --- a/tools/lib/traceevent/plugins/Makefile +++ b/tools/lib/traceevent/plugins/Makefile @@ -134,9 +134,11 @@ PLUGINS += plugin_kvm.so PLUGINS += plugin_mac80211.so PLUGINS += plugin_sched_switch.so PLUGINS += plugin_function.so +PLUGINS += plugin_futex.so PLUGINS += plugin_xen.so PLUGINS += plugin_scsi.so PLUGINS += plugin_cfg80211.so +PLUGINS += plugin_tlb.so PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) PLUGINS_IN := $(PLUGINS:.so=-in.o) diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c index 7770fcb78e0fb..807b16e1bf0f9 100644 --- a/tools/lib/traceevent/plugins/plugin_function.c +++ b/tools/lib/traceevent/plugins/plugin_function.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> @@ -50,12 +35,20 @@ struct tep_plugin_option plugin_options[] = .set = 1, }, { + .name = "offset", + .plugin_alias = "ftrace", + .description = + "Show function names as well as their offsets", + .set = 0, + }, + { .name = NULL, } }; static struct tep_plugin_option *ftrace_parent = &plugin_options[0]; static struct tep_plugin_option *ftrace_indent = &plugin_options[1]; +static struct tep_plugin_option *ftrace_offset = &plugin_options[2]; static void add_child(struct func_stack *stack, const char *child, int pos) { @@ -123,6 +116,18 @@ static int add_and_get_index(const char *parent, const char *child, int cpu) return 0; } +static void show_function(struct trace_seq *s, struct tep_handle *tep, + const char *func, unsigned long long function) +{ + unsigned long long offset; + + trace_seq_printf(s, "%s", func); + if (ftrace_offset->set) { + offset = tep_find_function_address(tep, function); + trace_seq_printf(s, "+0x%x ", (int)(function - offset)); + } +} + static int function_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { @@ -149,14 +154,14 @@ static int function_handler(struct trace_seq *s, struct tep_record *record, trace_seq_printf(s, "%*s", index*3, ""); if (func) - trace_seq_printf(s, "%s", func); + show_function(s, tep, func, function); else trace_seq_printf(s, "0x%llx", function); if (ftrace_parent->set) { trace_seq_printf(s, " <-- "); if (parent) - trace_seq_printf(s, "%s", parent); + show_function(s, tep, parent, pfunction); else trace_seq_printf(s, "0x%llx", pfunction); } @@ -164,11 +169,93 @@ static int function_handler(struct trace_seq *s, struct tep_record *record, return 0; } +static int +trace_stack_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct tep_format_field *field; + unsigned long long addr; + const char *func; + int long_size; + void *data = record->data; + + field = tep_find_any_field(event, "caller"); + if (!field) { + trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller"); + return 0; + } + + trace_seq_puts(s, "<stack trace >\n"); + + long_size = tep_get_long_size(event->tep); + + for (data += field->offset; data < record->data + record->size; + data += long_size) { + addr = tep_read_number(event->tep, data, long_size); + + if ((long_size == 8 && addr == (unsigned long long)-1) || + ((int)addr == -1)) + break; + + func = tep_find_function(event->tep, addr); + if (func) + trace_seq_printf(s, "=> %s (%llx)\n", func, addr); + else + trace_seq_printf(s, "=> %llx\n", addr); + } + + return 0; +} + +static int +trace_raw_data_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct tep_format_field *field; + unsigned long long id; + int long_size; + void *data = record->data; + + if (tep_get_field_val(s, event, "id", record, &id, 1)) + return trace_seq_putc(s, '!'); + + trace_seq_printf(s, "# %llx", id); + + field = tep_find_any_field(event, "buf"); + if (!field) { + trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf"); + return 0; + } + + long_size = tep_get_long_size(event->tep); + + for (data += field->offset; data < record->data + record->size; + data += long_size) { + int size = sizeof(long); + int left = (record->data + record->size) - data; + int i; + + if (size > left) + size = left; + + for (i = 0; i < size; i++) + trace_seq_printf(s, " %02x", *(unsigned char *)(data + i)); + } + + return 0; +} + int TEP_PLUGIN_LOADER(struct tep_handle *tep) { tep_register_event_handler(tep, -1, "ftrace", "function", function_handler, NULL); + tep_register_event_handler(tep, -1, "ftrace", "kernel_stack", + trace_stack_handler, NULL); + + tep_register_event_handler(tep, -1, "ftrace", "raw_data", + trace_raw_data_handler, NULL); + tep_plugin_add_options("ftrace", plugin_options); return 0; diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c new file mode 100644 index 0000000000000..eb7c9f8a850ab --- /dev/null +++ b/tools/lib/traceevent/plugins/plugin_futex.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2017 National Instruments Corp. + * + * Author: Julia Cartwright <julia@ni.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/futex.h> + +#include "event-parse.h" + +#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0])) + +struct futex_args { + unsigned long long uaddr; + unsigned long long op; + unsigned long long val; + unsigned long long utime; /* or val2 */ + unsigned long long uaddr2; + unsigned long long val3; +}; + +struct futex_op { + const char *name; + const char *fmt_val; + const char *fmt_utime; + const char *fmt_uaddr2; + const char *fmt_val3; +}; + +static const struct futex_op futex_op_tbl[] = { + { "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx", NULL, NULL }, + { "FUTEX_WAKE", " val=%llu", NULL, NULL, NULL }, + { "FUTEX_FD", " val=%llu", NULL, NULL, NULL }, + { "FUTEX_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", NULL }, + { "FUTEX_CMP_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, + { "FUTEX_WAKE_OP", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, + { "FUTEX_LOCK_PI", NULL, " utime=0x%08llx", NULL, NULL }, + { "FUTEX_UNLOCK_PI", NULL, NULL, NULL, NULL }, + { "FUTEX_TRYLOCK_PI", NULL, NULL, NULL, NULL }, + { "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx", NULL, " val3=0x%08llx" }, + { "FUTEX_WAKE_BITSET", " val=%llu", NULL, NULL, " val3=0x%08llx" }, + { "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" }, + { "FUTEX_CMP_REQUEUE_PI", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" }, +}; + + +static void futex_print(struct trace_seq *s, const struct futex_args *args, + const struct futex_op *fop) +{ + trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr); + + if (fop->fmt_val) + trace_seq_printf(s, fop->fmt_val, args->val); + + if (fop->fmt_utime) + trace_seq_printf(s,fop->fmt_utime, args->utime); + + if (fop->fmt_uaddr2) + trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2); + + if (fop->fmt_val3) + trace_seq_printf(s, fop->fmt_val3, args->val3); +} + +static int futex_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + const struct futex_op *fop; + struct futex_args args; + unsigned long long cmd; + + if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1)) + return 1; + + if (tep_get_field_val(s, event, "op", record, &args.op, 1)) + return 1; + + if (tep_get_field_val(s, event, "val", record, &args.val, 1)) + return 1; + + if (tep_get_field_val(s, event, "utime", record, &args.utime, 1)) + return 1; + + if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1)) + return 1; + + if (tep_get_field_val(s, event, "val3", record, &args.val3, 1)) + return 1; + + cmd = args.op & FUTEX_CMD_MASK; + if (cmd >= ARRAY_SIZE(futex_op_tbl)) + return 1; + + fop = &futex_op_tbl[cmd]; + + trace_seq_printf(s, "op=%s", fop->name); + + if (args.op & FUTEX_PRIVATE_FLAG) + trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG"); + + if (args.op & FUTEX_CLOCK_REALTIME) + trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME"); + + futex_print(s, &args, fop); + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *tep) +{ + tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex", + futex_handler, NULL); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) +{ + tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex", + futex_handler, NULL); +} diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c index bb434e0ed03ab..d98466788f14c 100644 --- a/tools/lib/traceevent/plugins/plugin_hrtimer.c +++ b/tools/lib/traceevent/plugins/plugin_hrtimer.c @@ -1,22 +1,7 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c index 04fc125f38cb3..69111a68d3cf1 100644 --- a/tools/lib/traceevent/plugins/plugin_jbd2.c +++ b/tools/lib/traceevent/plugins/plugin_jbd2.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c index edaec5d962c3d..4b4f7f9616e31 100644 --- a/tools/lib/traceevent/plugins/plugin_kmem.c +++ b/tools/lib/traceevent/plugins/plugin_kmem.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c index c8e623065a7e4..51ceeb9147eb6 100644 --- a/tools/lib/traceevent/plugins/plugin_kvm.c +++ b/tools/lib/traceevent/plugins/plugin_kvm.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> @@ -155,7 +140,23 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip, _ER(EXIT_WRITE_DR5, 0x035) \ _ER(EXIT_WRITE_DR6, 0x036) \ _ER(EXIT_WRITE_DR7, 0x037) \ - _ER(EXIT_EXCP_BASE, 0x040) \ + _ER(EXIT_EXCP_DE, 0x040) \ + _ER(EXIT_EXCP_DB, 0x041) \ + _ER(EXIT_EXCP_BP, 0x043) \ + _ER(EXIT_EXCP_OF, 0x044) \ + _ER(EXIT_EXCP_BR, 0x045) \ + _ER(EXIT_EXCP_UD, 0x046) \ + _ER(EXIT_EXCP_NM, 0x047) \ + _ER(EXIT_EXCP_DF, 0x048) \ + _ER(EXIT_EXCP_TS, 0x04a) \ + _ER(EXIT_EXCP_NP, 0x04b) \ + _ER(EXIT_EXCP_SS, 0x04c) \ + _ER(EXIT_EXCP_GP, 0x04d) \ + _ER(EXIT_EXCP_PF, 0x04e) \ + _ER(EXIT_EXCP_MF, 0x050) \ + _ER(EXIT_EXCP_AC, 0x051) \ + _ER(EXIT_EXCP_MC, 0x052) \ + _ER(EXIT_EXCP_XF, 0x053) \ _ER(EXIT_INTR, 0x060) \ _ER(EXIT_NMI, 0x061) \ _ER(EXIT_SMI, 0x062) \ @@ -201,7 +202,10 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip, _ER(EXIT_MONITOR, 0x08a) \ _ER(EXIT_MWAIT, 0x08b) \ _ER(EXIT_MWAIT_COND, 0x08c) \ - _ER(EXIT_NPF, 0x400) \ + _ER(EXIT_XSETBV, 0x08d) \ + _ER(EXIT_NPF, 0x400) \ + _ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \ + _ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \ _ER(EXIT_ERR, -1) #define _ER(reason, val) { #reason, val }, @@ -241,7 +245,7 @@ static const char *find_exit_reason(unsigned isa, int val) } if (!strings) return "UNKNOWN-ISA"; - for (i = 0; strings[i].val >= 0; i++) + for (i = 0; strings[i].str; i++) if (strings[i].val == val) break; diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c index 884303c26b5cd..f48071e3cfb83 100644 --- a/tools/lib/traceevent/plugins/plugin_mac80211.c +++ b/tools/lib/traceevent/plugins/plugin_mac80211.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c index 957389a0ff7ad..e12fa103820ab 100644 --- a/tools/lib/traceevent/plugins/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugins/plugin_sched_switch.c @@ -1,21 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include <stdio.h> #include <stdlib.h> diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c new file mode 100644 index 0000000000000..43657fb60504f --- /dev/null +++ b/tools/lib/traceevent/plugins/plugin_tlb.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +enum tlb_flush_reason { + TLB_FLUSH_ON_TASK_SWITCH, + TLB_REMOTE_SHOOTDOWN, + TLB_LOCAL_SHOOTDOWN, + TLB_LOCAL_MM_SHOOTDOWN, + NR_TLB_FLUSH_REASONS, +}; + +static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + unsigned long long val; + + trace_seq_printf(s, "pages="); + + tep_print_num_field(s, "%ld", event, "pages", record, 1); + + if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0) + return -1; + + trace_seq_puts(s, " reason="); + + switch (val) { + case TLB_FLUSH_ON_TASK_SWITCH: + trace_seq_puts(s, "flush on task switch"); + break; + case TLB_REMOTE_SHOOTDOWN: + trace_seq_puts(s, "remote shootdown"); + break; + case TLB_LOCAL_SHOOTDOWN: + trace_seq_puts(s, "local shootdown"); + break; + case TLB_LOCAL_MM_SHOOTDOWN: + trace_seq_puts(s, "local mm shootdown"); + break; + } + + trace_seq_printf(s, " (%lld)", val); + + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *tep) +{ + tep_register_event_handler(tep, -1, "tlb", "tlb_flush", + tlb_flush_handler, NULL); + + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) +{ + tep_unregister_event_handler(tep, -1, + "tlb", "tlb_flush", + tlb_flush_handler, NULL); +} diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index e817179c5027a..d3740c8f399ba 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -18,6 +18,7 @@ l synthesize last branch entries (use with i or x) L synthesize last branch entries on existing event records s skip initial number of events + q quicker (less detailed) decoding The default is all events i.e. the same as --itrace=ibxwpe, except for perf script where it is --itrace=ce @@ -47,3 +48,16 @@ --itrace=i0nss1000000 skips the first million instructions. + + The 'e' option may be followed by flags which affect what errors will or + will not be reported. Each flag must be preceded by either '+' or '-'. + The flags are: + o overflow + l trace data lost + + If supported, the 'd' option may be followed by flags which affect what + debug messages will or will not be logged. Each flag must be preceded + by either '+' or '-'. The flags are: + a all perf events + + If supported, the 'q' option may be repeated to increase the effect. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index bad16512c48d7..a0529c7fa5ef9 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -49,6 +49,9 @@ SUBSYSTEM 'sched':: Scheduler and IPC mechanisms. +'syscall':: + System call performance (throughput). + 'mem':: Memory access performance. @@ -137,6 +140,14 @@ Example of *pipe* 59004 ops/sec --------------------- +SUITES FOR 'syscall' +~~~~~~~~~~~~~~~~~~ +*basic*:: +Suite for evaluating performance of core system call throughput (both usecs/op and ops/sec metrics). +This uses a single thread simply doing getppid(2), which is a simple syscall where the result is not +cached by glibc. + + SUITES FOR 'mem' ~~~~~~~~~~~~~~~~ *memcpy*:: diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index c87180764829c..726b9bc9e1a70 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -27,6 +27,9 @@ OPTIONS for 'convert' --to-ctf:: Triggers the CTF conversion, specify the path of CTF data directory. +--tod:: + Convert time to wall clock time. + -i:: Specify input perf data file path. diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index f4cd49a7fcdbd..d5a266d7f15b5 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -825,6 +825,7 @@ The letters are: l synthesize last branch entries (use with i or x) L synthesize last branch entries on existing event records s skip initial number of events + q quicker (less detailed) decoding "Instructions" events look like they were recorded by "perf record -e instructions". @@ -871,11 +872,24 @@ Developer Manuals. Error events show where the decoder lost the trace. Error events are quite important. Users must know if what they are seeing is a complete -picture or not. +picture or not. The "e" option may be followed by flags which affect what errors +will or will not be reported. Each flag must be preceded by either '+' or '-'. +The flags supported by Intel PT are: + -o Suppress overflow errors + -l Suppress trace data lost errors +For example, for errors but not overflow or data lost errors: + + --itrace=e-o-l The "d" option will cause the creation of a file "intel_pt.log" containing all decoded packets and instructions. Note that this option slows down the decoder -and that the resulting file may be very large. +and that the resulting file may be very large. The "d" option may be followed +by flags which affect what debug messages will or will not be logged. Each flag +must be preceded by either '+' or '-'. The flags support by Intel PT are: + -a Suppress logging of perf events + +a Log all perf events +By default, logged perf events are filtered by any specified time ranges, but +flag +a overrides that. In addition, the period of the "instructions" event can be specified. e.g. @@ -956,6 +970,51 @@ at the beginning. This is useful to ignore initialization code. skips the first million instructions. +The q option changes the way the trace is decoded. The decoding is much faster +but much less detailed. Specifically, with the q option, the decoder does not +decode TNT packets, and does not walk object code, but gets the ip from FUP and +TIP packets. The q option can be used with the b and i options but the period +is not used. The q option decodes more quickly, but is useful only if the +control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or +TIP.PGD packets (refer below). However the q option could be used to find time +ranges that could then be decoded fully using the --time option. + +What will *not* be decoded with the (single) q option: + + - direct calls and jmps + - conditional branches + - non-branch instructions + +What *will* be decoded with the (single) q option: + + - asynchronous branches such as interrupts + - indirect branches + - function return target address *if* the noretcomp config term (refer + config terms section) was used + - start of (control-flow) tracing + - end of (control-flow) tracing, if it is not out of context + - power events, ptwrite, transaction start and abort + - instruction pointer associated with PSB packets + +Note the q option does not specify what events will be synthesized e.g. the p +option must be used also to show power events. + +Repeating the q option (double-q i.e. qq) results in even faster decoding and even +less detail. The decoder decodes only extended PSB (PSB+) packets, getting the +instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and +PSBEND). Note PSB packets occur regularly in the trace based on the psb_period +config term (refer config terms section). There will be a FUP packet if the +PSB+ occurs while control flow is being traced. + +What will *not* be decoded with the qq option: + + - everything except instruction pointer associated with PSB packets + +What *will* be decoded with the qq option: + + - instruction pointer associated with PSB packets + + dump option ~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 376a50b3452d0..10ed539a8859c 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -119,6 +119,7 @@ It's also possible to use pmu syntax: perf record -e r1a8 -a sleep 1 perf record -e cpu/r1a8/ ... + perf record -e cpu/r0x1a8/ ... You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index fa8a5fcd27aba..3f72d8e261f3d 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -407,8 +407,9 @@ if combined with -a or -C options. -D:: --delay=:: -After starting the program, wait msecs before measuring. This is useful to -filter out the startup phase of the program, which is often very different. +After starting the program, wait msecs before measuring (-1: start with events +disabled). This is useful to filter out the startup phase of the program, which +is often very different. -I:: --intr-regs:: @@ -626,6 +627,45 @@ option. The -e option and this one can be mixed and matched. Events can be grouped using the {} notation. endif::HAVE_LIBPFM[] +--control fd:ctl-fd[,ack-fd] +Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, +'disable': disable events). Measurements can be started with events disabled using +--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor +to synchronize with the controlling process. Example of bash shell script to enable and +disable events during measurements: + +#!/bin/bash + +ctl_dir=/tmp/ + +ctl_fifo=${ctl_dir}perf_ctl.fifo +test -p ${ctl_fifo} && unlink ${ctl_fifo} +mkfifo ${ctl_fifo} +exec {ctl_fd}<>${ctl_fifo} + +ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo +test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} +mkfifo ${ctl_ack_fifo} +exec {ctl_fd_ack}<>${ctl_ack_fifo} + +perf record -D -1 -e cpu-cycles -a \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & +perf_pid=$! + +sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" +sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + +exec {ctl_fd_ack}>&- +unlink ${ctl_ack_fifo} + +exec {ctl_fd}>&- +unlink ${ctl_fifo} + +wait -n ${perf_pid} +exit $? + + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 372dfd110e6d6..4f712fb8f175d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -322,6 +322,10 @@ OPTIONS --show-cgroup-events Display cgroup events i.e. events of type PERF_RECORD_CGROUP. +--show-text-poke-events + Display text poke events i.e. events of type PERF_RECORD_TEXT_POKE and + PERF_RECORD_KSYMBOL. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index b029ee728a0bc..c9bfefc051fbe 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -176,6 +176,45 @@ with it. --append may be used here. Examples: 3>results perf stat --log-fd 3 -- $cmd 3>>results perf stat --log-fd 3 --append -- $cmd +--control fd:ctl-fd[,ack-fd] +Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, +'disable': disable events). Measurements can be started with events disabled using +--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor +to synchronize with the controlling process. Example of bash shell script to enable and +disable events during measurements: + +#!/bin/bash + +ctl_dir=/tmp/ + +ctl_fifo=${ctl_dir}perf_ctl.fifo +test -p ${ctl_fifo} && unlink ${ctl_fifo} +mkfifo ${ctl_fifo} +exec {ctl_fd}<>${ctl_fifo} + +ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo +test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} +mkfifo ${ctl_ack_fifo} +exec {ctl_fd_ack}<>${ctl_ack_fifo} + +perf stat -D -1 -e cpu-cycles -a -I 1000 \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & +perf_pid=$! + +sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" +sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + +exec {ctl_fd_ack}>&- +unlink ${ctl_ack_fifo} + +exec {ctl_fd}>&- +unlink ${ctl_fifo} + +wait -n ${perf_pid} +exit $? + + --pre:: --post:: Pre and post measurement hooks, e.g.: @@ -238,8 +277,9 @@ mode, use --per-node in addition to -a. (system-wide). -D msecs:: --delay msecs:: -After starting the program, wait msecs before measuring. This is useful to -filter out the startup phase of the program, which is often very different. +After starting the program, wait msecs before measuring (-1: start with events +disabled). This is useful to filter out the startup phase of the program, +which is often very different. -T:: --transaction:: diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b6472e463284d..9ee96640744e5 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -389,6 +389,19 @@ struct { Example: cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake + HEADER_CLOCK_DATA = 29, + + Contains clock id and its reference time together with wall clock + time taken at the 'same time', both values are in nanoseconds. + The format of data is as below. + +struct { + u32 version; /* version = 1 */ + u32 clockid; + u64 wall_clock_ns; + u64 clockid_time_ns; +}; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 28a5d0c18b1d2..b187bddbd01ac 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -57,17 +57,15 @@ struct auxtrace_record struct evsel *evsel; bool found_etm = false; struct perf_pmu *found_spe = NULL; - static struct perf_pmu **arm_spe_pmus = NULL; - static int nr_spes = 0; + struct perf_pmu **arm_spe_pmus = NULL; + int nr_spes = 0; int i = 0; if (!evlist) return NULL; cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); - - if (!arm_spe_pmus) - arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err); + arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err); evlist__for_each_entry(evlist, evsel) { if (cs_etm_pmu && @@ -84,6 +82,7 @@ struct auxtrace_record } } } + free(arm_spe_pmus); if (found_etm && found_spe) { pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n"); diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index cea5e33d61d2b..cad7bf7834138 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -243,10 +243,10 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, } /* - * No sink was provided on the command line - for _now_ treat - * this as an error. + * No sink was provided on the command line - allow the CoreSight + * system to look for a default */ - return ret; + return 0; } static int cs_etm_recording_options(struct auxtrace_record *itr, diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index b190f2eb2611b..3ca6fe057a0b1 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -193,7 +193,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid 148 common fdatasync sys_fdatasync -149 nospu _sysctl sys_sysctl compat_sys_sysctl +149 nospu _sysctl sys_ni_syscall 150 common mlock sys_mlock 151 common munlock sys_munlock 152 common mlockall sys_mlockall diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index e18a3556f5e31..63f3ac91049f7 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -64,7 +64,13 @@ static const char *reg_names[] = { [PERF_REG_POWERPC_DAR] = "dar", [PERF_REG_POWERPC_DSISR] = "dsisr", [PERF_REG_POWERPC_SIER] = "sier", - [PERF_REG_POWERPC_MMCRA] = "mmcra" + [PERF_REG_POWERPC_MMCRA] = "mmcra", + [PERF_REG_POWERPC_MMCR0] = "mmcr0", + [PERF_REG_POWERPC_MMCR1] = "mmcr1", + [PERF_REG_POWERPC_MMCR2] = "mmcr2", + [PERF_REG_POWERPC_MMCR3] = "mmcr3", + [PERF_REG_POWERPC_SIER2] = "sier2", + [PERF_REG_POWERPC_SIER3] = "sier3", }; static inline const char *perf_reg_name(int id) diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h index 54cfa0530e86d..488f4339b83c7 100644 --- a/tools/perf/arch/powerpc/util/book3s_hcalls.h +++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h @@ -84,7 +84,7 @@ {0x1a4, "H_CREATE_RPT"}, \ {0x1a8, "H_REMOVE_RPT"}, \ {0x1ac, "H_REGISTER_RPAGES"}, \ - {0x1b0, "H_DISABLE_AND_GETC"}, \ + {0x1b0, "H_DISABLE_AND_GET"}, \ {0x1b4, "H_ERROR_DATA"}, \ {0x1b8, "H_GET_HCA_INFO"}, \ {0x1bc, "H_GET_PERF_COUNT"}, \ diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index d4870074f14c7..1a950171a66fa 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -7,17 +7,10 @@ #include <string.h> #include <linux/stringify.h> #include "header.h" +#include "utils_header.h" #include "metricgroup.h" #include <api/fs/fs.h> -#define mfspr(rn) ({unsigned long rval; \ - asm volatile("mfspr %0," __stringify(rn) \ - : "=r" (rval)); rval; }) - -#define SPRN_PVR 0x11F /* Processor Version Register */ -#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ -#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ - int get_cpuid(char *buffer, size_t sz) { diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 0a52429002485..2b6d4704e3aa2 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -6,9 +6,16 @@ #include "../../../util/perf_regs.h" #include "../../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/header.h" +#include "../../../perf-sys.h" +#include "utils_header.h" #include <linux/kernel.h> +#define PVR_POWER9 0x004E +#define PVR_POWER10 0x0080 + const struct sample_reg sample_reg_masks[] = { SMPL_REG(r0, PERF_REG_POWERPC_R0), SMPL_REG(r1, PERF_REG_POWERPC_R1), @@ -55,6 +62,12 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), SMPL_REG(sier, PERF_REG_POWERPC_SIER), SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA), + SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0), + SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1), + SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2), + SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3), + SMPL_REG(sier2, PERF_REG_POWERPC_SIER2), + SMPL_REG(sier3, PERF_REG_POWERPC_SIER3), SMPL_REG_END }; @@ -163,3 +176,45 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) return SDT_ARG_VALID; } + +uint64_t arch__intr_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_INTR, + .precise_ip = 1, + .disabled = 1, + .exclude_kernel = 1, + }; + int fd; + u32 version; + u64 extended_mask = 0, mask = PERF_REGS_MASK; + + /* + * Get the PVR value to set the extended + * mask specific to platform. + */ + version = (((mfspr(SPRN_PVR)) >> 16) & 0xFFFF); + if (version == PVR_POWER9) + extended_mask = PERF_REG_PMU_MASK_300; + else if (version == PVR_POWER10) + extended_mask = PERF_REG_PMU_MASK_31; + else + return mask; + + attr.sample_regs_intr = extended_mask; + attr.sample_period = 1; + event_attr_init(&attr); + + /* + * check if the pmu supports perf extended regs, before + * returning the register mask to sample. + */ + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + mask |= extended_mask; + } + return mask; +} diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h new file mode 100644 index 0000000000000..5788eb1f1fe33 --- /dev/null +++ b/tools/perf/arch/powerpc/util/utils_header.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_UTIL_HEADER_H +#define __PERF_UTIL_HEADER_H + +#include <linux/stringify.h> + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," __stringify(rn) \ + : "=r" (rval)); rval; }) + +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +#endif /* __PERF_UTIL_HEADER_H */ diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 56ae24b6e4be6..6a0bbea225db0 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -138,7 +138,7 @@ 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync -149 common _sysctl sys_sysctl compat_sys_sysctl +149 common _sysctl - - 150 common mlock sys_mlock compat_sys_mlock 151 common munlock sys_munlock compat_sys_munlock 152 common mlockall sys_mlockall sys_mlockall diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index e008d638e6417..f30d6ae9a6883 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -164,7 +164,7 @@ 153 common vhangup sys_vhangup 154 common modify_ldt sys_modify_ldt 155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl +156 64 _sysctl sys_ni_syscall 157 common prctl sys_prctl 158 common arch_prctl sys_arch_prctl 159 common adjtimex sys_adjtimex @@ -357,6 +357,7 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 +436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 6ce451293634e..082e5f2a415a3 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -837,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, } } + if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel && + perf_can_record_text_poke_events() && perf_can_record_cpu_wide()) + opts->text_poke = true; + if (intel_pt_evsel) { /* * To obtain the auxtrace buffer file descriptor, the auxtrace diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 768e408757a05..dd68a40a790c5 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,5 +1,6 @@ perf-y += sched-messaging.o perf-y += sched-pipe.o +perf-y += syscall.o perf-y += mem-functions.o perf-y += futex-hash.o perf-y += futex-wake.o @@ -10,6 +11,7 @@ perf-y += epoll-wait.o perf-y += epoll-ctl.o perf-y += synthesize.o perf-y += kallsyms-parse.o +perf-y += find-bit-bench.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 61cae4966cae2..2804812d4154c 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -33,8 +33,10 @@ extern struct timeval bench__start, bench__end, bench__runtime; int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); int bench_sched_pipe(int argc, const char **argv); +int bench_syscall_basic(int argc, const char **argv); int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); +int bench_mem_find_bit(int argc, const char **argv); int bench_futex_hash(int argc, const char **argv); int bench_futex_wake(int argc, const char **argv); int bench_futex_wake_parallel(int argc, const char **argv); diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c new file mode 100644 index 0000000000000..fa90f3e9d3680 --- /dev/null +++ b/tools/perf/bench/find-bit-bench.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark find_next_bit and related bit operations. + * + * Copyright 2020 Google LLC. + */ +#include <stdlib.h> +#include "bench.h" +#include "../util/stat.h" +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/time64.h> +#include <subcmd/parse-options.h> + +static unsigned int outer_iterations = 5; +static unsigned int inner_iterations = 100000; + +static const struct option options[] = { + OPT_UINTEGER('i', "outer-iterations", &outer_iterations, + "Number of outerer iterations used"), + OPT_UINTEGER('j', "inner-iterations", &inner_iterations, + "Number of outerer iterations used"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench mem find_bit <options>", + NULL +}; + +static unsigned int accumulator; +static unsigned int use_of_val; + +static noinline void workload(int val) +{ + use_of_val += val; + accumulator++; +} + +#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) +static bool asm_test_bit(long nr, const unsigned long *addr) +{ + bool oldbit; + + asm volatile("bt %2,%1" + : "=@ccc" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); + + return oldbit; +} +#else +#define asm_test_bit test_bit +#endif + +static int do_for_each_set_bit(unsigned int num_bits) +{ + unsigned long *to_test = bitmap_alloc(num_bits); + struct timeval start, end, diff; + u64 runtime_us; + struct stats fb_time_stats, tb_time_stats; + double time_average, time_stddev; + unsigned int bit, i, j; + unsigned int set_bits, skip; + unsigned int old; + + init_stats(&fb_time_stats); + init_stats(&tb_time_stats); + + for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) { + bitmap_zero(to_test, num_bits); + skip = num_bits / set_bits; + for (i = 0; i < num_bits; i += skip) + set_bit(i, to_test); + + for (i = 0; i < outer_iterations; i++) { + old = accumulator; + gettimeofday(&start, NULL); + for (j = 0; j < inner_iterations; j++) { + for_each_set_bit(bit, to_test, num_bits) + workload(bit); + } + gettimeofday(&end, NULL); + assert(old + (inner_iterations * set_bits) == accumulator); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&fb_time_stats, runtime_us); + + old = accumulator; + gettimeofday(&start, NULL); + for (j = 0; j < inner_iterations; j++) { + for (bit = 0; bit < num_bits; bit++) { + if (asm_test_bit(bit, to_test)) + workload(bit); + } + } + gettimeofday(&end, NULL); + assert(old + (inner_iterations * set_bits) == accumulator); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&tb_time_stats, runtime_us); + } + + printf("%d operations %d bits set of %d bits\n", + inner_iterations, set_bits, num_bits); + time_average = avg_stats(&fb_time_stats); + time_stddev = stddev_stats(&fb_time_stats); + printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + time_average = avg_stats(&tb_time_stats); + time_stddev = stddev_stats(&tb_time_stats); + printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + if (use_of_val == accumulator) /* Try to avoid compiler tricks. */ + printf("\n"); + } + bitmap_free(to_test); + return 0; +} + +int bench_mem_find_bit(int argc, const char **argv) +{ + int err = 0, i; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + for (i = 1; i <= 2048; i <<= 1) + do_for_each_set_bit(i); + + return err; +} diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c new file mode 100644 index 0000000000000..5fe621cff8e93 --- /dev/null +++ b/tools/perf/bench/syscall.c @@ -0,0 +1,81 @@ +/* + * + * syscall.c + * + * syscall: Benchmark for system call performance + */ +#include "../perf.h" +#include "../util/util.h" +#include <subcmd/parse-options.h> +#include "../builtin.h" +#include "bench.h" + +#include <stdio.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdlib.h> + +#define LOOPS_DEFAULT 10000000 +static int loops = LOOPS_DEFAULT; + +static const struct option options[] = { + OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_syscall_usage[] = { + "perf bench syscall <options>", + NULL +}; + +int bench_syscall_basic(int argc, const char **argv) +{ + struct timeval start, stop, diff; + unsigned long long result_usec = 0; + int i; + + argc = parse_options(argc, argv, options, bench_syscall_usage, 0); + + gettimeofday(&start, NULL); + + for (i = 0; i < loops; i++) + getppid(); + + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# Executed %'d getppid() calls\n", loops); + + result_usec = diff.tv_sec * 1000000; + result_usec += diff.tv_usec; + + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", + diff.tv_sec, + (unsigned long) (diff.tv_usec/1000)); + + printf(" %14lf usecs/op\n", + (double)result_usec / (double)loops); + printf(" %'14d ops/sec\n", + (int)((double)loops / + ((double)result_usec / (double)1000000))); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", + diff.tv_sec, + (unsigned long) (diff.tv_usec / 1000)); + break; + + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index cad31b1d34389..4f176039fc8f4 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -11,6 +11,7 @@ * Available benchmark collection list: * * sched ... scheduler and IPC performance + * syscall ... System call performance * mem ... memory access performance * numa ... NUMA scheduling and MM performance * futex ... Futex performance @@ -49,9 +50,16 @@ static struct bench sched_benchmarks[] = { { NULL, NULL, NULL } }; +static struct bench syscall_benchmarks[] = { + { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic }, + { "all", "Run all syscall benchmarks", NULL }, + { NULL, NULL, NULL }, +}; + static struct bench mem_benchmarks[] = { { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy }, { "memset", "Benchmark for memset() functions", bench_mem_memset }, + { "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit }, { "all", "Run all memory access benchmarks", NULL }, { NULL, NULL, NULL } }; @@ -90,6 +98,7 @@ struct collection { static struct collection collections[] = { { "sched", "Scheduler and IPC benchmarks", sched_benchmarks }, + { "syscall", "System call benchmarks", syscall_benchmarks }, { "mem", "Memory access benchmarks", mem_benchmarks }, #ifdef HAVE_LIBNUMA_SUPPORT { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d617d5682c688..5938b100eaf4c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2582,7 +2582,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) static int setup_callchain(struct evlist *evlist) { - u64 sample_type = perf_evlist__combined_sample_type(evlist); + u64 sample_type = evlist__combined_sample_type(evlist); enum perf_call_graph_mode mode = CALLCHAIN_NONE; if ((sample_type & PERF_SAMPLE_REGS_USER) && diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index ca2fb44874e4b..8d23b8d6ee8e3 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -65,6 +65,7 @@ static int cmd_data_convert(int argc, const char **argv) OPT_STRING('i', "input", &input_name, "file", "input file name"), #ifdef HAVE_LIBBABELTRACE_SUPPORT OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), + OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"), #endif OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"), OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"), diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 4a6de4b03ac0b..6d2f410d773ad 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -292,7 +292,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool, * if jit marker, then inject jit mmaps and generate ELF images */ ret = jit_process(inject->session, &inject->output, machine, - event->mmap.filename, sample->pid, &n); + event->mmap.filename, event->mmap.pid, &n); if (ret < 0) return ret; if (ret) { @@ -330,7 +330,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, * if jit marker, then inject jit mmaps and generate ELF images */ ret = jit_process(inject->session, &inject->output, machine, - event->mmap2.filename, sample->pid, &n); + event->mmap2.filename, event->mmap2.pid, &n); if (ret < 0) return ret; if (ret) { diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 38a5ab683ebce..a50dae2c4ae9e 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1933,7 +1933,8 @@ int cmd_kmem(int argc, const char **argv) return ret; argc = parse_options_subcommand(argc, argv, kmem_options, - kmem_subcommands, kmem_usage, 0); + kmem_subcommands, kmem_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(kmem_usage, kmem_options); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 95a77058023e6..460945ded6dd2 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1319,7 +1319,7 @@ static struct evlist *kvm_live_event_list(void) *name = '\0'; name++; - if (perf_evlist__add_newtp(evlist, sys, name, NULL)) { + if (evlist__add_newtp(evlist, sys, name, NULL)) { pr_err("Failed to add %s tracepoint to the list\n", *events_tp); free(tp); goto out; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a37e7910e9e90..f91352f847c08 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -46,6 +46,7 @@ #include "util/bpf-event.h" #include "util/util.h" #include "util/pfm.h" +#include "util/clockid.h" #include "asm/bug.h" #include "perf.h" @@ -70,6 +71,7 @@ #include <linux/time64.h> #include <linux/zalloc.h> #include <linux/bitmap.h> +#include <sys/time.h> struct switch_output { bool enabled; @@ -765,6 +767,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused) #endif +static int record__config_text_poke(struct evlist *evlist) +{ + struct evsel *evsel; + int err; + + /* Nothing to do if text poke is already configured */ + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.text_poke) + return 0; + } + + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + evsel = evlist__last(evlist); + + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->core.attr.text_poke = 1; + evsel->core.attr.ksymbol = 1; + + evsel->core.system_wide = true; + evsel->no_aux_samples = true; + evsel->immediate = true; + + /* Text poke must be collected on all CPUs */ + perf_cpu_map__put(evsel->core.own_cpus); + evsel->core.own_cpus = perf_cpu_map__new(NULL); + perf_cpu_map__put(evsel->core.cpus); + evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); + + evsel__set_sample_bit(evsel, TIME); + + return 0; +} + static bool record__kcore_readable(struct machine *machine) { char kcore[PATH_MAX]; @@ -855,7 +894,7 @@ static int record__open(struct record *rec) pos = perf_evlist__get_tracking_event(evlist); if (!evsel__is_dummy_event(pos)) { /* Set up dummy event. */ - if (perf_evlist__add_dummy(evlist)) + if (evlist__add_dummy(evlist)) return -ENOMEM; pos = evlist__last(evlist); perf_evlist__set_tracking_event(evlist, pos); @@ -1166,6 +1205,9 @@ static void record__init_features(struct record *rec) if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) perf_header__clear_feat(&session->header, HEADER_CLOCKID); + if (!rec->opts.use_clockid) + perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); if (!record__comp_enabled(rec)) perf_header__clear_feat(&session->header, HEADER_COMPRESSED); @@ -1489,7 +1531,7 @@ static int record__setup_sb_evlist(struct record *rec) evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); rec->thread_id = pthread_self(); } - +#ifdef HAVE_LIBBPF_SUPPORT if (!opts->no_bpf_event) { if (rec->sb_evlist == NULL) { rec->sb_evlist = evlist__new(); @@ -1505,7 +1547,7 @@ static int record__setup_sb_evlist(struct record *rec) return -1; } } - +#endif if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; @@ -1514,6 +1556,43 @@ static int record__setup_sb_evlist(struct record *rec) return 0; } +static int record__init_clock(struct record *rec) +{ + struct perf_session *session = rec->session; + struct timespec ref_clockid; + struct timeval ref_tod; + u64 ref; + + if (!rec->opts.use_clockid) + return 0; + + if (rec->opts.use_clockid && rec->opts.clockid_res_ns) + session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns; + + session->header.env.clock.clockid = rec->opts.clockid; + + if (gettimeofday(&ref_tod, NULL) != 0) { + pr_err("gettimeofday failed, cannot set reference time.\n"); + return -1; + } + + if (clock_gettime(rec->opts.clockid, &ref_clockid)) { + pr_err("clock_gettime failed, cannot set reference time.\n"); + return -1; + } + + ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + + (u64) ref_tod.tv_usec * NSEC_PER_USEC; + + session->header.env.clock.tod_ns = ref; + + ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + + (u64) ref_clockid.tv_nsec; + + session->header.env.clock.clockid_ns = ref; + return 0; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1527,6 +1606,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) bool disabled = false, draining = false; int fd; float ratio = 0; + enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; atexit(record__sig_exit); signal(SIGCHLD, sig_handler); @@ -1593,10 +1673,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) return -1; } - record__init_features(rec); + if (record__init_clock(rec)) + return -1; - if (rec->opts.use_clockid && rec->opts.clockid_res_ns) - session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; + record__init_features(rec); if (forks) { err = perf_evlist__prepare_workload(rec->evlist, &opts->target, @@ -1646,7 +1726,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * Normally perf_session__new would do this, but it doesn't have the * evlist. */ - if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { + if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) { pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); rec->tool.ordered_events = false; } @@ -1748,9 +1828,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_evlist__start_workload(rec->evlist); } + if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack)) + goto out_child; + if (opts->initial_delay) { - usleep(opts->initial_delay * USEC_PER_MSEC); - evlist__enable(rec->evlist); + pr_info(EVLIST_DISABLED_MSG); + if (opts->initial_delay > 0) { + usleep(opts->initial_delay * USEC_PER_MSEC); + evlist__enable(rec->evlist); + pr_info(EVLIST_ENABLED_MSG); + } } trigger_ready(&auxtrace_snapshot_trigger); @@ -1842,6 +1929,21 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) draining = true; } + if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) { + switch (cmd) { + case EVLIST_CTL_CMD_ENABLE: + pr_info(EVLIST_ENABLED_MSG); + break; + case EVLIST_CTL_CMD_DISABLE: + pr_info(EVLIST_DISABLED_MSG); + break; + case EVLIST_CTL_CMD_ACK: + case EVLIST_CTL_CMD_UNSUPPORTED: + default: + break; + } + } + /* * When perf is starting the traced process, at the end events * die with the process and we wait for that. Thus no need to @@ -1875,6 +1977,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__synthesize_workload(rec, true); out_child: + evlist__finalize_ctlfd(rec->evlist); record__mmap_read_all(rec, true); record__aio_mmap_read_sync(rec); @@ -2041,103 +2144,6 @@ static int perf_record_config(const char *var, const char *value, void *cb) return 0; } -struct clockid_map { - const char *name; - int clockid; -}; - -#define CLOCKID_MAP(n, c) \ - { .name = n, .clockid = (c), } - -#define CLOCKID_END { .name = NULL, } - - -/* - * Add the missing ones, we need to build on many distros... - */ -#ifndef CLOCK_MONOTONIC_RAW -#define CLOCK_MONOTONIC_RAW 4 -#endif -#ifndef CLOCK_BOOTTIME -#define CLOCK_BOOTTIME 7 -#endif -#ifndef CLOCK_TAI -#define CLOCK_TAI 11 -#endif - -static const struct clockid_map clockids[] = { - /* available for all events, NMI safe */ - CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), - CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), - - /* available for some events */ - CLOCKID_MAP("realtime", CLOCK_REALTIME), - CLOCKID_MAP("boottime", CLOCK_BOOTTIME), - CLOCKID_MAP("tai", CLOCK_TAI), - - /* available for the lazy */ - CLOCKID_MAP("mono", CLOCK_MONOTONIC), - CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), - CLOCKID_MAP("real", CLOCK_REALTIME), - CLOCKID_MAP("boot", CLOCK_BOOTTIME), - - CLOCKID_END, -}; - -static int get_clockid_res(clockid_t clk_id, u64 *res_ns) -{ - struct timespec res; - - *res_ns = 0; - if (!clock_getres(clk_id, &res)) - *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; - else - pr_warning("WARNING: Failed to determine specified clock resolution.\n"); - - return 0; -} - -static int parse_clockid(const struct option *opt, const char *str, int unset) -{ - struct record_opts *opts = (struct record_opts *)opt->value; - const struct clockid_map *cm; - const char *ostr = str; - - if (unset) { - opts->use_clockid = 0; - return 0; - } - - /* no arg passed */ - if (!str) - return 0; - - /* no setting it twice */ - if (opts->use_clockid) - return -1; - - opts->use_clockid = true; - - /* if its a number, we're done */ - if (sscanf(str, "%d", &opts->clockid) == 1) - return get_clockid_res(opts->clockid, &opts->clockid_res_ns); - - /* allow a "CLOCK_" prefix to the name */ - if (!strncasecmp(str, "CLOCK_", 6)) - str += 6; - - for (cm = clockids; cm->name; cm++) { - if (!strcasecmp(str, cm->name)) { - opts->clockid = cm->clockid; - return get_clockid_res(opts->clockid, - &opts->clockid_res_ns); - } - } - - opts->use_clockid = false; - ui__warning("unknown clockid %s, check man page\n", ostr); - return -1; -} static int record__parse_affinity(const struct option *opt, const char *str, int unset) { @@ -2224,6 +2230,33 @@ out_free: return ret; } +static int parse_control_option(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + char *comma = NULL, *endptr = NULL; + struct record_opts *config = (struct record_opts *)opt->value; + + if (strncmp(str, "fd:", 3)) + return -EINVAL; + + config->ctl_fd = strtoul(&str[3], &endptr, 0); + if (endptr == &str[3]) + return -EINVAL; + + comma = strchr(str, ','); + if (comma) { + if (endptr != comma) + return -EINVAL; + + config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); + if (endptr == comma + 1 || *endptr != '\0') + return -EINVAL; + } + + return 0; +} + static void switch_output_size_warn(struct record *rec) { u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages); @@ -2360,6 +2393,8 @@ static struct record record = { }, .mmap_flush = MMAP_FLUSH_DEFAULT, .nr_threads_synthesize = 1, + .ctl_fd = -1, + .ctl_fd_ack = -1, }, .tool = { .sample = process_sample_event, @@ -2462,8 +2497,8 @@ static struct option __record_options[] = { OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), - OPT_UINTEGER('D', "delay", &record.opts.initial_delay, - "ms to wait before starting measurement after program start"), + OPT_INTEGER('D', "delay", &record.opts.initial_delay, + "ms to wait before starting measurement after program start (-1: start with events disabled)"), OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"), OPT_STRING('u', "uid", &record.opts.target.uid_str, "user", "user to profile"), @@ -2561,6 +2596,10 @@ static struct option __record_options[] = { "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif + OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]", + "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + parse_control_option), OPT_END() }; @@ -2722,7 +2761,7 @@ int cmd_record(int argc, const char **argv) record.opts.tail_synthesize = true; if (rec->evlist->core.nr_entries == 0 && - __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { + __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { pr_err("Not enough memory for event selector list\n"); goto out; } @@ -2766,6 +2805,14 @@ int cmd_record(int argc, const char **argv) if (rec->opts.full_auxtrace) rec->buildid_all = true; + if (rec->opts.text_poke) { + err = record__config_text_poke(rec->evlist); + if (err) { + pr_err("record__config_text_poke failed, error %d\n", err); + goto out; + } + } + if (record_opts__config(&rec->opts)) { err = -EINVAL; goto out; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5f1d2a878fade..ece1cddfcd7c7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -338,7 +338,7 @@ static int process_read_event(struct perf_tool *tool, static int report__setup_sample_type(struct report *rep) { struct perf_session *session = rep->session; - u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = evlist__combined_sample_type(session->evlist); bool is_pipe = perf_data__is_pipe(session->data); if (session->itrace_synth_opts->callchain || @@ -410,8 +410,7 @@ static int report__setup_sample_type(struct report *rep) } /* ??? handle more cases than just ANY? */ - if (!(perf_evlist__combined_branch_type(session->evlist) & - PERF_SAMPLE_BRANCH_ANY)) + if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; #if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) @@ -1093,7 +1092,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused, * Check if we need to enable callchains based * on events sample_type. */ - sample_type = perf_evlist__combined_sample_type(*pevlist); + sample_type = evlist__combined_sample_type(*pevlist); callchain_param_setup(sample_type); return 0; } @@ -1389,7 +1388,7 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER) + if (evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER) has_br_stack = false; setup_forced_leader(&report, session->evlist); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 447457786362d..484ce6067d23f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -82,38 +82,64 @@ static bool native_arch; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; enum perf_output_field { - PERF_OUTPUT_COMM = 1U << 0, - PERF_OUTPUT_TID = 1U << 1, - PERF_OUTPUT_PID = 1U << 2, - PERF_OUTPUT_TIME = 1U << 3, - PERF_OUTPUT_CPU = 1U << 4, - PERF_OUTPUT_EVNAME = 1U << 5, - PERF_OUTPUT_TRACE = 1U << 6, - PERF_OUTPUT_IP = 1U << 7, - PERF_OUTPUT_SYM = 1U << 8, - PERF_OUTPUT_DSO = 1U << 9, - PERF_OUTPUT_ADDR = 1U << 10, - PERF_OUTPUT_SYMOFFSET = 1U << 11, - PERF_OUTPUT_SRCLINE = 1U << 12, - PERF_OUTPUT_PERIOD = 1U << 13, - PERF_OUTPUT_IREGS = 1U << 14, - PERF_OUTPUT_BRSTACK = 1U << 15, - PERF_OUTPUT_BRSTACKSYM = 1U << 16, - PERF_OUTPUT_DATA_SRC = 1U << 17, - PERF_OUTPUT_WEIGHT = 1U << 18, - PERF_OUTPUT_BPF_OUTPUT = 1U << 19, - PERF_OUTPUT_CALLINDENT = 1U << 20, - PERF_OUTPUT_INSN = 1U << 21, - PERF_OUTPUT_INSNLEN = 1U << 22, - PERF_OUTPUT_BRSTACKINSN = 1U << 23, - PERF_OUTPUT_BRSTACKOFF = 1U << 24, - PERF_OUTPUT_SYNTH = 1U << 25, - PERF_OUTPUT_PHYS_ADDR = 1U << 26, - PERF_OUTPUT_UREGS = 1U << 27, - PERF_OUTPUT_METRIC = 1U << 28, - PERF_OUTPUT_MISC = 1U << 29, - PERF_OUTPUT_SRCCODE = 1U << 30, - PERF_OUTPUT_IPC = 1U << 31, + PERF_OUTPUT_COMM = 1ULL << 0, + PERF_OUTPUT_TID = 1ULL << 1, + PERF_OUTPUT_PID = 1ULL << 2, + PERF_OUTPUT_TIME = 1ULL << 3, + PERF_OUTPUT_CPU = 1ULL << 4, + PERF_OUTPUT_EVNAME = 1ULL << 5, + PERF_OUTPUT_TRACE = 1ULL << 6, + PERF_OUTPUT_IP = 1ULL << 7, + PERF_OUTPUT_SYM = 1ULL << 8, + PERF_OUTPUT_DSO = 1ULL << 9, + PERF_OUTPUT_ADDR = 1ULL << 10, + PERF_OUTPUT_SYMOFFSET = 1ULL << 11, + PERF_OUTPUT_SRCLINE = 1ULL << 12, + PERF_OUTPUT_PERIOD = 1ULL << 13, + PERF_OUTPUT_IREGS = 1ULL << 14, + PERF_OUTPUT_BRSTACK = 1ULL << 15, + PERF_OUTPUT_BRSTACKSYM = 1ULL << 16, + PERF_OUTPUT_DATA_SRC = 1ULL << 17, + PERF_OUTPUT_WEIGHT = 1ULL << 18, + PERF_OUTPUT_BPF_OUTPUT = 1ULL << 19, + PERF_OUTPUT_CALLINDENT = 1ULL << 20, + PERF_OUTPUT_INSN = 1ULL << 21, + PERF_OUTPUT_INSNLEN = 1ULL << 22, + PERF_OUTPUT_BRSTACKINSN = 1ULL << 23, + PERF_OUTPUT_BRSTACKOFF = 1ULL << 24, + PERF_OUTPUT_SYNTH = 1ULL << 25, + PERF_OUTPUT_PHYS_ADDR = 1ULL << 26, + PERF_OUTPUT_UREGS = 1ULL << 27, + PERF_OUTPUT_METRIC = 1ULL << 28, + PERF_OUTPUT_MISC = 1ULL << 29, + PERF_OUTPUT_SRCCODE = 1ULL << 30, + PERF_OUTPUT_IPC = 1ULL << 31, + PERF_OUTPUT_TOD = 1ULL << 32, +}; + +struct perf_script { + struct perf_tool tool; + struct perf_session *session; + bool show_task_events; + bool show_mmap_events; + bool show_switch_events; + bool show_namespace_events; + bool show_lost_events; + bool show_round_events; + bool show_bpf_events; + bool show_cgroup_events; + bool show_text_poke_events; + bool allocated; + bool per_event_dump; + bool stitch_lbr; + struct evswitch evswitch; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; + int name_width; + const char *time_str; + struct perf_time_interval *ptime_range; + int range_size; + int range_num; }; struct output_option { @@ -152,6 +178,7 @@ struct output_option { {.str = "misc", .field = PERF_OUTPUT_MISC}, {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, {.str = "ipc", .field = PERF_OUTPUT_IPC}, + {.str = "tod", .field = PERF_OUTPUT_TOD}, }; enum { @@ -388,7 +415,7 @@ static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char * return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false); } -static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session) +static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) { struct perf_event_attr *attr = &evsel->core.attr; bool allow_user_set; @@ -443,8 +470,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *sess return -EINVAL; } if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set && - !(perf_evlist__combined_branch_type(session->evlist) & - PERF_SAMPLE_BRANCH_ANY)) { + !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" "Hint: run 'perf record -b ...'\n"); return -EINVAL; @@ -503,6 +529,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr) */ static int perf_session__check_output_opt(struct perf_session *session) { + bool tod = false; unsigned int j; struct evsel *evsel; @@ -522,13 +549,14 @@ static int perf_session__check_output_opt(struct perf_session *session) } if (evsel && output[j].fields && - perf_evsel__check_attr(evsel, session)) + evsel__check_attr(evsel, session)) return -1; if (evsel == NULL) continue; set_print_ip_opts(&evsel->core.attr); + tod |= output[j].fields & PERF_OUTPUT_TOD; } if (!no_callchain) { @@ -569,13 +597,17 @@ static int perf_session__check_output_opt(struct perf_session *session) } } + if (tod && !session->header.env.clock.enabled) { + pr_err("Can't provide 'tod' time, missing clock data. " + "Please record with -k/--clockid option.\n"); + return -1; + } out: return 0; } static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, - FILE *fp -) + FILE *fp) { unsigned i = 0, r; int printed = 0; @@ -593,6 +625,56 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, return printed; } +#define DEFAULT_TOD_FMT "%F %H:%M:%S" + +static char* +tod_scnprintf(struct perf_script *script, char *buf, int buflen, + u64 timestamp) +{ + u64 tod_ns, clockid_ns; + struct perf_env *env; + unsigned long nsec; + struct tm ltime; + char date[64]; + time_t sec; + + buf[0] = '\0'; + if (buflen < 64 || !script) + return buf; + + env = &script->session->header.env; + if (!env->clock.enabled) { + scnprintf(buf, buflen, "disabled"); + return buf; + } + + clockid_ns = env->clock.clockid_ns; + tod_ns = env->clock.tod_ns; + + if (timestamp > clockid_ns) + tod_ns += timestamp - clockid_ns; + else + tod_ns -= clockid_ns - timestamp; + + sec = (time_t) (tod_ns / NSEC_PER_SEC); + nsec = tod_ns - sec * NSEC_PER_SEC; + + if (localtime_r(&sec, <ime) == NULL) { + scnprintf(buf, buflen, "failed"); + } else { + strftime(date, sizeof(date), DEFAULT_TOD_FMT, <ime); + + if (symbol_conf.nanosecs) { + snprintf(buf, buflen, "%s.%09lu", date, nsec); + } else { + snprintf(buf, buflen, "%s.%06lu", + date, nsec / NSEC_PER_USEC); + } + } + + return buf; +} + static int perf_sample__fprintf_iregs(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { @@ -607,7 +689,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample, attr->sample_regs_user, fp); } -static int perf_sample__fprintf_start(struct perf_sample *sample, +static int perf_sample__fprintf_start(struct perf_script *script, + struct perf_sample *sample, struct thread *thread, struct evsel *evsel, u32 type, FILE *fp) @@ -616,6 +699,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, unsigned long secs; unsigned long long nsecs; int printed = 0; + char tstr[128]; if (PRINT_FIELD(COMM)) { if (latency_format) @@ -684,6 +768,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += ret; } + if (PRINT_FIELD(TOD)) { + tod_scnprintf(script, tstr, sizeof(tstr), sample->time); + printed += fprintf(fp, "%s ", tstr); + } + if (PRINT_FIELD(TIME)) { u64 t = sample->time; if (reltime) { @@ -1668,31 +1757,7 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } -struct perf_script { - struct perf_tool tool; - struct perf_session *session; - bool show_task_events; - bool show_mmap_events; - bool show_switch_events; - bool show_namespace_events; - bool show_lost_events; - bool show_round_events; - bool show_bpf_events; - bool show_cgroup_events; - bool allocated; - bool per_event_dump; - bool stitch_lbr; - struct evswitch evswitch; - struct perf_cpu_map *cpus; - struct perf_thread_map *threads; - int name_width; - const char *time_str; - struct perf_time_interval *ptime_range; - int range_size; - int range_num; -}; - -static int perf_evlist__max_name_len(struct evlist *evlist) +static int evlist__max_name_len(struct evlist *evlist) { struct evsel *evsel; int max = 0; @@ -1739,7 +1804,7 @@ static void script_print_metric(struct perf_stat_config *config __maybe_unused, if (!fmt) return; - perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel, PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); if (color) @@ -1754,7 +1819,7 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused, { struct metric_ctx *mctx = ctx; - perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel, PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); } @@ -1865,7 +1930,7 @@ static void process_event(struct perf_script *script, ++es->samples; - perf_sample__fprintf_start(sample, thread, evsel, + perf_sample__fprintf_start(script, sample, thread, evsel, PERF_RECORD_SAMPLE, fp); if (PRINT_FIELD(PERIOD)) @@ -1875,7 +1940,7 @@ static void process_event(struct perf_script *script, const char *evname = evsel__name(evsel); if (!script->name_width) - script->name_width = perf_evlist__max_name_len(script->session->evlist); + script->name_width = evlist__max_name_len(script->session->evlist); fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]"); } @@ -2120,7 +2185,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, } if (evsel->core.attr.sample_type) { - err = perf_evsel__check_attr(evsel, scr->session); + err = evsel__check_attr(evsel, scr->session); if (err) return err; } @@ -2129,7 +2194,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, * Check if we need to enable callchains based * on events sample_type. */ - sample_type = perf_evlist__combined_sample_type(evlist); + sample_type = evlist__combined_sample_type(evlist); callchain_param_setup(sample_type); /* Enable fields for callchain entries */ @@ -2174,11 +2239,11 @@ static int print_event_with_time(struct perf_tool *tool, thread = machine__findnew_thread(machine, pid, tid); if (thread && evsel) { - perf_sample__fprintf_start(sample, thread, evsel, + perf_sample__fprintf_start(script, sample, thread, evsel, event->header.type, stdout); } - perf_event__fprintf(event, stdout); + perf_event__fprintf(event, machine, stdout); thread__put(thread); @@ -2313,7 +2378,7 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused, struct ordered_events *oe __maybe_unused) { - perf_event__fprintf(event, stdout); + perf_event__fprintf(event, NULL, stdout); return 0; } @@ -2330,6 +2395,18 @@ process_bpf_events(struct perf_tool *tool __maybe_unused, sample->tid); } +static int process_text_poke_events(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + if (perf_event__process_text_poke(tool, event, sample, machine) < 0) + return -1; + + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2438,6 +2515,10 @@ static int __cmd_script(struct perf_script *script) script->tool.ksymbol = process_bpf_events; script->tool.bpf = process_bpf_events; } + if (script->show_text_poke_events) { + script->tool.ksymbol = process_bpf_events; + script->tool.text_poke = process_text_poke_events; + } if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3171,7 +3252,7 @@ static int have_cmd(int argc, const char **argv) static void script__setup_sample_type(struct perf_script *script) { struct perf_session *session = script->session; - u64 sample_type = perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = evlist__combined_sample_type(session->evlist); if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && @@ -3423,7 +3504,7 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," - "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc", + "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), @@ -3474,6 +3555,8 @@ int cmd_script(int argc, const char **argv) "Show round events (if recorded)"), OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events, "Show bpf related events (if recorded)"), + OPT_BOOLEAN('\0', "show-text-poke-events", &script.show_text_poke_events, + "Show text poke related events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9be020e0098ad..483a28ef4ec4f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -188,6 +188,8 @@ static struct perf_stat_config stat_config = { .metric_only_len = METRIC_ONLY_LEN, .walltime_nsecs_stats = &walltime_nsecs_stats, .big_num = true, + .ctl_fd = -1, + .ctl_fd_ack = -1 }; static bool cpus_map_matched(struct evsel *a, struct evsel *b) @@ -475,18 +477,38 @@ static void process_interval(void) print_counters(&rs, 0, NULL); } +static bool handle_interval(unsigned int interval, int *times) +{ + if (interval) { + process_interval(); + if (interval_count && !(--(*times))) + return true; + } + return false; +} + static void enable_counters(void) { - if (stat_config.initial_delay) + if (stat_config.initial_delay < 0) { + pr_info(EVLIST_DISABLED_MSG); + return; + } + + if (stat_config.initial_delay > 0) { + pr_info(EVLIST_DISABLED_MSG); usleep(stat_config.initial_delay * USEC_PER_MSEC); + } /* * We need to enable counters only if: * - we don't have tracee (attaching to task or cpu) * - we have initial delay configured */ - if (!target__none(&target) || stat_config.initial_delay) + if (!target__none(&target) || stat_config.initial_delay) { evlist__enable(evsel_list); + if (stat_config.initial_delay > 0) + pr_info(EVLIST_ENABLED_MSG); + } } static void disable_counters(void) @@ -540,6 +562,86 @@ static bool is_target_alive(struct target *_target, return false; } +static void process_evlist(struct evlist *evlist, unsigned int interval) +{ + enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED; + + if (evlist__ctlfd_process(evlist, &cmd) > 0) { + switch (cmd) { + case EVLIST_CTL_CMD_ENABLE: + pr_info(EVLIST_ENABLED_MSG); + if (interval) + process_interval(); + break; + case EVLIST_CTL_CMD_DISABLE: + if (interval) + process_interval(); + pr_info(EVLIST_DISABLED_MSG); + break; + case EVLIST_CTL_CMD_ACK: + case EVLIST_CTL_CMD_UNSUPPORTED: + default: + break; + } + } +} + +static void compute_tts(struct timespec *time_start, struct timespec *time_stop, + int *time_to_sleep) +{ + int tts = *time_to_sleep; + struct timespec time_diff; + + diff_timespec(&time_diff, time_stop, time_start); + + tts -= time_diff.tv_sec * MSEC_PER_SEC + + time_diff.tv_nsec / NSEC_PER_MSEC; + + if (tts < 0) + tts = 0; + + *time_to_sleep = tts; +} + +static int dispatch_events(bool forks, int timeout, int interval, int *times) +{ + int child_exited = 0, status = 0; + int time_to_sleep, sleep_time; + struct timespec time_start, time_stop; + + if (interval) + sleep_time = interval; + else if (timeout) + sleep_time = timeout; + else + sleep_time = 1000; + + time_to_sleep = sleep_time; + + while (!done) { + if (forks) + child_exited = waitpid(child_pid, &status, WNOHANG); + else + child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0; + + if (child_exited) + break; + + clock_gettime(CLOCK_MONOTONIC, &time_start); + if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */ + if (timeout || handle_interval(interval, times)) + break; + time_to_sleep = sleep_time; + } else { /* fd revent */ + process_evlist(evsel_list, interval); + clock_gettime(CLOCK_MONOTONIC, &time_stop); + compute_tts(&time_start, &time_stop, &time_to_sleep); + } + } + + return status; +} + enum counter_recovery { COUNTER_SKIP, COUNTER_RETRY, @@ -603,7 +705,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) char msg[BUFSIZ]; unsigned long long t0, t1; struct evsel *counter; - struct timespec ts; size_t l; int status = 0; const bool forks = (argc > 0); @@ -612,17 +713,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int i, cpu; bool second_pass = false; - if (interval) { - ts.tv_sec = interval / USEC_PER_MSEC; - ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; - } else if (timeout) { - ts.tv_sec = timeout / USEC_PER_MSEC; - ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC; - } else { - ts.tv_sec = 1; - ts.tv_nsec = 0; - } - if (forks) { if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) { @@ -779,16 +869,8 @@ try_again_reset: perf_evlist__start_workload(evsel_list); enable_counters(); - if (interval || timeout) { - while (!waitpid(child_pid, &status, WNOHANG)) { - nanosleep(&ts, NULL); - if (timeout) - break; - process_interval(); - if (interval_count && !(--times)) - break; - } - } + if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) + status = dispatch_events(forks, timeout, interval, ×); if (child_pid != -1) { if (timeout) kill(child_pid, SIGTERM); @@ -805,18 +887,7 @@ try_again_reset: psignal(WTERMSIG(status), argv[0]); } else { enable_counters(); - while (!done) { - nanosleep(&ts, NULL); - if (!is_target_alive(&target, evsel_list->core.threads)) - break; - if (timeout) - break; - if (interval) { - process_interval(); - if (interval_count && !(--times)) - break; - } - } + status = dispatch_events(forks, timeout, interval, ×); } disable_counters(); @@ -970,6 +1041,33 @@ static int parse_metric_groups(const struct option *opt, &stat_config.metric_events); } +static int parse_control_option(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + char *comma = NULL, *endptr = NULL; + struct perf_stat_config *config = (struct perf_stat_config *)opt->value; + + if (strncmp(str, "fd:", 3)) + return -EINVAL; + + config->ctl_fd = strtoul(&str[3], &endptr, 0); + if (endptr == &str[3]) + return -EINVAL; + + comma = strchr(str, ','); + if (comma) { + if (endptr != comma) + return -EINVAL; + + config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); + if (endptr == comma + 1 || *endptr != '\0') + return -EINVAL; + } + + return 0; +} + static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1041,8 +1139,8 @@ static struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode, "aggregate counts per numa node", AGGR_NODE), - OPT_UINTEGER('D', "delay", &stat_config.initial_delay, - "ms to wait before starting measurement after program start"), + OPT_INTEGER('D', "delay", &stat_config.initial_delay, + "ms to wait before starting measurement after program start (-1: start with events disabled)"), OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, "Only print computed metrics. No raw values", enable_metric_only), OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group, @@ -1071,6 +1169,10 @@ static struct option stat_options[] = { "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif + OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]", + "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + parse_control_option), OPT_END() }; @@ -1679,19 +1781,17 @@ static int add_default_attributes(void) if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; - if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0) return -1; if (pmu_have_event("cpu", "stalled-cycles-frontend")) { - if (perf_evlist__add_default_attrs(evsel_list, - frontend_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0) return -1; } if (pmu_have_event("cpu", "stalled-cycles-backend")) { - if (perf_evlist__add_default_attrs(evsel_list, - backend_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0) return -1; } - if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) + if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } @@ -1701,21 +1801,21 @@ static int add_default_attributes(void) return 0; /* Append detailed run extra attributes: */ - if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) return -1; if (detailed_run < 2) return 0; /* Append very detailed run extra attributes: */ - if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) + if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) return -1; if (detailed_run < 3) return 0; /* Append very, very detailed run extra attributes: */ - return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); + return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } static const char * const stat_record_usage[] = { @@ -2242,6 +2342,9 @@ int cmd_stat(int argc, const char **argv) signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); + if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack)) + goto out; + status = 0; for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { if (stat_config.run_count != 1 && verbose > 0) @@ -2261,6 +2364,8 @@ int cmd_stat(int argc, const char **argv) if (!forever && status != -1 && (!interval || stat_config.summary)) print_counters(NULL, argc, argv); + evlist__finalize_ctlfd(evsel_list); + if (STAT_RECORD) { /* * We synthesize the kernel mmap record just so that older tools @@ -2307,6 +2412,7 @@ out: evlist__delete(evsel_list); + metricgroup__rblist_exit(&stat_config.metric_events); runtime_stat_delete(&stat_config); return status; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 13889d73f8dd5..994c230027bb2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1627,7 +1627,7 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; if (!top.evlist->core.nr_entries && - perf_evlist__add_default(top.evlist) < 0) { + evlist__add_default(top.evlist) < 0) { pr_err("Not enough memory for event selector list\n"); goto out_delete_evlist; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4cbb64edc9983..bea461b6f937b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3917,8 +3917,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) + evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; /* * If a global cgroup was set, apply it to all the events without an @@ -4150,11 +4149,11 @@ out_error_raw_syscalls: goto out_error; out_error_mmap: - perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); + evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); goto out_error; out_error_open: - perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); out_error: fprintf(trace->output, "%s\n", errbuf); @@ -4813,7 +4812,7 @@ int cmd_trace(int argc, const char **argv) "per thread proc mmap processing timeout in ms"), OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only", trace__parse_cgroups), - OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, + OPT_INTEGER('D', "delay", &trace.opts.initial_delay, "ms to wait before starting measurement after program " "start"), OPTS_EVSWITCH(&trace.evswitch), diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json index c121e526442a3..8383a37647ad0 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json @@ -15,5 +15,40 @@ "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )", "MetricName": "PowerBUS_Frequency", "ScaleUnit": "2.5e-7GHz" + }, + { + "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@", + "MetricName" : "mcs01-read", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@", + "MetricName" : "mcs23-read", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@", + "MetricName" : "mcs01-write", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@", + "MetricName" : "mcs23-write", + "MetricGroup" : "memory-bandwidth", + "ScaleUnit": "6.1e-5MB" + }, + { + "MetricExpr" : "nest_powerbus0_imc@PM_PB_CYC@", + "MetricName" : "powerbus_freq", + "ScaleUnit": "1e-9GHz" + }, + { + "MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)", + "MetricName" : "Memory-bandwidth-MCS", + "MetricGroup" : "memory_bw", + "ScaleUnit": "6.1e-5MB" } ] diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index cd00498a5dce6..84352fc49a205 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -59,6 +59,7 @@ perf-y += genelf.o perf-y += api-io.o perf-y += demangle-java-test.o perf-y += pfm.o +perf-y += parse-metric.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README index 430024f618f1b..6cd4081085955 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/attr/README @@ -53,6 +53,7 @@ Following tests are defined (with perf commands): perf record -i kill (test-record-no-inherit) perf record -n kill (test-record-no-samples) perf record -c 100 -P kill (test-record-period) + perf record -c 1 --pfm-events=cycles:period=2 (test-record-pfm-period) perf record -R kill (test-record-raw) perf stat -e cycles kill (test-stat-basic) perf stat kill (test-stat-default) diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/attr/test-record-pfm-period new file mode 100644 index 0000000000000..368f5b8140943 --- /dev/null +++ b/tools/perf/tests/attr/test-record-pfm-period @@ -0,0 +1,9 @@ +[config] +command = record +args = --no-bpf-event -c 10000 --pfm-events=cycles:period=77777 kill >/dev/null 2>&1 +ret = 1 + +[event:base-record] +sample_period=77777 +sample_type=7 +freq=0 diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index da5b6cc23f25b..d328caaba45d4 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -338,6 +338,10 @@ static struct test generic_tests[] = { .func = test__demangle_java, }, { + .desc = "Parse and process metrics", + .func = test__parse_metric, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6fe221d31f07e..035c9123549a9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -678,7 +678,7 @@ static int do_test_code_reading(bool try_kcore) if (verbose > 0) { char errbuf[512]; - perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 1cb02ca2b15fb..4d01051951cd2 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -18,14 +18,15 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2) int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) { + struct expr_id_data *val_ptr; const char *p; - double val, *val_ptr; + double val; int ret; struct expr_parse_ctx ctx; expr__ctx_init(&ctx); - expr__add_id(&ctx, strdup("FOO"), 1); - expr__add_id(&ctx, strdup("BAR"), 2); + expr__add_id_val(&ctx, strdup("FOO"), 1); + expr__add_id_val(&ctx, strdup("BAR"), 2); ret = test(&ctx, "1+1", 2); ret |= test(&ctx, "FOO+BAR", 3); @@ -39,6 +40,14 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) ret |= test(&ctx, "1+1 if 3*4 else 0", 2); ret |= test(&ctx, "1.1 + 2.1", 3.2); ret |= test(&ctx, ".1 + 2.", 2.1); + ret |= test(&ctx, "d_ratio(1, 2)", 0.5); + ret |= test(&ctx, "d_ratio(2.5, 0)", 0); + ret |= test(&ctx, "1.1 < 2.2", 1); + ret |= test(&ctx, "2.2 > 1.1", 1); + ret |= test(&ctx, "1.1 < 1.1", 0); + ret |= test(&ctx, "2.2 > 2.2", 0); + ret |= test(&ctx, "2.2 < 1.1", 0); + ret |= test(&ctx, "1.1 > 2.2", 0); if (ret) return ret; diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index c7c81c4a5b2b5..d9eca8e86a6b0 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -12,6 +12,7 @@ static void fdarray__init_revents(struct fdarray *fda, short revents) for (fd = 0; fd < fda->nr; ++fd) { fda->entries[fd].fd = fda->nr - fd; + fda->entries[fd].events = revents; fda->entries[fd].revents = revents; } } @@ -29,7 +30,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused) { - int nr_fds, expected_fd[2], fd, err = TEST_FAIL; + int nr_fds, err = TEST_FAIL; struct fdarray *fda = fdarray__new(5, 5); if (fda == NULL) { @@ -55,7 +56,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_ fdarray__init_revents(fda, POLLHUP); fda->entries[2].revents = POLLIN; - expected_fd[0] = fda->entries[2].fd; pr_debug("\nfiltering all but fda->entries[2]:"); fdarray__fprintf_prefix(fda, "before", stderr); @@ -66,17 +66,9 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_ goto out_delete; } - if (fda->entries[0].fd != expected_fd[0]) { - pr_debug("\nfda->entries[0].fd=%d != %d\n", - fda->entries[0].fd, expected_fd[0]); - goto out_delete; - } - fdarray__init_revents(fda, POLLHUP); fda->entries[0].revents = POLLIN; - expected_fd[0] = fda->entries[0].fd; fda->entries[3].revents = POLLIN; - expected_fd[1] = fda->entries[3].fd; pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):"); fdarray__fprintf_prefix(fda, "before", stderr); @@ -88,14 +80,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_ goto out_delete; } - for (fd = 0; fd < 2; ++fd) { - if (fda->entries[fd].fd != expected_fd[fd]) { - pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd, - fda->entries[fd].fd, expected_fd[fd]); - goto out_delete; - } - } - pr_debug("\n"); err = 0; @@ -128,7 +112,7 @@ int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unu } #define FDA_ADD(_idx, _fd, _revents, _nr) \ - if (fdarray__add(fda, _fd, _revents) < 0) { \ + if (fdarray__add(fda, _fd, _revents, fdarray_flag__default) < 0) { \ pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \ __LINE__,_fd, _revents); \ goto out_delete; \ diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 895188b63f963..7f9f87a470c3b 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -631,6 +631,34 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong val", term->val.num == 1); TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask")); + /* + * read + * + * The perf_pmu__test_parse_init injects 'read' term into + * perf_pmu_events_list, so 'read' is evaluated as read term + * and not as raw event with 'ead' hex value. + */ + term = list_entry(term->list.next, struct parse_events_term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_USER); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 1); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read")); + + /* + * r0xead + * + * To be still able to pass 'ead' value with 'r' syntax, + * we added support to parse 'r0xHEX' event. + */ + term = list_entry(term->list.next, struct parse_events_term, list); + TEST_ASSERT_VAL("wrong type term", + term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG); + TEST_ASSERT_VAL("wrong type val", + term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + TEST_ASSERT_VAL("wrong val", term->val.num == 0xead); + TEST_ASSERT_VAL("wrong config", !term->config); return 0; } @@ -1766,6 +1794,11 @@ static struct evlist_test test__events_pmu[] = { .check = test__checkevent_raw_pmu, .id = 4, }, + { + .name = "software/r0x1a/", + .check = test__checkevent_raw_pmu, + .id = 4, + }, }; struct terms_test { @@ -1776,7 +1809,7 @@ struct terms_test { static struct terms_test test__terms[] = { [0] = { - .str = "config=10,config1,config2=3,umask=1", + .str = "config=10,config1,config2=3,umask=1,read,r0xead", .check = test__checkterms_simple, }, }; @@ -1836,6 +1869,13 @@ static int test_term(struct terms_test *t) INIT_LIST_HEAD(&terms); + /* + * The perf_pmu__test_parse_init prepares perf_pmu_events_list + * which gets freed in parse_events_terms. + */ + if (perf_pmu__test_parse_init()) + return -1; + ret = parse_events_terms(&terms, t->str); if (ret) { pr_debug("failed to parse terms '%s', err %d\n", diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c new file mode 100644 index 0000000000000..fc0838a7abc22 --- /dev/null +++ b/tools/perf/tests/parse-metric.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <string.h> +#include <perf/cpumap.h> +#include <perf/evlist.h> +#include "metricgroup.h" +#include "tests.h" +#include "pmu-events/pmu-events.h" +#include "evlist.h" +#include "rblist.h" +#include "debug.h" +#include "expr.h" +#include "stat.h" +#include <perf/cpumap.h> +#include <perf/evlist.h> + +static struct pmu_event pme_test[] = { +{ + .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread", + .metric_name = "IPC", + .metric_group = "group1", +}, +{ + .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * " + "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))", + .metric_name = "Frontend_Bound_SMT", +}, +{ + .metric_expr = "l1d\\-loads\\-misses / inst_retired.any", + .metric_name = "dcache_miss_cpi", +}, +{ + .metric_expr = "l1i\\-loads\\-misses / inst_retired.any", + .metric_name = "icache_miss_cycles", +}, +{ + .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)", + .metric_name = "cache_miss_cycles", + .metric_group = "group1", +}, +{ + .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit", + .metric_name = "DCache_L2_All_Hits", +}, +{ + .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + " + "l2_rqsts.pf_miss + l2_rqsts.rfo_miss", + .metric_name = "DCache_L2_All_Miss", +}, +{ + .metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss", + .metric_name = "DCache_L2_All", +}, +{ + .metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)", + .metric_name = "DCache_L2_Hits", +}, +{ + .metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)", + .metric_name = "DCache_L2_Misses", +}, +{ + .metric_expr = "ipc + m2", + .metric_name = "M1", +}, +{ + .metric_expr = "ipc + m1", + .metric_name = "M2", +}, +{ + .metric_expr = "1/m3", + .metric_name = "M3", +} +}; + +static struct pmu_events_map map = { + .cpuid = "test", + .version = "1", + .type = "core", + .table = pme_test, +}; + +struct value { + const char *event; + u64 val; +}; + +static u64 find_value(const char *name, struct value *values) +{ + struct value *v = values; + + while (v->event) { + if (!strcmp(name, v->event)) + return v->val; + v++; + }; + return 0; +} + +static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, + struct value *vals) +{ + struct evsel *evsel; + u64 count; + + evlist__for_each_entry(evlist, evsel) { + count = find_value(evsel->name, vals); + perf_stat__update_shadow_stats(evsel, count, 0, st); + } +} + +static double compute_single(struct rblist *metric_events, struct evlist *evlist, + struct runtime_stat *st, const char *name) +{ + struct metric_expr *mexp; + struct metric_event *me; + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + me = metricgroup__lookup(metric_events, evsel, false); + if (me != NULL) { + list_for_each_entry (mexp, &me->head, nd) { + if (strcmp(mexp->metric_name, name)) + continue; + return test_generic_metric(mexp, 0, st); + } + } + } + return 0.; +} + +static int __compute_metric(const char *name, struct value *vals, + const char *name1, double *ratio1, + const char *name2, double *ratio2) +{ + struct rblist metric_events = { + .nr_entries = 0, + }; + struct perf_cpu_map *cpus; + struct runtime_stat st; + struct evlist *evlist; + int err; + + /* + * We need to prepare evlist for stat mode running on CPU 0 + * because that's where all the stats are going to be created. + */ + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + cpus = perf_cpu_map__new("0"); + if (!cpus) + return -ENOMEM; + + perf_evlist__set_maps(&evlist->core, cpus, NULL); + + /* Parse the metric into metric_events list. */ + err = metricgroup__parse_groups_test(evlist, &map, name, + false, false, + &metric_events); + if (err) + return err; + + if (perf_evlist__alloc_stats(evlist, false)) + return -1; + + /* Load the runtime stats with given numbers for events. */ + runtime_stat__init(&st); + load_runtime_stat(&st, evlist, vals); + + /* And execute the metric */ + if (name1 && ratio1) + *ratio1 = compute_single(&metric_events, evlist, &st, name1); + if (name2 && ratio2) + *ratio2 = compute_single(&metric_events, evlist, &st, name2); + + /* ... clenup. */ + metricgroup__rblist_exit(&metric_events); + runtime_stat__exit(&st); + perf_evlist__free_stats(evlist); + perf_cpu_map__put(cpus); + evlist__delete(evlist); + return 0; +} + +static int compute_metric(const char *name, struct value *vals, double *ratio) +{ + return __compute_metric(name, vals, name, ratio, NULL, NULL); +} + +static int compute_metric_group(const char *name, struct value *vals, + const char *name1, double *ratio1, + const char *name2, double *ratio2) +{ + return __compute_metric(name, vals, name1, ratio1, name2, ratio2); +} + +static int test_ipc(void) +{ + double ratio; + struct value vals[] = { + { .event = "inst_retired.any", .val = 300 }, + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("IPC", vals, &ratio) == 0); + + TEST_ASSERT_VAL("IPC failed, wrong ratio", + ratio == 1.5); + return 0; +} + +static int test_frontend(void) +{ + double ratio; + struct value vals[] = { + { .event = "idq_uops_not_delivered.core", .val = 300 }, + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = "cpu_clk_unhalted.one_thread_active", .val = 400 }, + { .event = "cpu_clk_unhalted.ref_xclk", .val = 600 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0); + + TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio", + ratio == 0.45); + return 0; +} + +static int test_cache_miss_cycles(void) +{ + double ratio; + struct value vals[] = { + { .event = "l1d-loads-misses", .val = 300 }, + { .event = "l1i-loads-misses", .val = 200 }, + { .event = "inst_retired.any", .val = 400 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("cache_miss_cycles", vals, &ratio) == 0); + + TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio", + ratio == 1.25); + return 0; +} + + +/* + * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hi + * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + + * l2_rqsts.pf_miss + l2_rqsts.rfo_miss + * DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss + * DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all) + * DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all) + * + * l2_rqsts.demand_data_rd_hit = 100 + * l2_rqsts.pf_hit = 200 + * l2_rqsts.rfo_hi = 300 + * l2_rqsts.all_demand_data_rd = 400 + * l2_rqsts.pf_miss = 500 + * l2_rqsts.rfo_miss = 600 + * + * DCache_L2_All_Hits = 600 + * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400 + * DCache_L2_All = 600 + 1400 = 2000 + * DCache_L2_Hits = 600 / 2000 = 0.3 + * DCache_L2_Misses = 1400 / 2000 = 0.7 + */ +static int test_dcache_l2(void) +{ + double ratio; + struct value vals[] = { + { .event = "l2_rqsts.demand_data_rd_hit", .val = 100 }, + { .event = "l2_rqsts.pf_hit", .val = 200 }, + { .event = "l2_rqsts.rfo_hit", .val = 300 }, + { .event = "l2_rqsts.all_demand_data_rd", .val = 400 }, + { .event = "l2_rqsts.pf_miss", .val = 500 }, + { .event = "l2_rqsts.rfo_miss", .val = 600 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("DCache_L2_Hits", vals, &ratio) == 0); + + TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio", + ratio == 0.3); + + TEST_ASSERT_VAL("failed to compute metric", + compute_metric("DCache_L2_Misses", vals, &ratio) == 0); + + TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio", + ratio == 0.7); + return 0; +} + +static int test_recursion_fail(void) +{ + double ratio; + struct value vals[] = { + { .event = "inst_retired.any", .val = 300 }, + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to find recursion", + compute_metric("M1", vals, &ratio) == -1); + + TEST_ASSERT_VAL("failed to find recursion", + compute_metric("M3", vals, &ratio) == -1); + return 0; +} + +static int test_metric_group(void) +{ + double ratio1, ratio2; + struct value vals[] = { + { .event = "cpu_clk_unhalted.thread", .val = 200 }, + { .event = "l1d-loads-misses", .val = 300 }, + { .event = "l1i-loads-misses", .val = 200 }, + { .event = "inst_retired.any", .val = 400 }, + { .event = NULL, }, + }; + + TEST_ASSERT_VAL("failed to find recursion", + compute_metric_group("group1", vals, + "IPC", &ratio1, + "cache_miss_cycles", &ratio2) == 0); + + TEST_ASSERT_VAL("group IPC failed, wrong ratio", + ratio1 == 2.0); + + TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio", + ratio2 == 1.25); + return 0; +} + +int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); + TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); + TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); + TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); + TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); + TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); + return 0; +} diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 83adfd846ccda..67d3f5aad0167 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -185,14 +185,14 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { if (verbose > 0) - perf_event__fprintf(event, stderr); + perf_event__fprintf(event, NULL, stderr); pr_debug("Couldn't parse sample\n"); goto out_delete_evlist; } if (verbose > 0) { pr_info("%" PRIu64" %d ", sample.time, sample.cpu); - perf_event__fprintf(event, stderr); + perf_event__fprintf(event, NULL, stderr); } if (prev_time > sample.time) { diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index ab64b4a4e2848..eb19f9a0bc153 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -390,9 +390,9 @@ static bool is_number(const char *str) return errno == 0 && end_ptr != str; } -static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) +static int check_parse_id(const char *id, struct parse_events_error *error, + struct perf_pmu *fake_pmu) { - struct parse_events_error error; struct evlist *evlist; int ret; @@ -401,8 +401,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) return 0; evlist = evlist__new(); - memset(&error, 0, sizeof(error)); - ret = parse_events(evlist, id, &error); + if (!evlist) + return -ENOMEM; + ret = __parse_events(evlist, id, error, fake_pmu); + evlist__delete(evlist); + return ret; +} + +static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe) +{ + struct parse_events_error error = { .idx = 0, }; + + int ret = check_parse_id(id, &error, NULL); if (ret && same_cpu) { pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n", pe->metric_name, id, pe->metric_expr); @@ -413,7 +423,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe) id, pe->metric_name, pe->metric_expr); ret = 0; } - evlist__delete(evlist); + free(error.str); + free(error.help); + free(error.first_str); + free(error.first_help); + return ret; +} + +static int check_parse_fake(const char *id) +{ + struct parse_events_error error = { .idx = 0, }; + int ret = check_parse_id(id, &error, &perf_pmu__fake); + free(error.str); free(error.help); free(error.first_str); @@ -471,10 +492,10 @@ static int test_parsing(void) */ k = 1; hashmap__for_each_entry((&ctx.ids), cur, bkt) - expr__add_id(&ctx, strdup(cur->key), k++); + expr__add_id_val(&ctx, strdup(cur->key), k++); hashmap__for_each_entry((&ctx.ids), cur, bkt) { - if (check_parse_id(cur->key, map == cpus_map, + if (check_parse_cpu(cur->key, map == cpus_map, pe)) ret++; } @@ -490,6 +511,100 @@ static int test_parsing(void) return ret == 0 ? TEST_OK : TEST_SKIP; } +struct test_metric { + const char *str; +}; + +static struct test_metric metrics[] = { + { "(unc_p_power_state_occupancy.cores_c0 / unc_p_clockticks) * 100." }, + { "imx8_ddr0@read\\-cycles@ * 4 * 4", }, + { "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@ * 4", }, + { "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", }, + { "(imx8_ddr0@read\\-cycles@ + imx8_ddr0@write\\-cycles@)", }, +}; + +static int metric_parse_fake(const char *str) +{ + struct expr_parse_ctx ctx; + struct hashmap_entry *cur; + double result; + int ret = -1; + size_t bkt; + int i; + + pr_debug("parsing '%s'\n", str); + + expr__ctx_init(&ctx); + if (expr__find_other(str, NULL, &ctx, 0) < 0) { + pr_err("expr__find_other failed\n"); + return -1; + } + + /* + * Add all ids with a made up value. The value may + * trigger divide by zero when subtracted and so try to + * make them unique. + */ + i = 1; + hashmap__for_each_entry((&ctx.ids), cur, bkt) + expr__add_id_val(&ctx, strdup(cur->key), i++); + + hashmap__for_each_entry((&ctx.ids), cur, bkt) { + if (check_parse_fake(cur->key)) { + pr_err("check_parse_fake failed\n"); + goto out; + } + } + + if (expr__parse(&result, &ctx, str, 1)) + pr_err("expr__parse failed\n"); + else + ret = 0; + +out: + expr__ctx_clear(&ctx); + return ret; +} + +/* + * Parse all the metrics for current architecture, + * or all defined cpus via the 'fake_pmu' + * in parse_events. + */ +static int test_parsing_fake(void) +{ + struct pmu_events_map *map; + struct pmu_event *pe; + unsigned int i, j; + int err = 0; + + for (i = 0; i < ARRAY_SIZE(metrics); i++) { + err = metric_parse_fake(metrics[i].str); + if (err) + return err; + } + + i = 0; + for (;;) { + map = &pmu_events_map[i++]; + if (!map->table) + break; + j = 0; + for (;;) { + pe = &map->table[j++]; + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + err = metric_parse_fake(pe->metric_expr); + if (err) + return err; + } + } + + return 0; +} + static const struct { int (*func)(void); const char *desc; @@ -506,6 +621,10 @@ static const struct { .func = test_parsing, .desc = "Parsing of PMU event table metrics", }, + { + .func = test_parsing_fake, + .desc = "Parsing of PMU event table metrics with fake PMUs", + }, }; const char *test__pmu_events_subtest_get_desc(int subtest) diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 76a4e352eaaf7..4447a516c6897 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -121,6 +121,7 @@ int test__demangle_java(struct test *test, int subtest); int test__pfm(struct test *test, int subtest); const char *test__pfm_subtest_get_desc(int subtest); int test__pfm_subtest_get_nr(void); +int test__parse_metric(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 9023267e56433..bd77825fd5a15 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -209,7 +209,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) ui_browser__mark_fused(browser, pcnt_width + 3 + notes->widths.addr + width, from - 1, - to > from ? true : false); + to > from); } } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8d18380ecd10e..494626e303f5c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -128,6 +128,7 @@ perf-y += expr-bison.o perf-y += expr.o perf-y += branch.o perf-y += mem2node.o +perf-y += clockid.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o @@ -191,36 +192,60 @@ CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(call rule_mkdir) - $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l + $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \ + --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $< -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y +$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \ + -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ -$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c +$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c $(call rule_mkdir) - $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l + $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \ + --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $< -$(OUTPUT)util/expr-bison.c: util/expr.y +$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_ + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \ + -o $(OUTPUT)util/expr-bison.c -p expr_ -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c +$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) - $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l + $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \ + --header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $< -$(OUTPUT)util/pmu-bison.c: util/pmu.y +$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ - -CFLAGS_parse-events-flex.o += -w -CFLAGS_pmu-flex.o += -w -CFLAGS_expr-flex.o += -w -CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w -CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w -CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \ + -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ + +FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26) +ifeq ($(FLEX_GE_26),1) + flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations + CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?) + ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1) + flex_flags += -Wno-misleading-indentation + endif +else + flex_flags := -w +endif +CFLAGS_parse-events-flex.o += $(flex_flags) +CFLAGS_pmu-flex.o += $(flex_flags) +CFLAGS_expr-flex.o += $(flex_flags) + +bison_flags := -DYYENABLE_NLS=0 +BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35) +ifeq ($(BISON_GE_35),1) + bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum +else + bison_flags += -w +endif +CFLAGS_parse-events-bison.o += $(bison_flags) +CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) +CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 76bfb4a9d94e9..0a1fcf7875382 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1621,6 +1621,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil char *build_id_filename; char *build_id_path = NULL; char *pos; + int len; if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && !dso__is_kcore(dso)) @@ -1649,10 +1650,16 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) dirname(build_id_path); - if (dso__is_kcore(dso) || - readlink(build_id_path, linkname, sizeof(linkname)) < 0 || - strstr(linkname, DSO__NAME_KALLSYMS) || - access(filename, R_OK)) { + if (dso__is_kcore(dso)) + goto fallback; + + len = readlink(build_id_path, linkname, sizeof(linkname) - 1); + if (len < 0) + goto fallback; + + linkname[len] = '\0'; + if (strstr(linkname, DSO__NAME_KALLSYMS) || + access(filename, R_OK)) { fallback: /* * If we don't have build-ids or the build-id file isn't in the diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 25c639ac4ad43..42a85c86421d7 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1349,6 +1349,47 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->initial_skip = 0; } +static int get_flag(const char **ptr, unsigned int *flags) +{ + while (1) { + char c = **ptr; + + if (c >= 'a' && c <= 'z') { + *flags |= 1 << (c - 'a'); + ++*ptr; + return 0; + } else if (c == ' ') { + ++*ptr; + continue; + } else { + return -1; + } + } +} + +static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags) +{ + while (1) { + switch (**ptr) { + case '+': + ++*ptr; + if (get_flag(ptr, plus_flags)) + return -1; + break; + case '-': + ++*ptr; + if (get_flag(ptr, minus_flags)) + return -1; + break; + case ' ': + ++*ptr; + break; + default: + return 0; + } + } +} + /* * Please check tools/perf/Documentation/perf-script.txt for information * about the options parsed here, which is introduced after this cset, @@ -1436,9 +1477,15 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, break; case 'e': synth_opts->errors = true; + if (get_flags(&p, &synth_opts->error_plus_flags, + &synth_opts->error_minus_flags)) + goto out_err; break; case 'd': synth_opts->log = true; + if (get_flags(&p, &synth_opts->log_plus_flags, + &synth_opts->log_minus_flags)) + goto out_err; break; case 'c': synth_opts->branches = true; @@ -1507,6 +1554,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'a': synth_opts->remote_access = true; break; + case 'q': + synth_opts->quick += 1; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 142ccf7d34dfa..951d2d14cf249 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -55,6 +55,11 @@ enum itrace_period_type { PERF_ITRACE_PERIOD_NANOSECS, }; +#define AUXTRACE_ERR_FLG_OVERFLOW (1 << ('o' - 'a')) +#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a')) + +#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a')) + /** * struct itrace_synth_opts - AUX area tracing synthesis options. * @set: indicates whether or not options have been set @@ -91,6 +96,11 @@ enum itrace_period_type { * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all * @ptime_range: time intervals to trace or NULL * @range_num: number of time intervals to trace + * @error_plus_flags: flags to affect what errors are reported + * @error_minus_flags: flags to affect what errors are reported + * @log_plus_flags: flags to affect what is logged + * @log_minus_flags: flags to affect what is logged + * @quick: quicker (less detailed) decoding */ struct itrace_synth_opts { bool set; @@ -124,6 +134,11 @@ struct itrace_synth_opts { unsigned long *cpu_bitmap; struct perf_time_interval *ptime_range; int range_num; + unsigned int error_plus_flags; + unsigned int error_minus_flags; + unsigned int log_plus_flags; + unsigned int log_minus_flags; + unsigned int quick; }; /** @@ -604,22 +619,32 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, struct evsel *evsel); #define ITRACE_HELP \ -" i: synthesize instructions events\n" \ +" i[period]: synthesize instructions events\n" \ " b: synthesize branches events (branch misses for Arm SPE)\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ " w: synthesize ptwrite events\n" \ " p: synthesize power events\n" \ -" e: synthesize error events\n" \ -" d: create a debug log\n" \ +" o: synthesize other events recorded due to the use\n" \ +" of aux-output (refer to perf record)\n" \ +" e[flags]: synthesize error events\n" \ +" each flag must be preceded by + or -\n" \ +" error flags are: o (overflow)\n" \ +" l (data lost)\n" \ +" d[flags]: create a debug log\n" \ +" each flag must be preceded by + or -\n" \ +" log flags are: a (all perf events)\n" \ " f: synthesize first level cache events\n" \ " m: synthesize last level cache events\n" \ " t: synthesize TLB events\n" \ " a: synthesize remote access events\n" \ " g[len]: synthesize a call chain (use with i or x)\n" \ +" G[len]: synthesize a call chain on existing event records\n" \ " l[len]: synthesize last branch entries (use with i or x)\n" \ +" L[len]: synthesize last branch entries on existing event records\n" \ " sNUMBER: skip initial number of events\n" \ +" q: quicker (less detailed) decoding\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is ibxwpe or cewp\n" diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c new file mode 100644 index 0000000000000..74365a5d99c1f --- /dev/null +++ b/tools/perf/util/clockid.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <subcmd/parse-options.h> +#include <stdio.h> +#include <time.h> +#include <strings.h> +#include <linux/time64.h> +#include "debug.h" +#include "clockid.h" +#include "record.h" + +struct clockid_map { + const char *name; + int clockid; +}; + +#define CLOCKID_MAP(n, c) \ + { .name = n, .clockid = (c), } + +#define CLOCKID_END { .name = NULL, } + + +/* + * Add the missing ones, we need to build on many distros... + */ +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif +#ifndef CLOCK_BOOTTIME +#define CLOCK_BOOTTIME 7 +#endif +#ifndef CLOCK_TAI +#define CLOCK_TAI 11 +#endif + +static const struct clockid_map clockids[] = { + /* available for all events, NMI safe */ + CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), + CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), + + /* available for some events */ + CLOCKID_MAP("realtime", CLOCK_REALTIME), + CLOCKID_MAP("boottime", CLOCK_BOOTTIME), + CLOCKID_MAP("tai", CLOCK_TAI), + + /* available for the lazy */ + CLOCKID_MAP("mono", CLOCK_MONOTONIC), + CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), + CLOCKID_MAP("real", CLOCK_REALTIME), + CLOCKID_MAP("boot", CLOCK_BOOTTIME), + + CLOCKID_END, +}; + +static int get_clockid_res(clockid_t clk_id, u64 *res_ns) +{ + struct timespec res; + + *res_ns = 0; + if (!clock_getres(clk_id, &res)) + *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; + else + pr_warning("WARNING: Failed to determine specified clock resolution.\n"); + + return 0; +} + +int parse_clockid(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + const struct clockid_map *cm; + const char *ostr = str; + + if (unset) { + opts->use_clockid = 0; + return 0; + } + + /* no arg passed */ + if (!str) + return 0; + + /* no setting it twice */ + if (opts->use_clockid) + return -1; + + opts->use_clockid = true; + + /* if its a number, we're done */ + if (sscanf(str, "%d", &opts->clockid) == 1) + return get_clockid_res(opts->clockid, &opts->clockid_res_ns); + + /* allow a "CLOCK_" prefix to the name */ + if (!strncasecmp(str, "CLOCK_", 6)) + str += 6; + + for (cm = clockids; cm->name; cm++) { + if (!strcasecmp(str, cm->name)) { + opts->clockid = cm->clockid; + return get_clockid_res(opts->clockid, + &opts->clockid_res_ns); + } + } + + opts->use_clockid = false; + ui__warning("unknown clockid %s, check man page\n", ostr); + return -1; +} + +const char *clockid_name(clockid_t clk_id) +{ + const struct clockid_map *cm; + + for (cm = clockids; cm->name; cm++) { + if (cm->clockid == clk_id) + return cm->name; + } + return "(not found)"; +} diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h new file mode 100644 index 0000000000000..9b49b4711c768 --- /dev/null +++ b/tools/perf/util/clockid.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PERF_CLOCKID_H +#define __PERF_CLOCKID_H + +struct option; +int parse_clockid(const struct option *opt, const char *str, int unset); + +const char *clockid_name(clockid_t clk_id); + +#endif diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 5f36fc6a55780..27c5fef9ad546 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -31,6 +31,9 @@ #include "config.h" #include <linux/ctype.h> #include <linux/err.h> +#include <linux/time64.h> +#include "util.h" +#include "clockid.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -1381,11 +1384,26 @@ do { \ return 0; } -static int ctf_writer__setup_clock(struct ctf_writer *cw) +static int ctf_writer__setup_clock(struct ctf_writer *cw, + struct perf_session *session, + bool tod) { struct bt_ctf_clock *clock = cw->clock; + const char *desc = "perf clock"; + int64_t offset = 0; - bt_ctf_clock_set_description(clock, "perf clock"); + if (tod) { + struct perf_env *env = &session->header.env; + + if (!env->clock.enabled) { + pr_err("Can't provide --tod time, missing clock data. " + "Please record with -k/--clockid option.\n"); + return -1; + } + + desc = clockid_name(env->clock.clockid); + offset = env->clock.tod_ns - env->clock.clockid_ns; + } #define SET(__n, __v) \ do { \ @@ -1394,8 +1412,8 @@ do { \ } while (0) SET(frequency, 1000000000); - SET(offset_s, 0); - SET(offset, 0); + SET(offset, offset); + SET(description, desc); SET(precision, 10); SET(is_absolute, 0); @@ -1481,7 +1499,8 @@ static void ctf_writer__cleanup(struct ctf_writer *cw) memset(cw, 0, sizeof(*cw)); } -static int ctf_writer__init(struct ctf_writer *cw, const char *path) +static int ctf_writer__init(struct ctf_writer *cw, const char *path, + struct perf_session *session, bool tod) { struct bt_ctf_writer *writer; struct bt_ctf_stream_class *stream_class; @@ -1505,7 +1524,7 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path) cw->clock = clock; - if (ctf_writer__setup_clock(cw)) { + if (ctf_writer__setup_clock(cw, session, tod)) { pr("Failed to setup CTF clock.\n"); goto err_cleanup; } @@ -1613,17 +1632,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, if (err) return err; - /* CTF writer */ - if (ctf_writer__init(cw, path)) - return -1; - err = -1; /* perf.data session */ session = perf_session__new(&data, 0, &c.tool); - if (IS_ERR(session)) { - err = PTR_ERR(session); - goto free_writer; - } + if (IS_ERR(session)) + return PTR_ERR(session); + + /* CTF writer */ + if (ctf_writer__init(cw, path, session, opts->tod)) + goto free_session; if (c.queue_size) { ordered_events__set_alloc_size(&session->ordered_events, @@ -1632,17 +1649,17 @@ int bt_convert__perf2ctf(const char *input, const char *path, /* CTF writer env/clock setup */ if (ctf_writer__setup_env(cw, session)) - goto free_session; + goto free_writer; /* CTF events setup */ if (setup_events(cw, session)) - goto free_session; + goto free_writer; if (opts->all && setup_non_sample_events(cw, session)) - goto free_session; + goto free_writer; if (setup_streams(cw, session)) - goto free_session; + goto free_writer; err = perf_session__process_events(session); if (!err) @@ -1670,10 +1687,10 @@ int bt_convert__perf2ctf(const char *input, const char *path, return err; -free_session: - perf_session__delete(session); free_writer: ctf_writer__cleanup(cw); +free_session: + perf_session__delete(session); pr_err("Error during conversion setup.\n"); return err; } diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h index af90b6076c061..feab5f114e37e 100644 --- a/tools/perf/util/data-convert.h +++ b/tools/perf/util/data-convert.h @@ -5,6 +5,7 @@ struct perf_data_convert_opts { bool force; bool all; + bool tod; }; #endif /* __DATA_CONVERT_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 99f0a39c3c596..be991cbbe9f89 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -208,6 +208,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; break; @@ -898,6 +899,8 @@ static struct dso_cache *dso_cache__populate(struct dso *dso, if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) *ret = bpf_read(dso, cache_offset, cache->data); + else if (dso->binary_type == DSO_BINARY_TYPE__OOL) + *ret = DSO__DATA_CACHE_SIZE; else *ret = file_read(dso, machine, cache_offset, cache->data); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index d3d03274b0d1a..31c3a92449388 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -42,6 +42,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__BPF_PROG_INFO, DSO_BINARY_TYPE__BPF_IMAGE, + DSO_BINARY_TYPE__OOL, DSO_BINARY_TYPE__NOT_FOUND, }; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1ab2682d5d2b7..a129726520064 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -77,7 +77,6 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; - u64 clockid_res_ns; /* * bpf_info_lock protects bpf rbtrees. This is needed because the @@ -100,6 +99,19 @@ struct perf_env { /* For fast cpu to numa node lookup via perf_env__numa_node */ int *numa_map; int nr_numa_map; + + /* For real clock time reference. */ + struct { + u64 tod_ns; + u64 clockid_ns; + u64 clockid_res_ns; + int clockid; + /* + * enabled is valid for report mode, and is true if above + * values are set, it's set in process_clock_data + */ + bool enabled; + } clock; }; enum perf_compress_type { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f581550a3015d..317a265718458 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -31,6 +31,7 @@ #include "stat.h" #include "session.h" #include "bpf-event.h" +#include "print_binary.h" #include "tool.h" #include "../perf.h" @@ -55,6 +56,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_KSYMBOL] = "KSYMBOL", [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_CGROUP] = "CGROUP", + [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -267,6 +269,14 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused, return machine__process_bpf(machine, event, sample); } +int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_text_poke(machine, event, sample); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n", @@ -413,7 +423,52 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp) event->bpf.type, event->bpf.flags, event->bpf.id); } -size_t perf_event__fprintf(union perf_event *event, FILE *fp) +static int text_poke_printer(enum binary_printer_ops op, unsigned int val, + void *extra, FILE *fp) +{ + bool old = *(bool *)extra; + + switch ((int)op) { + case BINARY_PRINT_LINE_BEGIN: + return fprintf(fp, " %s bytes:", old ? "Old" : "New"); + case BINARY_PRINT_NUM_DATA: + return fprintf(fp, " %02x", val); + case BINARY_PRINT_LINE_END: + return fprintf(fp, "\n"); + default: + return 0; + } +} + +size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp) +{ + struct perf_record_text_poke_event *tp = &event->text_poke; + size_t ret; + bool old; + + ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr); + if (machine) { + struct addr_location al; + + al.map = maps__find(&machine->kmaps, tp->addr); + if (al.map && map__load(al.map) >= 0) { + al.addr = al.map->map_ip(al.map, tp->addr); + al.sym = map__find_symbol(al.map, al.addr); + if (al.sym) + ret += symbol__fprintf_symname_offs(al.sym, &al, fp); + } + } + ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len); + old = true; + ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer, + &old, fp); + old = false; + ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16, + text_poke_printer, &old, fp); + return ret; +} + +size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", perf_event__name(event->header.type)); @@ -457,6 +512,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_BPF_EVENT: ret += perf_event__fprintf_bpf(event, fp); break; + case PERF_RECORD_TEXT_POKE: + ret += perf_event__fprintf_text_poke(event, machine, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 6ae01c3c2ffa7..b828b99176f40 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -351,6 +351,10 @@ int perf_event__process_bpf(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_text_poke(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -385,7 +389,8 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); -size_t perf_event__fprintf(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp); +size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp); int kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name, u64 *addr); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ab48be4cf2584..e3fa3bf7498a2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -63,6 +63,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, perf_evlist__set_maps(&evlist->core, cpus, threads); evlist->workload.pid = -1; evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; + evlist->ctl_fd.fd = -1; + evlist->ctl_fd.ack = -1; + evlist->ctl_fd.pos = -1; } struct evlist *evlist__new(void) @@ -79,7 +82,7 @@ struct evlist *perf_evlist__new_default(void) { struct evlist *evlist = evlist__new(); - if (evlist && perf_evlist__add_default(evlist)) { + if (evlist && evlist__add_default(evlist)) { evlist__delete(evlist); evlist = NULL; } @@ -91,7 +94,7 @@ struct evlist *perf_evlist__new_dummy(void) { struct evlist *evlist = evlist__new(); - if (evlist && perf_evlist__add_dummy(evlist)) { + if (evlist && evlist__add_dummy(evlist)) { evlist__delete(evlist); evlist = NULL; } @@ -231,7 +234,7 @@ void perf_evlist__set_leader(struct evlist *evlist) } } -int __perf_evlist__add_default(struct evlist *evlist, bool precise) +int __evlist__add_default(struct evlist *evlist, bool precise) { struct evsel *evsel = evsel__new_cycles(precise); @@ -242,7 +245,7 @@ int __perf_evlist__add_default(struct evlist *evlist, bool precise) return 0; } -int perf_evlist__add_dummy(struct evlist *evlist) +int evlist__add_dummy(struct evlist *evlist) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, @@ -258,8 +261,7 @@ int perf_evlist__add_dummy(struct evlist *evlist) return 0; } -static int evlist__add_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs) +static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { struct evsel *evsel, *n; LIST_HEAD(head); @@ -282,8 +284,7 @@ out_delete_partial_list: return -1; } -int __perf_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs) +int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { size_t i; @@ -322,8 +323,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist, return NULL; } -int perf_evlist__add_newtp(struct evlist *evlist, - const char *sys, const char *name, void *handler) +int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler) { struct evsel *evsel = evsel__newtp(sys, name); @@ -500,7 +500,7 @@ int perf_evlist__enable_event_idx(struct evlist *evlist, int evlist__add_pollfd(struct evlist *evlist, int fd) { - return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN); + return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default); } int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) @@ -540,7 +540,7 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id) if (sid) return container_of(sid->evsel, struct evsel, core); - if (!perf_evlist__sample_id_all(evlist)) + if (!evlist__sample_id_all(evlist)) return evlist__first(evlist); return NULL; @@ -1088,7 +1088,7 @@ int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) return perf_evlist__append_tp_filter_pids(evlist, 1, &pid); } -bool perf_evlist__valid_sample_type(struct evlist *evlist) +bool evlist__valid_sample_type(struct evlist *evlist) { struct evsel *pos; @@ -1107,7 +1107,7 @@ bool perf_evlist__valid_sample_type(struct evlist *evlist) return true; } -u64 __perf_evlist__combined_sample_type(struct evlist *evlist) +u64 __evlist__combined_sample_type(struct evlist *evlist) { struct evsel *evsel; @@ -1120,13 +1120,13 @@ u64 __perf_evlist__combined_sample_type(struct evlist *evlist) return evlist->combined_sample_type; } -u64 perf_evlist__combined_sample_type(struct evlist *evlist) +u64 evlist__combined_sample_type(struct evlist *evlist) { evlist->combined_sample_type = 0; - return __perf_evlist__combined_sample_type(evlist); + return __evlist__combined_sample_type(evlist); } -u64 perf_evlist__combined_branch_type(struct evlist *evlist) +u64 evlist__combined_branch_type(struct evlist *evlist) { struct evsel *evsel; u64 branch_type = 0; @@ -1191,7 +1191,7 @@ out: return size; } -bool perf_evlist__valid_sample_id_all(struct evlist *evlist) +bool evlist__valid_sample_id_all(struct evlist *evlist) { struct evsel *first = evlist__first(evlist), *pos = first; @@ -1203,7 +1203,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist) return true; } -bool perf_evlist__sample_id_all(struct evlist *evlist) +bool evlist__sample_id_all(struct evlist *evlist) { struct evsel *first = evlist__first(evlist); return first->core.attr.sample_id_all; @@ -1464,8 +1464,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, return evsel__parse_sample_timestamp(evsel, event, timestamp); } -int perf_evlist__strerror_open(struct evlist *evlist, - int err, char *buf, size_t size) +int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size) { int printed, value; char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); @@ -1518,7 +1517,7 @@ out_default: return 0; } -int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size) +int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size) { char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0; @@ -1727,3 +1726,143 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, } return leader; } + +int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack) +{ + if (fd == -1) { + pr_debug("Control descriptor is not initialized\n"); + return 0; + } + + evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, + fdarray_flag__nonfilterable); + if (evlist->ctl_fd.pos < 0) { + evlist->ctl_fd.pos = -1; + pr_err("Failed to add ctl fd entry: %m\n"); + return -1; + } + + evlist->ctl_fd.fd = fd; + evlist->ctl_fd.ack = ack; + + return 0; +} + +bool evlist__ctlfd_initialized(struct evlist *evlist) +{ + return evlist->ctl_fd.pos >= 0; +} + +int evlist__finalize_ctlfd(struct evlist *evlist) +{ + struct pollfd *entries = evlist->core.pollfd.entries; + + if (!evlist__ctlfd_initialized(evlist)) + return 0; + + entries[evlist->ctl_fd.pos].fd = -1; + entries[evlist->ctl_fd.pos].events = 0; + entries[evlist->ctl_fd.pos].revents = 0; + + evlist->ctl_fd.pos = -1; + evlist->ctl_fd.ack = -1; + evlist->ctl_fd.fd = -1; + + return 0; +} + +static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, + char *cmd_data, size_t data_size) +{ + int err; + char c; + size_t bytes_read = 0; + + memset(cmd_data, 0, data_size); + data_size--; + + do { + err = read(evlist->ctl_fd.fd, &c, 1); + if (err > 0) { + if (c == '\n' || c == '\0') + break; + cmd_data[bytes_read++] = c; + if (bytes_read == data_size) + break; + } else { + if (err == -1) + pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd); + break; + } + } while (1); + + pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data, + bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0"); + + if (err > 0) { + if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG, + (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_ENABLE; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG, + (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_DISABLE; + } + } + + return err; +} + +static int evlist__ctlfd_ack(struct evlist *evlist) +{ + int err; + + if (evlist->ctl_fd.ack == -1) + return 0; + + err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG, + sizeof(EVLIST_CTL_CMD_ACK_TAG)); + if (err == -1) + pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack); + + return err; +} + +int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) +{ + int err = 0; + char cmd_data[EVLIST_CTL_CMD_MAX_LEN]; + int ctlfd_pos = evlist->ctl_fd.pos; + struct pollfd *entries = evlist->core.pollfd.entries; + + if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents) + return 0; + + if (entries[ctlfd_pos].revents & POLLIN) { + err = evlist__ctlfd_recv(evlist, cmd, cmd_data, + EVLIST_CTL_CMD_MAX_LEN); + if (err > 0) { + switch (*cmd) { + case EVLIST_CTL_CMD_ENABLE: + evlist__enable(evlist); + break; + case EVLIST_CTL_CMD_DISABLE: + evlist__disable(evlist); + break; + case EVLIST_CTL_CMD_ACK: + case EVLIST_CTL_CMD_UNSUPPORTED: + default: + pr_debug("ctlfd: unsupported %d\n", *cmd); + break; + } + if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED)) + evlist__ctlfd_ack(evlist); + } + } + + if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR)) + evlist__finalize_ctlfd(evlist); + else + entries[ctlfd_pos].revents = 0; + + return err; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a8081dfc19cf5..c73f7f7f120b6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -74,6 +74,11 @@ struct evlist { pthread_t th; volatile int done; } thread; + struct { + int fd; /* control file descriptor */ + int ack; /* ack file descriptor for control commands */ + int pos; /* index at evlist core object to check signals */ + } ctl_fd; }; struct evsel_str_handler { @@ -92,20 +97,20 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int __perf_evlist__add_default(struct evlist *evlist, bool precise); +int __evlist__add_default(struct evlist *evlist, bool precise); -static inline int perf_evlist__add_default(struct evlist *evlist) +static inline int evlist__add_default(struct evlist *evlist) { - return __perf_evlist__add_default(evlist, true); + return __evlist__add_default(evlist, true); } -int __perf_evlist__add_default_attrs(struct evlist *evlist, +int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); -#define perf_evlist__add_default_attrs(evlist, array) \ - __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) +#define evlist__add_default_attrs(evlist, array) \ + __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) -int perf_evlist__add_dummy(struct evlist *evlist); +int evlist__add_dummy(struct evlist *evlist); int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, @@ -116,8 +121,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target); void perf_evlist__stop_sb_thread(struct evlist *evlist); -int perf_evlist__add_newtp(struct evlist *evlist, - const char *sys, const char *name, void *handler); +int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); int __evlist__set_tracepoints_handlers(struct evlist *evlist, const struct evsel_str_handler *assocs, @@ -219,10 +223,10 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct evlist *evlist); -u64 __perf_evlist__combined_sample_type(struct evlist *evlist); -u64 perf_evlist__combined_sample_type(struct evlist *evlist); -u64 perf_evlist__combined_branch_type(struct evlist *evlist); -bool perf_evlist__sample_id_all(struct evlist *evlist); +u64 __evlist__combined_sample_type(struct evlist *evlist); +u64 evlist__combined_sample_type(struct evlist *evlist); +u64 evlist__combined_branch_type(struct evlist *evlist); +bool evlist__sample_id_all(struct evlist *evlist); u16 perf_evlist__id_hdr_size(struct evlist *evlist); int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, @@ -232,8 +236,8 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp); -bool perf_evlist__valid_sample_type(struct evlist *evlist); -bool perf_evlist__valid_sample_id_all(struct evlist *evlist); +bool evlist__valid_sample_type(struct evlist *evlist); +bool evlist__valid_sample_id_all(struct evlist *evlist); bool perf_evlist__valid_read_format(struct evlist *evlist); void perf_evlist__splice_list_tail(struct evlist *evlist, @@ -258,8 +262,8 @@ static inline struct evsel *evlist__last(struct evlist *evlist) return container_of(evsel, struct evsel, core); } -int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); -int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); +int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size); +int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size); bool perf_evlist__can_select_event(struct evlist *evlist, const char *str); void perf_evlist__to_front(struct evlist *evlist, @@ -356,4 +360,25 @@ void perf_evlist__force_leader(struct evlist *evlist); struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close); +#define EVLIST_CTL_CMD_ENABLE_TAG "enable" +#define EVLIST_CTL_CMD_DISABLE_TAG "disable" +#define EVLIST_CTL_CMD_ACK_TAG "ack\n" + +#define EVLIST_CTL_CMD_MAX_LEN 64 + +enum evlist_ctl_cmd { + EVLIST_CTL_CMD_UNSUPPORTED = 0, + EVLIST_CTL_CMD_ENABLE, + EVLIST_CTL_CMD_DISABLE, + EVLIST_CTL_CMD_ACK +}; + +int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); +int evlist__finalize_ctlfd(struct evlist *evlist); +bool evlist__ctlfd_initialized(struct evlist *evlist); +int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd); + +#define EVLIST_ENABLED_MSG "Events enabled\n" +#define EVLIST_DISABLED_MSG "Events disabled\n" + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ef802f6d40c17..fd865002cbbd4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1014,12 +1014,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (callchain && callchain->enabled && !evsel->no_aux_samples) evsel__config_callchain(evsel, opts, callchain); - if (opts->sample_intr_regs && !evsel->no_aux_samples) { + if (opts->sample_intr_regs && !evsel->no_aux_samples && + !evsel__is_dummy_event(evsel)) { attr->sample_regs_intr = opts->sample_intr_regs; evsel__set_sample_bit(evsel, REGS_INTR); } - if (opts->sample_user_regs && !evsel->no_aux_samples) { + if (opts->sample_user_regs && !evsel->no_aux_samples && + !evsel__is_dummy_event(evsel)) { attr->sample_regs_user |= opts->sample_user_regs; evsel__set_sample_bit(evsel, REGS_USER); } @@ -1064,7 +1066,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; - attr->ksymbol = track && !perf_missing_features.ksymbol; + /* + * ksymbol is tracked separately with text poke because it needs to be + * system wide and enabled immediately. + */ + if (!opts->text_poke) + attr->ksymbol = track && !perf_missing_features.ksymbol; attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf; if (opts->record_namespaces) @@ -2495,8 +2502,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg + printed, size - printed, "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n" - "access to performance monitoring and observability operations for users\n" - "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n" + "access to performance monitoring and observability operations for processes\n" + "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n" + "More information can be found at 'Perf events and tool security' document:\n" + "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n" "perf_event_paranoid setting is %d:\n" " -1: Allow use of (almost) all events by all users\n" " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n" @@ -2528,6 +2537,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, "No such device - did you specify an out-of-range profile CPU?"); break; case EOPNOTSUPP: + if (evsel->core.attr.aux_output) + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support 'aux_output' feature", + evsel__name(evsel)); if (evsel->core.attr.sample_period != 0) return scnprintf(msg, size, "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index f64ab91c432ba..53482ef53c411 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -1,10 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdbool.h> #include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include "metricgroup.h" +#include "debug.h" #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" #include <linux/kernel.h> +#include <linux/zalloc.h> +#include <ctype.h> #ifdef PARSER_DEBUG extern int expr_debug; @@ -30,35 +37,144 @@ static bool key_equal(const void *key1, const void *key2, } /* Caller must make sure id is allocated */ -int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val) +int expr__add_id(struct expr_parse_ctx *ctx, const char *id) { - double *val_ptr = NULL, *old_val = NULL; + struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; int ret; - if (val != 0.0) { - val_ptr = malloc(sizeof(double)); - if (!val_ptr) - return -ENOMEM; - *val_ptr = val; + data_ptr = malloc(sizeof(*data_ptr)); + if (!data_ptr) + return -ENOMEM; + + data_ptr->parent = ctx->parent; + + ret = hashmap__set(&ctx->ids, id, data_ptr, + (const void **)&old_key, (void **)&old_data); + if (ret) + free(data_ptr); + free(old_key); + free(old_data); + return ret; +} + +/* Caller must make sure id is allocated */ +int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) +{ + struct expr_id_data *data_ptr = NULL, *old_data = NULL; + char *old_key = NULL; + int ret; + + data_ptr = malloc(sizeof(*data_ptr)); + if (!data_ptr) + return -ENOMEM; + data_ptr->val = val; + data_ptr->is_ref = false; + + ret = hashmap__set(&ctx->ids, id, data_ptr, + (const void **)&old_key, (void **)&old_data); + if (ret) + free(data_ptr); + free(old_key); + free(old_data); + return ret; +} + +int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) +{ + struct expr_id_data *data_ptr = NULL, *old_data = NULL; + char *old_key = NULL; + char *name, *p; + int ret; + + data_ptr = zalloc(sizeof(*data_ptr)); + if (!data_ptr) + return -ENOMEM; + + name = strdup(ref->metric_name); + if (!name) { + free(data_ptr); + return -ENOMEM; } - ret = hashmap__set(&ctx->ids, name, val_ptr, - (const void **)&old_key, (void **)&old_val); + + /* + * The jevents tool converts all metric expressions + * to lowercase, including metric references, hence + * we need to add lowercase name for metric, so it's + * properly found. + */ + for (p = name; *p; p++) + *p = tolower(*p); + + /* + * Intentionally passing just const char pointers, + * originally from 'struct pmu_event' object. + * We don't need to change them, so there's no + * need to create our own copy. + */ + data_ptr->ref.metric_name = ref->metric_name; + data_ptr->ref.metric_expr = ref->metric_expr; + data_ptr->ref.counted = false; + data_ptr->is_ref = true; + + ret = hashmap__set(&ctx->ids, name, data_ptr, + (const void **)&old_key, (void **)&old_data); + if (ret) + free(data_ptr); + + pr_debug2("adding ref metric %s: %s\n", + ref->metric_name, ref->metric_expr); + free(old_key); - free(old_val); + free(old_data); return ret; } -int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr) +int expr__get_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **data) +{ + return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1; +} + +int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **datap) { - double *data; + struct expr_id_data *data; - if (!hashmap__find(&ctx->ids, id, (void **)&data)) + if (expr__get_id(ctx, id, datap) || !*datap) { + pr_debug("%s not found\n", id); return -1; - *val_ptr = (data == NULL) ? 0.0 : *data; + } + + data = *datap; + + pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n", + data->is_ref, data->ref.counted, data->val, id); + + if (data->is_ref && !data->ref.counted) { + data->ref.counted = true; + pr_debug("processing metric: %s ENTRY\n", id); + if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) { + pr_debug("%s failed to count\n", id); + return -1; + } + pr_debug("processing metric: %s EXIT: %f\n", id, data->val); + } + return 0; } +void expr__del_id(struct expr_parse_ctx *ctx, const char *id) +{ + struct expr_id_data *old_val = NULL; + char *old_key = NULL; + + hashmap__delete(&ctx->ids, id, + (const void **)&old_key, (void **)&old_val); + free(old_key); + free(old_val); +} + void expr__ctx_init(struct expr_parse_ctx *ctx) { hashmap__init(&ctx->ids, key_hash, key_equal, NULL); @@ -88,6 +204,8 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, void *scanner; int ret; + pr_debug2("parsing metric: %s\n", expr); + ret = expr_lex_init_extra(&scanner_ctx, &scanner); if (ret) return ret; @@ -116,16 +234,10 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx, int expr__find_other(const char *expr, const char *one, struct expr_parse_ctx *ctx, int runtime) { - double *old_val = NULL; - char *old_key = NULL; int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); - if (one) { - hashmap__delete(&ctx->ids, one, - (const void **)&old_key, (void **)&old_val); - free(old_key); - free(old_val); - } + if (one) + expr__del_id(ctx, one); return ret; } diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 8a2c1074f90f1..fc2b5e824a663 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -11,8 +11,30 @@ #include "util/hashmap.h" //#endif +struct metric_ref; + +struct expr_id { + char *id; + struct expr_id *parent; +}; + struct expr_parse_ctx { - struct hashmap ids; + struct hashmap ids; + struct expr_id *parent; +}; + +struct expr_id_data { + union { + double val; + struct { + const char *metric_name; + const char *metric_expr; + bool counted; + } ref; + struct expr_id *parent; + }; + + bool is_ref; }; struct expr_scanner_ctx { @@ -22,8 +44,14 @@ struct expr_scanner_ctx { void expr__ctx_init(struct expr_parse_ctx *ctx); void expr__ctx_clear(struct expr_parse_ctx *ctx); -int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val); -int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr); +void expr__del_id(struct expr_parse_ctx *ctx, const char *id); +int expr__add_id(struct expr_parse_ctx *ctx, const char *id); +int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); +int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref); +int expr__get_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **data); +int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, + struct expr_id_data **datap); int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime); int expr__find_other(const char *expr, const char *one, diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index f397bf8b1a48e..13e5e3c75f563 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -100,6 +100,7 @@ symbol ({spec}|{sym})+ } } +d_ratio { return D_RATIO; } max { return MAX; } min { return MIN; } if { return IF; } @@ -110,6 +111,8 @@ else { return ELSE; } "|" { return '|'; } "^" { return '^'; } "&" { return '&'; } +"<" { return '<'; } +">" { return '>'; } "-" { return '-'; } "+" { return '+'; } "*" { return '*'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index bf3e898e30554..d34b370391c67 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -10,6 +10,14 @@ #include "smt.h" #include <string.h> +static double d_ratio(double val0, double val1) +{ + if (val1 == 0) { + return 0; + } + return val0 / val1; +} + %} %define api.pure full @@ -28,11 +36,12 @@ %token <num> NUMBER %token <str> ID %destructor { free ($$); } <str> -%token MIN MAX IF ELSE SMT_ON +%token MIN MAX IF ELSE SMT_ON D_RATIO %left MIN MAX IF %left '|' %left '^' %left '&' +%left '<' '>' %left '-' '+' %left '*' '/' '%' %left NEG NOT @@ -60,11 +69,12 @@ all_other: all_other other other: ID { - expr__add_id(ctx, $1, 0.0); + expr__add_id(ctx, $1); } | MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' - +| +'<' | '>' | D_RATIO all_expr: if_expr { *final_val = $1; } ; @@ -75,16 +85,22 @@ if_expr: ; expr: NUMBER - | ID { if (expr__get_id(ctx, $1, &$$)) { - pr_debug("%s not found\n", $1); + | ID { + struct expr_id_data *data; + + if (expr__resolve_id(ctx, $1, &data)) { + free($1); + YYABORT; + } + + $$ = data->val; free($1); - YYABORT; - } - free($1); } | expr '|' expr { $$ = (long)$1 | (long)$3; } | expr '&' expr { $$ = (long)$1 & (long)$3; } | expr '^' expr { $$ = (long)$1 ^ (long)$3; } + | expr '<' expr { $$ = $1 < $3; } + | expr '>' expr { $$ = $1 > $3; } | expr '+' expr { $$ = $1 + $3; } | expr '-' expr { $$ = $1 - $3; } | expr '*' expr { $$ = $1 * $3; } @@ -105,6 +121,7 @@ expr: NUMBER | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; } | SMT_ON { $$ = smt_on() > 0; } + | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); } ; %% diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7a67d017d72c3..251faa9a57893 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -46,6 +46,7 @@ #include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" +#include "clockid.h" #include <linux/ctype.h> #include <internal/lib.h> @@ -891,8 +892,42 @@ static int write_auxtrace(struct feat_fd *ff, static int write_clockid(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - return do_write(ff, &ff->ph->env.clockid_res_ns, - sizeof(ff->ph->env.clockid_res_ns)); + return do_write(ff, &ff->ph->env.clock.clockid_res_ns, + sizeof(ff->ph->env.clock.clockid_res_ns)); +} + +static int write_clock_data(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + u64 *data64; + u32 data32; + int ret; + + /* version */ + data32 = 1; + + ret = do_write(ff, &data32, sizeof(data32)); + if (ret < 0) + return ret; + + /* clockid */ + data32 = ff->ph->env.clock.clockid; + + ret = do_write(ff, &data32, sizeof(data32)); + if (ret < 0) + return ret; + + /* TOD ref time */ + data64 = &ff->ph->env.clock.tod_ns; + + ret = do_write(ff, data64, sizeof(*data64)); + if (ret < 0) + return ret; + + /* clockid ref time */ + data64 = &ff->ph->env.clock.clockid_ns; + + return do_write(ff, data64, sizeof(*data64)); } static int write_dir_format(struct feat_fd *ff, @@ -1546,7 +1581,50 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) static void print_clockid(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n", - ff->ph->env.clockid_res_ns * 1000); + ff->ph->env.clock.clockid_res_ns * 1000); +} + +static void print_clock_data(struct feat_fd *ff, FILE *fp) +{ + struct timespec clockid_ns; + char tstr[64], date[64]; + struct timeval tod_ns; + clockid_t clockid; + struct tm ltime; + u64 ref; + + if (!ff->ph->env.clock.enabled) { + fprintf(fp, "# reference time disabled\n"); + return; + } + + /* Compute TOD time. */ + ref = ff->ph->env.clock.tod_ns; + tod_ns.tv_sec = ref / NSEC_PER_SEC; + ref -= tod_ns.tv_sec * NSEC_PER_SEC; + tod_ns.tv_usec = ref / NSEC_PER_USEC; + + /* Compute clockid time. */ + ref = ff->ph->env.clock.clockid_ns; + clockid_ns.tv_sec = ref / NSEC_PER_SEC; + ref -= clockid_ns.tv_sec * NSEC_PER_SEC; + clockid_ns.tv_nsec = ref; + + clockid = ff->ph->env.clock.clockid; + + if (localtime_r(&tod_ns.tv_sec, <ime) == NULL) + snprintf(tstr, sizeof(tstr), "<error>"); + else { + strftime(date, sizeof(date), "%F %T", <ime); + scnprintf(tstr, sizeof(tstr), "%s.%06d", + date, (int) tod_ns.tv_usec); + } + + fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid); + fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n", + tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec, + clockid_ns.tv_sec, clockid_ns.tv_nsec, + clockid_name(clockid)); } static void print_dir_format(struct feat_fd *ff, FILE *fp) @@ -2732,9 +2810,43 @@ out: static int process_clockid(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u64(ff, &ff->ph->env.clockid_res_ns)) + if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns)) + return -1; + + return 0; +} + +static int process_clock_data(struct feat_fd *ff, + void *_data __maybe_unused) +{ + u32 data32; + u64 data64; + + /* version */ + if (do_read_u32(ff, &data32)) + return -1; + + if (data32 != 1) + return -1; + + /* clockid */ + if (do_read_u32(ff, &data32)) + return -1; + + ff->ph->env.clock.clockid = data32; + + /* TOD ref time */ + if (do_read_u64(ff, &data64)) + return -1; + + ff->ph->env.clock.tod_ns = data64; + + /* clockid ref time */ + if (do_read_u64(ff, &data64)) return -1; + ff->ph->env.clock.clockid_ns = data64; + ff->ph->env.clock.enabled = true; return 0; } @@ -3008,6 +3120,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(BPF_BTF, bpf_btf, false), FEAT_OPR(COMPRESSED, compressed, false), FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), + FEAT_OPR(CLOCK_DATA, clock_data, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 650bd1c7a99b9..2aca71763ecfc 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -44,6 +44,7 @@ enum { HEADER_BPF_BTF, HEADER_COMPRESSED, HEADER_CPU_PMU_CAPS, + HEADER_CLOCK_DATA, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f8ccfd6be0eee..697513f351549 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -55,6 +55,7 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_TIP_PGD, INTEL_PT_STATE_FUP, INTEL_PT_STATE_FUP_NO_TIP, + INTEL_PT_STATE_RESAMPLE, }; static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) @@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) case INTEL_PT_STATE_ERR_RESYNC: case INTEL_PT_STATE_IN_SYNC: case INTEL_PT_STATE_TNT_CONT: + case INTEL_PT_STATE_RESAMPLE: return true; case INTEL_PT_STATE_TNT: case INTEL_PT_STATE_TIP: @@ -109,6 +111,9 @@ struct intel_pt_decoder { bool fixup_last_mtc; bool have_last_ip; bool in_psb; + bool hop; + bool hop_psb_fup; + bool leap; enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; @@ -235,6 +240,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->data = params->data; decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; + decoder->hop = params->quick >= 1; + decoder->leap = params->quick >= 2; decoder->flags = params->flags; @@ -275,6 +282,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); + if (decoder->hop) + intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n"); + return decoder; } @@ -1164,6 +1174,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) return 0; if (err == -EAGAIN || intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; if (intel_pt_fup_event(decoder)) return 0; return -EAGAIN; @@ -1729,8 +1740,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - if (decoder->packet.count) + if (decoder->packet.count) { intel_pt_set_last_ip(decoder); + if (decoder->hop) { + /* Act on FUP at PSBEND */ + decoder->ip = decoder->last_ip; + decoder->hop_psb_fup = true; + } + } break; case INTEL_PT_MODE_TSX: @@ -1874,6 +1891,127 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) } } +static int intel_pt_resample(struct intel_pt_decoder *decoder) +{ + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return 0; +} + +#define HOP_PROCESS 0 +#define HOP_IGNORE 1 +#define HOP_RETURN 2 +#define HOP_AGAIN 3 + +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder); + +/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ +static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) +{ + /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ + if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { + *err = intel_pt_scan_for_psb(decoder); + if (*err) + return HOP_RETURN; + } + + switch (decoder->packet.type) { + case INTEL_PT_TNT: + return HOP_IGNORE; + + case INTEL_PT_TIP_PGD: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + decoder->state.type |= INTEL_PT_TRACE_END; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return HOP_RETURN; + + case INTEL_PT_TIP: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + + case INTEL_PT_FUP: + if (!decoder->packet.count) + return HOP_IGNORE; + intel_pt_set_ip(decoder); + if (intel_pt_fup_event(decoder)) + return HOP_RETURN; + if (!decoder->branch_enable) + *no_tip = true; + if (*no_tip) { + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + } + *err = intel_pt_walk_fup_tip(decoder); + if (!*err) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + return HOP_RETURN; + + case INTEL_PT_PSB: + decoder->last_ip = 0; + decoder->have_last_ip = true; + decoder->hop_psb_fup = false; + *err = intel_pt_walk_psbend(decoder); + if (*err == -EAGAIN) + return HOP_AGAIN; + if (*err) + return HOP_RETURN; + if (decoder->hop_psb_fup) { + decoder->hop_psb_fup = false; + decoder->state.type = INTEL_PT_INSTRUCTION; + decoder->state.from_ip = decoder->ip; + decoder->state.to_ip = 0; + return HOP_RETURN; + } + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; + return HOP_RETURN; + } + return HOP_IGNORE; + + case INTEL_PT_BAD: + case INTEL_PT_PAD: + case INTEL_PT_TIP_PGE: + case INTEL_PT_TSC: + case INTEL_PT_TMA: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_MTC: + case INTEL_PT_CYC: + case INTEL_PT_VMCS: + case INTEL_PT_PSBEND: + case INTEL_PT_CBR: + case INTEL_PT_TRACESTOP: + case INTEL_PT_PIP: + case INTEL_PT_OVF: + case INTEL_PT_MNT: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: + default: + return HOP_PROCESS; + } +} + static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) { bool no_tip = false; @@ -1884,6 +2022,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + if (decoder->hop) { + switch (intel_pt_hop_trace(decoder, &no_tip, &err)) { + case HOP_IGNORE: + continue; + case HOP_RETURN: + return err; + case HOP_AGAIN: + goto next; + default: + break; + } + } + switch (decoder->packet.type) { case INTEL_PT_TNT: if (!decoder->packet.count) @@ -1913,6 +2064,12 @@ next: decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; decoder->state.type |= INTEL_PT_TRACE_BEGIN; + /* + * In hop mode, resample to get the to_ip as an + * "instruction" sample. + */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; return 0; } @@ -1942,17 +2099,13 @@ next: } if (decoder->set_fup_mwait) no_tip = true; + if (no_tip) + decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP; + else + decoder->pkt_state = INTEL_PT_STATE_FUP; err = intel_pt_walk_fup(decoder); - if (err != -EAGAIN) { - if (err) - return err; - if (no_tip) - decoder->pkt_state = - INTEL_PT_STATE_FUP_NO_TIP; - else - decoder->pkt_state = INTEL_PT_STATE_FUP; - return 0; - } + if (err != -EAGAIN) + return err; if (no_tip) { no_tip = false; break; @@ -1980,8 +2133,10 @@ next: * possibility of another CBR change that gets caught up * in the PSB+. */ - if (decoder->cbr != decoder->cbr_seen) + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; return 0; + } break; case INTEL_PT_PIP: @@ -2022,8 +2177,10 @@ next: case INTEL_PT_CBR: intel_pt_calc_cbr(decoder); - if (decoder->cbr != decoder->cbr_seen) + if (decoder->cbr != decoder->cbr_seen) { + decoder->state.type = 0; return 0; + } break; case INTEL_PT_MODE_EXEC: @@ -2032,7 +2189,7 @@ next: case INTEL_PT_MODE_TSX: /* MODE_TSX need not be followed by FUP */ - if (!decoder->pge) { + if (!decoder->pge || decoder->in_psb) { intel_pt_update_in_tx(decoder); break; } @@ -2423,7 +2580,11 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) if (err) return err; - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + /* In hop mode, resample to get the to_ip as an "instruction" sample */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + else + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->overflow = false; decoder->state.from_ip = 0; @@ -2531,6 +2692,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) decoder->ip = 0; intel_pt_clear_stack(&decoder->stack); +leap: err = intel_pt_scan_for_psb(decoder); if (err) return err; @@ -2544,7 +2706,20 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (decoder->ip) { decoder->state.type = 0; /* Do not have a sample */ - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + /* + * In hop mode, resample to get the PSB FUP ip as an + * "instruction" sample. + */ + if (decoder->hop) + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + else + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + } else if (decoder->leap) { + /* + * In leap mode, only PSB+ is decoded, so keeping leaping to the + * next PSB until there is an ip. + */ + goto leap; } else { return intel_pt_sync_ip(decoder); } @@ -2599,19 +2774,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_walk_tip(decoder); break; case INTEL_PT_STATE_FUP: - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_fup(decoder); if (err == -EAGAIN) err = intel_pt_walk_fup_tip(decoder); - else if (!err) - decoder->pkt_state = INTEL_PT_STATE_FUP; break; case INTEL_PT_STATE_FUP_NO_TIP: - decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_fup(decoder); if (err == -EAGAIN) err = intel_pt_walk_trace(decoder); break; + case INTEL_PT_STATE_RESAMPLE: + err = intel_pt_resample(decoder); + break; default: err = intel_pt_bug(decoder); break; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index e289e463d635e..8645fc2654811 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -250,6 +250,7 @@ struct intel_pt_params { uint32_t tsc_ctc_ratio_n; uint32_t tsc_ctc_ratio_d; enum intel_pt_param_flags flags; + unsigned int quick; }; struct intel_pt_decoder; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index cb3c1e569a2db..2a8d245351e74 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -236,7 +236,7 @@ static void intel_pt_log_event(union perf_event *event) if (!intel_pt_enable_logging || !f) return; - perf_event__fprintf(event, f); + perf_event__fprintf(event, NULL, f); } static void intel_pt_dump_sample(struct perf_session *session, @@ -249,6 +249,24 @@ static void intel_pt_dump_sample(struct perf_session *session, intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size); } +static bool intel_pt_log_events(struct intel_pt *pt, u64 tm) +{ + struct perf_time_interval *range = pt->synth_opts.ptime_range; + int n = pt->synth_opts.range_num; + + if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return true; + + if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS) + return false; + + /* perf_time__ranges_skip_sample does not work if time is zero */ + if (!tm) + tm = 1; + + return !n || !perf_time__ranges_skip_sample(range, n, tm); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -520,6 +538,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } +static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, + u64 offset) +{ + struct auxtrace_cache *c = intel_pt_cache(dso, machine); + + if (!c) + return; + + auxtrace_cache__remove(dso->auxtrace_cache, offset); +} + static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) { return ip >= pt->kernel_start ? @@ -1001,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; + params.quick = pt->synth_opts.quick; if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; @@ -1394,7 +1424,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) sample.id = ptq->pt->instructions_id; sample.stream_id = ptq->pt->instructions_id; - sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; + if (pt->synth_opts.quick) + sample.period = 1; + else + sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { @@ -1852,6 +1885,15 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, char msg[MAX_AUXTRACE_ERROR_MSG]; int err; + if (pt->synth_opts.error_minus_flags) { + if (code == INTEL_PT_ERR_OVR && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW) + return 0; + if (code == INTEL_PT_ERR_LOST && + pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST) + return 0; + } + intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, @@ -2566,10 +2608,6 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, return -EINVAL; } - intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", - cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, - &pt->tc)); - ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); if (ret <= 0) return ret; @@ -2594,6 +2632,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) +{ + if (!al->map || addr < al->map->start || addr >= al->map->end) { + if (!thread__find_map(thread, cpumode, addr, al)) + return -1; + } + + return 0; +} + +/* Invalidate all instruction cache entries that overlap the text poke */ +static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) +{ + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; + /* Assume text poke begins in a basic block no more than 4096 bytes */ + int cnt = 4096 + event->text_poke.new_len; + struct thread *thread = pt->unknown_thread; + struct addr_location al = { .map = NULL }; + struct machine *machine = pt->machine; + struct intel_pt_cache_entry *e; + u64 offset; + + if (!event->text_poke.new_len) + return 0; + + for (; cnt; cnt--, addr--) { + if (intel_pt_find_map(thread, cpumode, addr, &al)) { + if (addr < event->text_poke.addr) + return 0; + continue; + } + + if (!al.map->dso || !al.map->dso->auxtrace_cache) + continue; + + offset = al.map->map_ip(al.map, addr); + + e = intel_pt_cache_lookup(al.map->dso, machine, offset); + if (!e) + continue; + + if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { + /* + * No overlap. Working backwards there cannot be another + * basic block that overlaps the text poke if there is a + * branch instruction before the text poke address. + */ + if (e->branch != INTEL_PT_BR_NO_BRANCH) + return 0; + } else { + intel_pt_cache_invalidate(al.map->dso, machine, offset); + intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", + al.map->dso->long_name, addr); + } + } + + return 0; +} + static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -2662,9 +2761,14 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", - event->header.type, sample->cpu, sample->time, timestamp); - intel_pt_log_event(event); + if (!err && event->header.type == PERF_RECORD_TEXT_POKE) + err = intel_pt_text_poke(pt, event); + + if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) { + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); + } return err; } diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 32bb05e03fb2d..0804308ef2857 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -26,6 +26,7 @@ #include "jit.h" #include "jitdump.h" #include "genelf.h" +#include "thread.h" #include <linux/ctype.h> #include <linux/zalloc.h> @@ -749,6 +750,28 @@ jit_detect(char *mmap_name, pid_t pid) return 0; } +static void jit_add_pid(struct machine *machine, pid_t pid) +{ + struct thread *thread = machine__findnew_thread(machine, pid, pid); + + if (!thread) { + pr_err("%s: thread %d not found or created\n", __func__, pid); + return; + } + + thread->priv = (void *)1; +} + +static bool jit_has_pid(struct machine *machine, pid_t pid) +{ + struct thread *thread = machine__find_thread(machine, pid, pid); + + if (!thread) + return 0; + + return (bool)thread->priv; +} + int jit_process(struct perf_session *session, struct perf_data *output, @@ -764,8 +787,13 @@ jit_process(struct perf_session *session, /* * first, detect marker mmap (i.e., the jitdump mmap) */ - if (jit_detect(filename, pid)) + if (jit_detect(filename, pid)) { + // Strip //anon* mmaps if we processed a jitdump for this pid + if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0)) + return 1; + return 0; + } memset(&jd, 0, sizeof(jd)); @@ -784,6 +812,7 @@ jit_process(struct perf_session *session, ret = jit_inject(&jd, filename); if (!ret) { + jit_add_pid(machine, pid); *nbytes = jd.bytes_written; ret = 1; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d5384807372b5..96af544eac8fb 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -762,6 +762,12 @@ static int machine__process_ksymbol_register(struct machine *machine, return -ENOMEM; } + if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) { + map->dso->binary_type = DSO_BINARY_TYPE__OOL; + map->dso->data.file_size = event->ksymbol.len; + dso__set_loaded(map->dso); + } + map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); @@ -808,6 +814,47 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused, return machine__process_ksymbol_register(machine, event, sample); } +int machine__process_text_poke(struct machine *machine, union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct map *map = maps__find(&machine->kmaps, event->text_poke.addr); + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + if (dump_trace) + perf_event__fprintf_text_poke(event, machine, stdout); + + if (!event->text_poke.new_len) + return 0; + + if (cpumode != PERF_RECORD_MISC_KERNEL) { + pr_debug("%s: unsupported cpumode - ignoring\n", __func__); + return 0; + } + + if (map && map->dso) { + u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len; + int ret; + + /* + * Kernel maps might be changed when loading symbols so loading + * must be done prior to using kernel maps. + */ + map__load(map); + ret = dso__data_write_cache_addr(map->dso, map, machine, + event->text_poke.addr, + new_bytes, + event->text_poke.new_len); + if (ret != event->text_poke.new_len) + pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n", + event->text_poke.addr); + } else { + pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n", + event->text_poke.addr); + } + + return 0; +} + static struct map *machine__addnew_module_map(struct machine *machine, u64 start, const char *filename) { @@ -1930,6 +1977,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_ksymbol(machine, event, sample); break; case PERF_RECORD_BPF_EVENT: ret = machine__process_bpf(machine, event, sample); break; + case PERF_RECORD_TEXT_POKE: + ret = machine__process_text_poke(machine, event, sample); break; default: ret = -1; break; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index fa1be9ea00fa8..062c36a8433ce 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -138,6 +138,9 @@ int machine__process_mmap2_event(struct machine *machine, union perf_event *even int machine__process_ksymbol(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_text_poke(struct machine *machine, + union perf_event *event, + struct perf_sample *sample); int machine__process_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 53d96611e6a6a..f9dc8c5493ea3 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -267,6 +267,11 @@ bool __map__is_bpf_prog(const struct map *map) return name && (strstr(name, "bpf_prog_") == name); } +bool __map__is_ool(const struct map *map) +{ + return map->dso && map->dso->binary_type == DSO_BINARY_TYPE__OOL; +} + bool map__has_symbols(const struct map *map) { return dso__has_symbols(map->dso); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 067036e8970c8..9e312ae2d6563 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -147,11 +147,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, bool __map__is_kernel(const struct map *map); bool __map__is_extra_kernel_map(const struct map *map); bool __map__is_bpf_prog(const struct map *map); +bool __map__is_ool(const struct map *map); static inline bool __map__is_kmodule(const struct map *map) { return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) && - !__map__is_bpf_prog(map); + !__map__is_bpf_prog(map) && !__map__is_ool(map); } bool map__has_symbols(const struct map *map); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 9e21aa767e417..8831b964288fa 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -24,6 +24,7 @@ #include <subcmd/parse-options.h> #include <api/fs/fs.h> #include "util.h" +#include <asm/bug.h> struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -76,23 +77,78 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused, return &me->nd; } +static void metric_event_delete(struct rblist *rblist __maybe_unused, + struct rb_node *rb_node) +{ + struct metric_event *me = container_of(rb_node, struct metric_event, nd); + struct metric_expr *expr, *tmp; + + list_for_each_entry_safe(expr, tmp, &me->head, nd) { + free(expr->metric_refs); + free(expr); + } + + free(me); +} + static void metricgroup__rblist_init(struct rblist *metric_events) { rblist__init(metric_events); metric_events->node_cmp = metric_event_cmp; metric_events->node_new = metric_event_new; + metric_events->node_delete = metric_event_delete; } -struct egroup { +void metricgroup__rblist_exit(struct rblist *metric_events) +{ + rblist__exit(metric_events); +} + +/* + * A node in the list of referenced metrics. metric_expr + * is held as a convenience to avoid a search through the + * metric list. + */ +struct metric_ref_node { + const char *metric_name; + const char *metric_expr; + struct list_head list; +}; + +struct metric { struct list_head nd; struct expr_parse_ctx pctx; const char *metric_name; const char *metric_expr; const char *metric_unit; + struct list_head metric_refs; + int metric_refs_cnt; int runtime; bool has_constraint; }; +#define RECURSION_ID_MAX 1000 + +struct expr_ids { + struct expr_id id[RECURSION_ID_MAX]; + int cnt; +}; + +static struct expr_id *expr_ids__alloc(struct expr_ids *ids) +{ + if (ids->cnt >= RECURSION_ID_MAX) + return NULL; + return &ids->id[ids->cnt++]; +} + +static void expr_ids__exit(struct expr_ids *ids) +{ + int i; + + for (i = 0; i < ids->cnt; i++) + free(ids->id[i].id); +} + /** * Find a group of events in perf_evlist that correpond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as @@ -119,7 +175,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, unsigned long *evlist_used) { struct evsel *ev, *current_leader = NULL; - double *val_ptr; + struct expr_id_data *val_ptr; int i = 0, matched_events = 0, events_to_match; const int idnum = (int)hashmap__size(&pctx->ids); @@ -206,7 +262,7 @@ static int metricgroup__setup_events(struct list_head *groups, struct metric_expr *expr; int i = 0; int ret = 0; - struct egroup *eg; + struct metric *m; struct evsel *evsel, *tmp; unsigned long *evlist_used; @@ -214,22 +270,23 @@ static int metricgroup__setup_events(struct list_head *groups, if (!evlist_used) return -ENOMEM; - list_for_each_entry (eg, groups, nd) { + list_for_each_entry (m, groups, nd) { struct evsel **metric_events; + struct metric_ref *metric_refs = NULL; metric_events = calloc(sizeof(void *), - hashmap__size(&eg->pctx.ids) + 1); + hashmap__size(&m->pctx.ids) + 1); if (!metric_events) { ret = -ENOMEM; break; } - evsel = find_evsel_group(perf_evlist, &eg->pctx, + evsel = find_evsel_group(perf_evlist, &m->pctx, metric_no_merge, - eg->has_constraint, metric_events, + m->has_constraint, metric_events, evlist_used); if (!evsel) { pr_debug("Cannot resolve %s: %s\n", - eg->metric_name, eg->metric_expr); + m->metric_name, m->metric_expr); free(metric_events); continue; } @@ -247,11 +304,41 @@ static int metricgroup__setup_events(struct list_head *groups, free(metric_events); break; } - expr->metric_expr = eg->metric_expr; - expr->metric_name = eg->metric_name; - expr->metric_unit = eg->metric_unit; + + /* + * Collect and store collected nested expressions + * for metric processing. + */ + if (m->metric_refs_cnt) { + struct metric_ref_node *ref; + + metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1)); + if (!metric_refs) { + ret = -ENOMEM; + free(metric_events); + break; + } + + i = 0; + list_for_each_entry(ref, &m->metric_refs, list) { + /* + * Intentionally passing just const char pointers, + * originally from 'struct pmu_event' object. + * We don't need to change them, so there's no + * need to create our own copy. + */ + metric_refs[i].metric_name = ref->metric_name; + metric_refs[i].metric_expr = ref->metric_expr; + i++; + } + }; + + expr->metric_refs = metric_refs; + expr->metric_expr = m->metric_expr; + expr->metric_name = m->metric_name; + expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; - expr->runtime = eg->runtime; + expr->runtime = m->runtime; list_add(&expr->nd, &me->head); } @@ -552,123 +639,339 @@ int __weak arch_get_runtimeparam(void) return 1; } -static int __metricgroup__add_metric(struct list_head *group_list, - struct pmu_event *pe, - bool metric_no_group, - int runtime) +static int __add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + int runtime, + struct metric **mp, + struct expr_id *parent, + struct expr_ids *ids) { - struct egroup *eg; + struct metric_ref_node *ref; + struct metric *m; - eg = malloc(sizeof(*eg)); - if (!eg) - return -ENOMEM; + if (*mp == NULL) { + /* + * We got in here for the parent group, + * allocate it and put it on the list. + */ + m = zalloc(sizeof(*m)); + if (!m) + return -ENOMEM; + + expr__ctx_init(&m->pctx); + m->metric_name = pe->metric_name; + m->metric_expr = pe->metric_expr; + m->metric_unit = pe->unit; + m->runtime = runtime; + m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + INIT_LIST_HEAD(&m->metric_refs); + m->metric_refs_cnt = 0; + *mp = m; + + parent = expr_ids__alloc(ids); + if (!parent) { + free(m); + return -EINVAL; + } + + parent->id = strdup(pe->metric_name); + if (!parent->id) { + free(m); + return -ENOMEM; + } + } else { + /* + * We got here for the referenced metric, via the + * recursive metricgroup__add_metric call, add + * it to the parent group. + */ + m = *mp; - expr__ctx_init(&eg->pctx); - eg->metric_name = pe->metric_name; - eg->metric_expr = pe->metric_expr; - eg->metric_unit = pe->unit; - eg->runtime = runtime; - eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + ref = malloc(sizeof(*ref)); + if (!ref) + return -ENOMEM; - if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) { - expr__ctx_clear(&eg->pctx); - free(eg); + /* + * Intentionally passing just const char pointers, + * from 'pe' object, so they never go away. We don't + * need to change them, so there's no need to create + * our own copy. + */ + ref->metric_name = pe->metric_name; + ref->metric_expr = pe->metric_expr; + + list_add(&ref->list, &m->metric_refs); + m->metric_refs_cnt++; + } + + /* Force all found IDs in metric to have us as parent ID. */ + WARN_ON_ONCE(!parent); + m->pctx.parent = parent; + + /* + * For both the parent and referenced metrics, we parse + * all the metric's IDs and add it to the parent context. + */ + if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) { + expr__ctx_clear(&m->pctx); + free(m); return -EINVAL; } - if (list_empty(group_list)) - list_add(&eg->nd, group_list); + /* + * We add new group only in the 'parent' call, + * so bail out for referenced metric case. + */ + if (m->metric_refs_cnt) + return 0; + + if (list_empty(metric_list)) + list_add(&m->nd, metric_list); else { struct list_head *pos; /* Place the largest groups at the front. */ - list_for_each_prev(pos, group_list) { - struct egroup *old = list_entry(pos, struct egroup, nd); + list_for_each_prev(pos, metric_list) { + struct metric *old = list_entry(pos, struct metric, nd); - if (hashmap__size(&eg->pctx.ids) <= + if (hashmap__size(&m->pctx.ids) <= hashmap__size(&old->pctx.ids)) break; } - list_add(&eg->nd, pos); + list_add(&m->nd, pos); } return 0; } +#define map_for_each_event(__pe, __idx, __map) \ + for (__idx = 0, __pe = &__map->table[__idx]; \ + __pe->name || __pe->metric_group || __pe->metric_name; \ + __pe = &__map->table[++__idx]) + +#define map_for_each_metric(__pe, __idx, __map, __metric) \ + map_for_each_event(__pe, __idx, __map) \ + if (__pe->metric_expr && \ + (match_metric(__pe->metric_group, __metric) || \ + match_metric(__pe->metric_name, __metric))) + +static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map) +{ + struct pmu_event *pe; + int i; + + map_for_each_event(pe, i, map) { + if (match_metric(pe->metric_name, metric)) + return pe; + } + + return NULL; +} + +static int recursion_check(struct metric *m, const char *id, struct expr_id **parent, + struct expr_ids *ids) +{ + struct expr_id_data *data; + struct expr_id *p; + int ret; + + /* + * We get the parent referenced by 'id' argument and + * traverse through all the parent object IDs to check + * if we already processed 'id', if we did, it's recursion + * and we fail. + */ + ret = expr__get_id(&m->pctx, id, &data); + if (ret) + return ret; + + p = data->parent; + + while (p->parent) { + if (!strcmp(p->id, id)) { + pr_err("failed: recursion detected for %s\n", id); + return -1; + } + p = p->parent; + } + + /* + * If we are over the limit of static entris, the metric + * is too difficult/nested to process, fail as well. + */ + p = expr_ids__alloc(ids); + if (!p) { + pr_err("failed: too many nested metrics\n"); + return -EINVAL; + } + + p->id = strdup(id); + p->parent = data->parent; + *parent = p; + + return p->id ? 0 : -ENOMEM; +} + +static int add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + struct metric **mp, + struct expr_id *parent, + struct expr_ids *ids); + +static int __resolve_metric(struct metric *m, + bool metric_no_group, + struct list_head *metric_list, + struct pmu_events_map *map, + struct expr_ids *ids) +{ + struct hashmap_entry *cur; + size_t bkt; + bool all; + int ret; + + /* + * Iterate all the parsed IDs and if there's metric, + * add it to the context. + */ + do { + all = true; + hashmap__for_each_entry((&m->pctx.ids), cur, bkt) { + struct expr_id *parent; + struct pmu_event *pe; + + pe = find_metric(cur->key, map); + if (!pe) + continue; + + ret = recursion_check(m, cur->key, &parent, ids); + if (ret) + return ret; + + all = false; + /* The metric key itself needs to go out.. */ + expr__del_id(&m->pctx, cur->key); + + /* ... and it gets resolved to the parent context. */ + ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids); + if (ret) + return ret; + + /* + * We added new metric to hashmap, so we need + * to break the iteration and start over. + */ + break; + } + } while (!all); + + return 0; +} + +static int resolve_metric(bool metric_no_group, + struct list_head *metric_list, + struct pmu_events_map *map, + struct expr_ids *ids) +{ + struct metric *m; + int err; + + list_for_each_entry(m, metric_list, nd) { + err = __resolve_metric(m, metric_no_group, metric_list, map, ids); + if (err) + return err; + } + return 0; +} + +static int add_metric(struct list_head *metric_list, + struct pmu_event *pe, + bool metric_no_group, + struct metric **m, + struct expr_id *parent, + struct expr_ids *ids) +{ + struct metric *orig = *m; + int ret = 0; + + pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); + + if (!strstr(pe->metric_expr, "?")) { + ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids); + } else { + int j, count; + + count = arch_get_runtimeparam(); + + /* This loop is added to create multiple + * events depend on count value and add + * those events to metric_list. + */ + + for (j = 0; j < count && !ret; j++, *m = orig) + ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids); + } + + return ret; +} + static int metricgroup__add_metric(const char *metric, bool metric_no_group, struct strbuf *events, - struct list_head *group_list) + struct list_head *metric_list, + struct pmu_events_map *map) { - struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct expr_ids ids = { .cnt = 0, }; struct pmu_event *pe; - struct egroup *eg; + struct metric *m; + LIST_HEAD(list); int i, ret; bool has_match = false; - if (!map) - return 0; + map_for_each_metric(pe, i, map, metric) { + has_match = true; + m = NULL; - for (i = 0; ; i++) { - pe = &map->table[i]; + ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids); + if (ret) + return ret; - if (!pe->name && !pe->metric_group && !pe->metric_name) { - /* End of pmu events. */ - if (!has_match) - return -EINVAL; - break; - } - if (!pe->metric_expr) - continue; - if (match_metric(pe->metric_group, metric) || - match_metric(pe->metric_name, metric)) { - has_match = true; - pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); - - if (!strstr(pe->metric_expr, "?")) { - ret = __metricgroup__add_metric(group_list, - pe, - metric_no_group, - 1); - if (ret) - return ret; - } else { - int j, count; - - count = arch_get_runtimeparam(); - - /* This loop is added to create multiple - * events depend on count value and add - * those events to group_list. - */ - - for (j = 0; j < count; j++) { - ret = __metricgroup__add_metric( - group_list, pe, - metric_no_group, j); - if (ret) - return ret; - } - } - } + /* + * Process any possible referenced metrics + * included in the expression. + */ + ret = resolve_metric(metric_no_group, + &list, map, &ids); + if (ret) + return ret; } - list_for_each_entry(eg, group_list, nd) { + + /* End of pmu events. */ + if (!has_match) + return -EINVAL; + + list_for_each_entry(m, &list, nd) { if (events->len > 0) strbuf_addf(events, ","); - if (eg->has_constraint) { + if (m->has_constraint) { metricgroup__add_metric_non_group(events, - &eg->pctx); + &m->pctx); } else { metricgroup__add_metric_weak_group(events, - &eg->pctx); + &m->pctx); } } + + list_splice(&list, metric_list); + expr_ids__exit(&ids); return 0; } static int metricgroup__add_metric_list(const char *list, bool metric_no_group, struct strbuf *events, - struct list_head *group_list) + struct list_head *metric_list, + struct pmu_events_map *map) { char *llist, *nlist, *p; int ret = -EINVAL; @@ -683,7 +986,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, while ((p = strsep(&llist, ",")) != NULL) { ret = metricgroup__add_metric(p, metric_no_group, events, - group_list); + metric_list, map); if (ret == -EINVAL) { fprintf(stderr, "Cannot find metric or group `%s'\n", p); @@ -698,50 +1001,88 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, return ret; } -static void metricgroup__free_egroups(struct list_head *group_list) +static void metric__free_refs(struct metric *metric) { - struct egroup *eg, *egtmp; + struct metric_ref_node *ref, *tmp; - list_for_each_entry_safe (eg, egtmp, group_list, nd) { - expr__ctx_clear(&eg->pctx); - list_del_init(&eg->nd); - free(eg); + list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) { + list_del(&ref->list); + free(ref); } } -int metricgroup__parse_groups(const struct option *opt, - const char *str, - bool metric_no_group, - bool metric_no_merge, - struct rblist *metric_events) +static void metricgroup__free_metrics(struct list_head *metric_list) +{ + struct metric *m, *tmp; + + list_for_each_entry_safe (m, tmp, metric_list, nd) { + metric__free_refs(m); + expr__ctx_clear(&m->pctx); + list_del_init(&m->nd); + free(m); + } +} + +static int parse_groups(struct evlist *perf_evlist, const char *str, + bool metric_no_group, + bool metric_no_merge, + struct perf_pmu *fake_pmu, + struct rblist *metric_events, + struct pmu_events_map *map) { struct parse_events_error parse_error; - struct evlist *perf_evlist = *(struct evlist **)opt->value; struct strbuf extra_events; - LIST_HEAD(group_list); + LIST_HEAD(metric_list); int ret; if (metric_events->nr_entries == 0) metricgroup__rblist_init(metric_events); ret = metricgroup__add_metric_list(str, metric_no_group, - &extra_events, &group_list); + &extra_events, &metric_list, map); if (ret) return ret; pr_debug("adding %s\n", extra_events.buf); bzero(&parse_error, sizeof(parse_error)); - ret = parse_events(perf_evlist, extra_events.buf, &parse_error); + ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu); if (ret) { parse_events_print_error(&parse_error, extra_events.buf); goto out; } strbuf_release(&extra_events); - ret = metricgroup__setup_events(&group_list, metric_no_merge, + ret = metricgroup__setup_events(&metric_list, metric_no_merge, perf_evlist, metric_events); out: - metricgroup__free_egroups(&group_list); + metricgroup__free_metrics(&metric_list); return ret; } +int metricgroup__parse_groups(const struct option *opt, + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events) +{ + struct evlist *perf_evlist = *(struct evlist **)opt->value; + struct pmu_events_map *map = perf_pmu__find_map(NULL); + + if (!map) + return 0; + + return parse_groups(perf_evlist, str, metric_no_group, + metric_no_merge, NULL, metric_events, map); +} + +int metricgroup__parse_groups_test(struct evlist *evlist, + struct pmu_events_map *map, + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events) +{ + return parse_groups(evlist, str, metric_no_group, + metric_no_merge, &perf_pmu__fake, metric_events, map); +} + bool metricgroup__has_metric(const char *metric) { struct pmu_events_map *map = perf_pmu__find_map(NULL); diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 287850bcdeca1..62623a39cbecd 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -7,8 +7,10 @@ #include <stdbool.h> struct evsel; +struct evlist; struct option; struct rblist; +struct pmu_events_map; struct metric_event { struct rb_node nd; @@ -16,12 +18,18 @@ struct metric_event { struct list_head head; /* list of metric_expr */ }; +struct metric_ref { + const char *metric_name; + const char *metric_expr; +}; + struct metric_expr { struct list_head nd; const char *metric_expr; const char *metric_name; const char *metric_unit; struct evsel **metric_events; + struct metric_ref *metric_refs; int runtime; }; @@ -34,8 +42,16 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_merge, struct rblist *metric_events); +int metricgroup__parse_groups_test(struct evlist *evlist, + struct pmu_events_map *map, + const char *str, + bool metric_no_group, + bool metric_no_merge, + struct rblist *metric_events); + void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); int arch_get_runtimeparam(void); +void metricgroup__rblist_exit(struct rblist *metric_events); #endif diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3decbb203846a..9f7260e691134 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -767,8 +767,8 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_state->error->help = strdup("(add -v to see detail)"); - parse_state->error->str = strdup(errbuf); + parse_events__handle_error(parse_state->error, 0, + strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -784,36 +784,38 @@ parse_events_config_bpf(struct parse_events_state *parse_state, return 0; list_for_each_entry(term, head_config, list) { - char errbuf[BUFSIZ]; int err; if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - snprintf(errbuf, sizeof(errbuf), - "Invalid config term for BPF object"); - errbuf[BUFSIZ - 1] = '\0'; - - parse_state->error->idx = term->err_term; - parse_state->error->str = strdup(errbuf); + parse_events__handle_error(parse_state->error, term->err_term, + strdup("Invalid config term for BPF object"), + NULL); return -EINVAL; } err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos); if (err) { + char errbuf[BUFSIZ]; + int idx; + bpf__strerror_config_obj(obj, term, parse_state->evlist, &error_pos, err, errbuf, sizeof(errbuf)); - parse_state->error->help = strdup( + + if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) + idx = term->err_val; + else + idx = term->err_term + error_pos; + + parse_events__handle_error(parse_state->error, idx, + strdup(errbuf), + strdup( "Hint:\tValid config terms:\n" " \tmap:[<arraymap>].value<indices>=[value]\n" " \tmap:[<eventmap>].event<indices>=[event]\n" "\n" " \twhere <indices> is something like [0,3...5] or [all]\n" -" \t(add -v to see detail)"); - parse_state->error->str = strdup(errbuf); - if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) - parse_state->error->idx = term->err_val; - else - parse_state->error->idx = term->err_term + error_pos; +" \t(add -v to see detail)")); return err; } } @@ -877,8 +879,8 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, -err, errbuf, sizeof(errbuf)); - parse_state->error->help = strdup("(add -v to see detail)"); - parse_state->error->str = strdup(errbuf); + parse_events__handle_error(parse_state->error, 0, + strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -1450,7 +1452,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, fprintf(stderr, "' that may result in non-fatal errors\n"); } - pmu = perf_pmu__find(name); + pmu = parse_state->fake_pmu ?: perf_pmu__find(name); if (!pmu) { char *err_str; @@ -1483,7 +1485,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } } - if (perf_pmu__check_alias(pmu, head_config, &info)) + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info)) return -EINVAL; if (verbose > 1) { @@ -1516,7 +1518,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) return -ENOMEM; - if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { + if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { struct evsel_config_term *pos, *tmp; list_for_each_entry_safe(pos, tmp, &config_terms, list) { @@ -2017,6 +2019,32 @@ err: perf_pmu__parse_cleanup(); } +/* + * This function injects special term in + * perf_pmu_events_list so the test code + * can check on this functionality. + */ +int perf_pmu__test_parse_init(void) +{ + struct perf_pmu_event_symbol *list; + + list = malloc(sizeof(*list) * 1); + if (!list) + return -ENOMEM; + + list->type = PMU_EVENT_SYMBOL; + list->symbol = strdup("read"); + + if (!list->symbol) { + free(list); + return -ENOMEM; + } + + perf_pmu_events_list = list; + perf_pmu_events_list_num = 1; + return 0; +} + enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name) { @@ -2078,6 +2106,8 @@ int parse_events_terms(struct list_head *terms, const char *str) int ret; ret = parse_events__scanner(str, &parse_state); + perf_pmu__parse_cleanup(); + if (!ret) { list_splice(parse_state.terms, terms); zfree(&parse_state.terms); @@ -2088,15 +2118,16 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -int parse_events(struct evlist *evlist, const char *str, - struct parse_events_error *err) +int __parse_events(struct evlist *evlist, const char *str, + struct parse_events_error *err, struct perf_pmu *fake_pmu) { struct parse_events_state parse_state = { - .list = LIST_HEAD_INIT(parse_state.list), - .idx = evlist->core.nr_entries, - .error = err, - .evlist = evlist, - .stoken = PE_START_EVENTS, + .list = LIST_HEAD_INIT(parse_state.list), + .idx = evlist->core.nr_entries, + .error = err, + .evlist = evlist, + .stoken = PE_START_EVENTS, + .fake_pmu = fake_pmu, }; int ret; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 1fe23a2f9b36e..00cde7d2e30c7 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -33,8 +33,15 @@ const char *event_type(int type); int parse_events_option(const struct option *opt, const char *str, int unset); int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); -int parse_events(struct evlist *evlist, const char *str, - struct parse_events_error *error); +int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error, + struct perf_pmu *fake_pmu); + +static inline int parse_events(struct evlist *evlist, const char *str, + struct parse_events_error *err) +{ + return __parse_events(evlist, str, err, NULL); +} + int parse_events_terms(struct list_head *terms, const char *str); int parse_filter(const struct option *opt, const char *str, int unset); int exclude_perf(const struct option *opt, const char *arg, int unset); @@ -127,9 +134,10 @@ struct parse_events_state { int idx; int nr_groups; struct parse_events_error *error; - struct evlist *evlist; + struct evlist *evlist; struct list_head *terms; int stoken; + struct perf_pmu *fake_pmu; }; void parse_events__handle_error(struct parse_events_error *err, int idx, @@ -253,4 +261,6 @@ static inline bool is_sdt_event(char *str __maybe_unused) } #endif /* HAVE_LIBELF_SUPPORT */ +int perf_pmu__test_parse_init(void); + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 002802e17059e..3ca5fd2829ca2 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -41,14 +41,6 @@ static int value(yyscan_t scanner, int base) return __value(yylval, text, base, PE_VALUE); } -static int raw(yyscan_t scanner) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - return __value(yylval, text + 1, 16, PE_RAW); -} - static int str(yyscan_t scanner, int token) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -72,6 +64,17 @@ static int str(yyscan_t scanner, int token) return token; } +static int raw(yyscan_t scanner) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL) + return str(scanner, PE_NAME); + + return __value(yylval, text + 1, 16, PE_RAW); +} + static bool isbpf_suffix(char *text) { int len = strlen(text); @@ -129,12 +132,16 @@ do { \ yyless(0); \ } while (0) -static int pmu_str_check(yyscan_t scanner) +static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state) { YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); yylval->str = strdup(text); + + if (parse_state->fake_pmu) + return PE_PMU_EVENT_FAKE; + switch (perf_pmu__parse_check(text)) { case PMU_EVENT_SYMBOL_PREFIX: return PE_PMU_EVENT_PRE; @@ -289,6 +296,7 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } r{num_raw_hex} { return raw(yyscanner); } +r0x{num_raw_hex} { return raw(yyscanner); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } @@ -376,7 +384,7 @@ r{num_raw_hex} { return raw(yyscanner); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } {bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } {bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } -{name} { return pmu_str_check(yyscanner); } +{name} { return pmu_str_check(yyscanner, _parse_state); } {name_tag} { return str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } - { return '-'; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index acef87d9af588..b9fb91fdc5de9 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list, %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %type <num> PE_VALUE @@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list, %type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME -%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type <str> PE_DRV_CFG_TERM %destructor { free ($$); } <str> %type <term> event_term @@ -356,6 +356,43 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc YYABORT; $$ = list; } +| +PE_PMU_EVENT_FAKE sep_dc +{ + struct list_head *list; + int err; + + list = alloc_list(); + if (!list) + YYABORT; + + err = parse_events_add_pmu(_parse_state, list, $1, NULL, false, false); + free($1); + if (err < 0) { + free(list); + YYABORT; + } + $$ = list; +} +| +PE_PMU_EVENT_FAKE opt_pmu_config +{ + struct list_head *list; + int err; + + list = alloc_list(); + if (!list) + YYABORT; + + err = parse_events_add_pmu(_parse_state, list, $1, $2, false, false); + free($1); + parse_events_terms__delete($2); + if (err < 0) { + free(list); + YYABORT; + } + $$ = list; +} value_sym: PE_VALUE_SYM_HW diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 1337965673d70..3840d02f0f7b0 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -93,6 +93,11 @@ static void perf_probe_context_switch(struct evsel *evsel) evsel->core.attr.context_switch = 1; } +static void perf_probe_text_poke(struct evsel *evsel) +{ + evsel->core.attr.text_poke = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -108,6 +113,11 @@ bool perf_can_record_switch_events(void) return perf_probe_api(perf_probe_context_switch); } +bool perf_can_record_text_poke_events(void) +{ + return perf_probe_api(perf_probe_text_poke); +} + bool perf_can_record_cpu_wide(void) { struct perf_event_attr attr = { diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h index 706c3c6426e2f..d5506a983a94a 100644 --- a/tools/perf/util/perf_api_probe.h +++ b/tools/perf/util/perf_api_probe.h @@ -9,6 +9,7 @@ bool perf_can_aux_sample(void); bool perf_can_comm_exec(void); bool perf_can_record_cpu_wide(void); bool perf_can_record_switch_events(void); +bool perf_can_record_text_poke_events(void); bool perf_can_sample_identifier(void); #endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index b94fa07f5d326..e67a227c0ce7e 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -147,6 +147,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(aux_watermark, p_unsigned); PRINT_ATTRf(sample_max_stack, p_unsigned); PRINT_ATTRf(aux_sample_size, p_unsigned); + PRINT_ATTRf(text_poke, p_unsigned); return ret; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 93fe72a9dc0b2..f1688e1f6ed78 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -26,6 +26,8 @@ #include "strbuf.h" #include "fncache.h" +struct perf_pmu perf_pmu__fake; + struct perf_pmu_format { char *name; int value; @@ -1400,6 +1402,7 @@ struct sevent { char *pmu; char *metric_expr; char *metric_name; + int is_cpu; }; static int cmp_sevent(const void *a, const void *b) @@ -1416,6 +1419,11 @@ static int cmp_sevent(const void *a, const void *b) if (n) return n; } + + /* Order CPU core events to be first */ + if (as->is_cpu != bs->is_cpu) + return bs->is_cpu - as->is_cpu; + return strcmp(as->name, bs->name); } @@ -1475,7 +1483,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? alias->name : format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = !strcmp(pmu->name, "cpu"); + bool is_cpu = is_pmu_core(pmu->name); if (alias->deprecated && !deprecated) continue; @@ -1507,6 +1515,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, aliases[j].pmu = pmu->name; aliases[j].metric_expr = alias->metric_expr; aliases[j].metric_name = alias->metric_name; + aliases[j].is_cpu = is_cpu; j++; } if (pmu->selectable && diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index f971d9aa4570a..44ccbdbb1c374 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -43,6 +43,8 @@ struct perf_pmu { struct list_head list; /* ELEM */ }; +extern struct perf_pmu perf_pmu__fake; + struct perf_pmu_info { const char *unit; const char *metric_expr; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index df713a5d1e26a..99d36ac77c084 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -375,9 +375,13 @@ static int find_alternative_probe_point(struct debuginfo *dinfo, /* Find the address of given function */ map__for_each_symbol_by_name(map, pp->function, sym) { - if (uprobes) + if (uprobes) { address = sym->start; - else + if (sym->type == STT_GNU_IFUNC) + pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n" + "Consider identifying the final function used at run time and set the probe directly on that.\n", + pp->function); + } else address = map->unmap_ip(map, sym->start) - map->reloc; break; } @@ -2968,6 +2972,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, for (j = 0; j < num_matched_functions; j++) { sym = syms[j]; + /* There can be duplicated symbols in the map */ + for (i = 0; i < j; i++) + if (sym->start == syms[i]->start) { + pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n", + sym->name, sym->start); + break; + } + if (i != j) + continue; + tev = (*tevs) + ret; tp = &tev->point; if (ret == num_matched_functions) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 55924255c5355..659024342e9ac 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1408,6 +1408,9 @@ static int fill_empty_trace_arg(struct perf_probe_event *pev, char *type; int i, j, ret; + if (!ntevs) + return -ENOENT; + for (i = 0; i < pev->nargs; i++) { type = NULL; for (j = 0; j < ntevs; j++) { @@ -1464,7 +1467,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, if (ret >= 0 && tf.pf.skip_empty_arg) ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs); - if (ret < 0) { + if (ret < 0 || tf.ntevs == 0) { for (i = 0; i < tf.ntevs; i++) clear_probe_trace_event(&tf.tevs[i]); zfree(tevs); diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 39d1de4b2a369..03678ff255391 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -48,6 +48,7 @@ struct record_opts { bool sample_id; bool no_bpf_event; bool kcore; + bool text_poke; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; @@ -61,7 +62,7 @@ struct record_opts { const char *auxtrace_snapshot_opts; const char *auxtrace_sample_opts; bool sample_transaction; - unsigned initial_delay; + int initial_delay; bool use_clockid; clockid_t clockid; u64 clockid_res_ns; @@ -70,6 +71,8 @@ struct record_opts { int mmap_flush; unsigned int comp_level; unsigned int nr_threads_synthesize; + int ctl_fd; + int ctl_fd_ack; }; extern const char * const *record_usage; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1a157e84a04a5..ffbc9d35a383f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -115,12 +115,12 @@ static int perf_session__open(struct perf_session *session) if (perf_header__has_feat(&session->header, HEADER_STAT)) return 0; - if (!perf_evlist__valid_sample_type(session->evlist)) { + if (!evlist__valid_sample_type(session->evlist)) { pr_err("non matching sample_type\n"); return -1; } - if (!perf_evlist__valid_sample_id_all(session->evlist)) { + if (!evlist__valid_sample_id_all(session->evlist)) { pr_err("non matching sample_id_all\n"); return -1; } @@ -252,10 +252,10 @@ struct perf_session *perf_session__new(struct perf_data *data, /* * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is - * processed, so perf_evlist__sample_id_all is not meaningful here. + * processed, so evlist__sample_id_all is not meaningful here. */ if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps && - tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) { + tool->ordered_events && !evlist__sample_id_all(session->evlist)) { dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); tool->ordered_events = false; } @@ -490,6 +490,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->ksymbol = perf_event__process_ksymbol; if (tool->bpf == NULL) tool->bpf = perf_event__process_bpf; + if (tool->text_poke == NULL) + tool->text_poke = perf_event__process_text_poke; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -659,6 +661,24 @@ static void perf_event__switch_swap(union perf_event *event, bool sample_id_all) swap_sample_id_all(event, &event->context_switch + 1); } +static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all) +{ + event->text_poke.addr = bswap_64(event->text_poke.addr); + event->text_poke.old_len = bswap_16(event->text_poke.old_len); + event->text_poke.new_len = bswap_16(event->text_poke.new_len); + + if (sample_id_all) { + size_t len = sizeof(event->text_poke.old_len) + + sizeof(event->text_poke.new_len) + + event->text_poke.old_len + + event->text_poke.new_len; + void *data = &event->text_poke.old_len; + + data += PERF_ALIGN(len, sizeof(u64)); + swap_sample_id_all(event, data); + } +} + static void perf_event__throttle_swap(union perf_event *event, bool sample_id_all) { @@ -932,6 +952,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, + [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1160,10 +1181,10 @@ static void perf_evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = __perf_evlist__combined_sample_type(evlist); + u64 sample_type = __evlist__combined_sample_type(evlist); if (event->header.type != PERF_RECORD_SAMPLE && - !perf_evlist__sample_id_all(evlist)) { + !evlist__sample_id_all(evlist)) { fputs("-1 -1 ", stdout); return; } @@ -1474,6 +1495,8 @@ static int machines__deliver_event(struct machines *machines, return tool->ksymbol(tool, event, sample, machine); case PERF_RECORD_BPF_EVENT: return tool->bpf(tool, event, sample, machine); + case PERF_RECORD_TEXT_POKE: + return tool->text_poke(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -1655,7 +1678,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, return -1; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(session->evlist)); + event_swap(event, evlist__sample_id_all(session->evlist)); out_parse_sample: @@ -1704,7 +1727,7 @@ static s64 perf_session__process_event(struct perf_session *session, int ret; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(evlist)); + event_swap(event, evlist__sample_id_all(evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a7c13a88ecb9f..e1ba6c1b916a6 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -730,25 +730,17 @@ static void print_smi_cost(struct perf_stat_config *config, out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); } -static void generic_metric(struct perf_stat_config *config, - const char *metric_expr, - struct evsel **metric_events, - char *name, - const char *metric_name, - const char *metric_unit, - int runtime, - int cpu, - struct perf_stat_output_ctx *out, - struct runtime_stat *st) +static int prepare_metric(struct evsel **metric_events, + struct metric_ref *metric_refs, + struct expr_parse_ctx *pctx, + int cpu, + struct runtime_stat *st) { - print_metric_t print_metric = out->print_metric; - struct expr_parse_ctx pctx; - double ratio, scale; - int i; - void *ctxp = out->ctx; + double scale; char *n, *pn; + int i, j, ret; - expr__ctx_init(&pctx); + expr__ctx_init(pctx); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; @@ -771,7 +763,7 @@ static void generic_metric(struct perf_stat_config *config, n = strdup(metric_events[i]->name); if (!n) - return; + return -ENOMEM; /* * This display code with --no-merge adds [cpu] postfixes. * These are not supported by the parser. Remove everything @@ -782,11 +774,42 @@ static void generic_metric(struct perf_stat_config *config, *pn = 0; if (metric_total) - expr__add_id(&pctx, n, metric_total); + expr__add_id_val(pctx, n, metric_total); else - expr__add_id(&pctx, n, avg_stats(stats)*scale); + expr__add_id_val(pctx, n, avg_stats(stats)*scale); } + for (j = 0; metric_refs && metric_refs[j].metric_name; j++) { + ret = expr__add_ref(pctx, &metric_refs[j]); + if (ret) + return ret; + } + + return i; +} + +static void generic_metric(struct perf_stat_config *config, + const char *metric_expr, + struct evsel **metric_events, + struct metric_ref *metric_refs, + char *name, + const char *metric_name, + const char *metric_unit, + int runtime, + int cpu, + struct perf_stat_output_ctx *out, + struct runtime_stat *st) +{ + print_metric_t print_metric = out->print_metric; + struct expr_parse_ctx pctx; + double ratio, scale; + int i; + void *ctxp = out->ctx; + + i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st); + if (i < 0) + return; + if (!metric_events[i]) { if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { char *unit; @@ -827,6 +850,20 @@ static void generic_metric(struct perf_stat_config *config, expr__ctx_clear(&pctx); } +double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) +{ + struct expr_parse_ctx pctx; + double ratio; + + if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0) + return 0.; + + if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1)) + return 0.; + + return ratio; +} + void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, double avg, int cpu, @@ -1035,8 +1072,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, NULL, 1, cpu, out, st); + generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, + evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; @@ -1064,7 +1101,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (num++ > 0) out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, - evsel->name, mexp->metric_name, + mexp->metric_refs, evsel->name, mexp->metric_name, mexp->metric_unit, mexp->runtime, cpu, out, st); } } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f75ae679eb281..f8778cffd9419 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -116,7 +116,7 @@ struct perf_stat_config { FILE *output; unsigned int interval; unsigned int timeout; - unsigned int initial_delay; + int initial_delay; unsigned int unit_width; unsigned int metric_only_len; int times; @@ -133,6 +133,8 @@ struct perf_stat_config { struct perf_cpu_map *cpus_aggr_map; u64 *walltime_run; struct rblist metric_events; + int ctl_fd; + int ctl_fd_ack; }; void perf_stat__set_big_num(int set); @@ -230,4 +232,7 @@ perf_evlist__print_counters(struct evlist *evlist, struct target *_target, struct timespec *ts, int argc, const char **argv); + +struct metric_expr; +double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st); #endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5ddf84dcbae79..053468ffcb8af 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -666,6 +666,8 @@ static bool symbol__is_idle(const char *name) "poll_idle", "ppc64_runlatch_off", "pseries_dedicated_idle_sleep", + "psw_idle", + "psw_idle_exit", NULL }; int i; @@ -1563,6 +1565,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 3fb67bd31e4a0..bbbc0dcd461ff 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -57,7 +57,8 @@ struct perf_tool { throttle, unthrottle, ksymbol, - bpf; + bpf, + text_poke; event_attr_op attr; event_attr_op event_update; diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README index a12d295a09d84..4f048789b2608 100644 --- a/tools/testing/ktest/examples/README +++ b/tools/testing/ktest/examples/README @@ -11,7 +11,7 @@ crosstests.conf - this config shows an example of testing a git repo against lots of different architectures. It only does build tests, but makes it easy to compile test different archs. You can download the arch cross compilers from: - http://kernel.org/pub/tools/crosstool/files/bin/x86_64/ + https://kernel.org/pub/tools/crosstool/files/bin/x86_64/ test.conf - A generic example of a config. This is based on an actual config used to perform real testing. diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf index 6907f32590b28..3b15e85f26bd6 100644 --- a/tools/testing/ktest/examples/crosstests.conf +++ b/tools/testing/ktest/examples/crosstests.conf @@ -3,7 +3,7 @@ # # In this config, it is expected that the tool chains from: # -# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/ +# https://kernel.org/pub/tools/crosstool/files/bin/x86_64/ # # running on a x86_64 system have been downloaded and installed into: # diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 7570e36d636d8..cb16d2aac51c3 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -11,6 +11,7 @@ use File::Path qw(mkpath); use File::Copy qw(cp); use FileHandle; use FindBin; +use IO::Handle; my $VERSION = "0.2"; @@ -81,6 +82,8 @@ my %default = ( "IGNORE_UNUSED" => 0, ); +my $test_log_start = 0; + my $ktest_config = "ktest.conf"; my $version; my $have_version = 0; @@ -98,6 +101,7 @@ my $final_post_ktest; my $pre_ktest; my $post_ktest; my $pre_test; +my $pre_test_die; my $post_test; my $pre_build; my $post_build; @@ -223,6 +227,7 @@ my $dirname = $FindBin::Bin; my $mailto; my $mailer; my $mail_path; +my $mail_max_size; my $mail_command; my $email_on_error; my $email_when_finished; @@ -259,6 +264,7 @@ my %option_map = ( "MAILTO" => \$mailto, "MAILER" => \$mailer, "MAIL_PATH" => \$mail_path, + "MAIL_MAX_SIZE" => \$mail_max_size, "MAIL_COMMAND" => \$mail_command, "EMAIL_ON_ERROR" => \$email_on_error, "EMAIL_WHEN_FINISHED" => \$email_when_finished, @@ -273,6 +279,7 @@ my %option_map = ( "PRE_KTEST" => \$pre_ktest, "POST_KTEST" => \$post_ktest, "PRE_TEST" => \$pre_test, + "PRE_TEST_DIE" => \$pre_test_die, "POST_TEST" => \$post_test, "BUILD_TYPE" => \$build_type, "BUILD_OPTIONS" => \$build_options, @@ -507,9 +514,7 @@ EOF sub _logit { if (defined($opt{"LOG_FILE"})) { - open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; - print OUT @_; - close(OUT); + print LOG @_; } } @@ -909,6 +914,12 @@ sub process_expression { } } + if ($val =~ s/^\s*NOT\s+(.*)//) { + my $express = $1; + my $ret = process_expression($name, $express); + return !$ret; + } + if ($val =~ /^\s*0\s*$/) { return 0; } elsif ($val =~ /^\s*\d+\s*$/) { @@ -1485,8 +1496,32 @@ sub dodie { if ($email_on_error) { my $name = get_test_name; + my $log_file; + + if (defined($opt{"LOG_FILE"})) { + my $whence = 0; # beginning of file + my $pos = $test_log_start; + + if (defined($mail_max_size)) { + my $log_size = tell LOG; + $log_size -= $test_log_start; + if ($log_size > $mail_max_size) { + $whence = 2; # end of file + $pos = - $mail_max_size; + } + } + $log_file = "$tmpdir/log"; + open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)"; + open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n"; + seek(L, $pos, $whence); + while (<L>) { + print O; + } + close O; + close L; + } send_email("KTEST: critical failure for test $i [$name]", - "Your test started at $script_start_time has failed with:\n@_\n"); + "Your test started at $script_start_time has failed with:\n@_\n", $log_file); } if ($monitor_cnt) { @@ -1508,7 +1543,7 @@ sub create_pty { my $TIOCGPTN = 0x80045430; sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or - dodie "Cant open /dev/ptmx"; + dodie "Can't open /dev/ptmx"; # unlockpt() $tmp = pack("i", 0); @@ -1772,8 +1807,6 @@ sub run_command { (fail "unable to exec $command" and return 0); if (defined($opt{"LOG_FILE"})) { - open(LOG, ">>$opt{LOG_FILE}") or - dodie "failed to write to log"; $dolog = 1; } @@ -1821,7 +1854,6 @@ sub run_command { } close(CMD); - close(LOG) if ($dolog); close(RD) if ($dord); $end_time = time; @@ -3188,6 +3220,8 @@ sub config_bisect_end { doprint "***************************************\n\n"; } +my $pass = 1; + sub run_config_bisect { my ($good, $bad, $last_result) = @_; my $reset = ""; @@ -3210,11 +3244,15 @@ sub run_config_bisect { $ret = run_config_bisect_test $config_bisect_type; if ($ret) { - doprint "NEW GOOD CONFIG\n"; + doprint "NEW GOOD CONFIG ($pass)\n"; + system("cp $output_config $tmpdir/good_config.tmp.$pass"); + $pass++; # Return 3 for good config return 3; } else { - doprint "NEW BAD CONFIG\n"; + doprint "NEW BAD CONFIG ($pass)\n"; + system("cp $output_config $tmpdir/bad_config.tmp.$pass"); + $pass++; # Return 4 for bad config return 4; } @@ -4077,8 +4115,12 @@ if ($#new_configs >= 0) { } } -if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) { - unlink $opt{"LOG_FILE"}; +if (defined($opt{"LOG_FILE"})) { + if ($opt{"CLEAR_LOG"}) { + unlink $opt{"LOG_FILE"}; + } + open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; + LOG->autoflush(1); } doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; @@ -4171,7 +4213,7 @@ sub find_mailer { } sub do_send_mail { - my ($subject, $message) = @_; + my ($subject, $message, $file) = @_; if (!defined($mail_path)) { # find the mailer @@ -4181,16 +4223,30 @@ sub do_send_mail { } } + my $header_file = "$tmpdir/header"; + open (HEAD, ">$header_file") or die "Can not create $header_file\n"; + print HEAD "To: $mailto\n"; + print HEAD "Subject: $subject\n\n"; + print HEAD "$message\n"; + close HEAD; + if (!defined($mail_command)) { if ($mailer eq "mail" || $mailer eq "mailx") { - $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'"; + $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO"; } elsif ($mailer eq "sendmail" ) { - $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO"; + $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -t \$MAILTO"; } else { die "\nYour mailer: $mailer is not supported.\n"; } } + if (defined($file)) { + $mail_command =~ s/\$BODY_FILE/$file/g; + } else { + $mail_command =~ s/\$BODY_FILE//g; + } + + $mail_command =~ s/\$HEADER_FILE/$header_file/g; $mail_command =~ s/\$MAILER/$mailer/g; $mail_command =~ s/\$MAIL_PATH/$mail_path/g; $mail_command =~ s/\$MAILTO/$mailto/g; @@ -4338,10 +4394,19 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { } doprint "\n\n"; + + if (defined($opt{"LOG_FILE"})) { + $test_log_start = tell(LOG); + } + doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n"; if (defined($pre_test)) { - run_command $pre_test; + my $ret = run_command $pre_test; + if (!$ret && defined($pre_test_die) && + $pre_test_die) { + dodie "failed to pre_test\n"; + } } unlink $dmesg; @@ -4441,4 +4506,10 @@ if ($email_when_finished) { send_email("KTEST: Your test has finished!", "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); } + +if (defined($opt{"LOG_FILE"})) { + print "\n See $opt{LOG_FILE} for the record of results.\n\n"; + close LOG; +} + exit 0; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index 27666b8007edb..5e7d1d7297529 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -442,6 +442,19 @@ # Users can cancel the test by Ctrl^C # (default 0) #EMAIL_WHEN_CANCELED = 1 +# +# If a test ends with an error and EMAIL_ON_ERROR is set as well +# as a LOG_FILE is defined, then the log of the failing test will +# be included in the email that is sent. +# It is possible that the log may be very large, in which case, +# only the last amount of the log should be sent. To limit how +# much of the log is sent, set MAIL_MAX_SIZE. This will be the +# size in bytes of the last portion of the log of the failed +# test file. That is, if this is set to 100000, then only the +# last 100 thousand bytes of the log file will be included in +# the email. +# (default undef) +#MAIL_MAX_SIZE = 1000000 # Start a test setup. If you leave this off, all options # will be default and the test will run once. @@ -557,6 +570,11 @@ # default (undefined) #PRE_TEST = ${SSH} reboot_to_special_kernel +# To kill the entire test if PRE_TEST is defined but fails set this +# to 1. +# (default 0) +#PRE_TEST_DIE = 1 + # If there is a command you want to run after the individual test case # completes, then you can set this option. # diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a8ee5c4d41ebb..a1a5dc645b401 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -173,6 +173,9 @@ struct nfit_test_fw { u64 version; u32 size_received; u64 end_time; + bool armed; + bool missed_activate; + unsigned long last_activate; }; struct nfit_test { @@ -345,7 +348,7 @@ static int nd_intel_test_finish_fw(struct nfit_test *t, __func__, t, nd_cmd, buf_len, idx); if (fw->state == FW_STATE_UPDATED) { - /* update already done, need cold boot */ + /* update already done, need activation */ nd_cmd->status = 0x20007; return 0; } @@ -430,6 +433,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t, } dev_dbg(dev, "%s: transition out verify\n", __func__); fw->state = FW_STATE_UPDATED; + fw->missed_activate = false; /* fall through */ case FW_STATE_UPDATED: nd_cmd->status = 0; @@ -1178,6 +1182,134 @@ static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t, return 0; } +static unsigned long last_activate; + +static int nvdimm_bus_intel_fw_activate_businfo(struct nfit_test *t, + struct nd_intel_bus_fw_activate_businfo *nd_cmd, + unsigned int buf_len) +{ + int i, armed = 0; + int state; + u64 tmo; + + for (i = 0; i < NUM_DCR; i++) { + struct nfit_test_fw *fw = &t->fw[i]; + + if (fw->armed) + armed++; + } + + /* + * Emulate 3 second activation max, and 1 second incremental + * quiesce time per dimm requiring multiple activates to get all + * DIMMs updated. + */ + if (armed) + state = ND_INTEL_FWA_ARMED; + else if (!last_activate || time_after(jiffies, last_activate + 3 * HZ)) + state = ND_INTEL_FWA_IDLE; + else + state = ND_INTEL_FWA_BUSY; + + tmo = armed * USEC_PER_SEC; + *nd_cmd = (struct nd_intel_bus_fw_activate_businfo) { + .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE + | ND_INTEL_BUS_FWA_CAP_OSQUIESCE + | ND_INTEL_BUS_FWA_CAP_RESET, + .state = state, + .activate_tmo = tmo, + .cpu_quiesce_tmo = tmo, + .io_quiesce_tmo = tmo, + .max_quiesce_tmo = 3 * USEC_PER_SEC, + }; + + return 0; +} + +static int nvdimm_bus_intel_fw_activate(struct nfit_test *t, + struct nd_intel_bus_fw_activate *nd_cmd, + unsigned int buf_len) +{ + struct nd_intel_bus_fw_activate_businfo info; + u32 status = 0; + int i; + + nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info)); + if (info.state == ND_INTEL_FWA_BUSY) + status = ND_INTEL_BUS_FWA_STATUS_BUSY; + else if (info.activate_tmo > info.max_quiesce_tmo) + status = ND_INTEL_BUS_FWA_STATUS_TMO; + else if (info.state == ND_INTEL_FWA_IDLE) + status = ND_INTEL_BUS_FWA_STATUS_NOARM; + + dev_dbg(&t->pdev.dev, "status: %d\n", status); + nd_cmd->status = status; + if (status && status != ND_INTEL_BUS_FWA_STATUS_TMO) + return 0; + + last_activate = jiffies; + for (i = 0; i < NUM_DCR; i++) { + struct nfit_test_fw *fw = &t->fw[i]; + + if (!fw->armed) + continue; + if (fw->state != FW_STATE_UPDATED) + fw->missed_activate = true; + else + fw->state = FW_STATE_NEW; + fw->armed = false; + fw->last_activate = last_activate; + } + + return 0; +} + +static int nd_intel_test_cmd_fw_activate_dimminfo(struct nfit_test *t, + struct nd_intel_fw_activate_dimminfo *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct nd_intel_bus_fw_activate_businfo info; + struct nfit_test_fw *fw = &t->fw[dimm]; + u32 result, state; + + nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info)); + + if (info.state == ND_INTEL_FWA_BUSY) + state = ND_INTEL_FWA_BUSY; + else if (info.state == ND_INTEL_FWA_IDLE) + state = ND_INTEL_FWA_IDLE; + else if (fw->armed) + state = ND_INTEL_FWA_ARMED; + else + state = ND_INTEL_FWA_IDLE; + + result = ND_INTEL_DIMM_FWA_NONE; + if (last_activate && fw->last_activate == last_activate && + state == ND_INTEL_FWA_IDLE) { + if (fw->missed_activate) + result = ND_INTEL_DIMM_FWA_NOTSTAGED; + else + result = ND_INTEL_DIMM_FWA_SUCCESS; + } + + *nd_cmd = (struct nd_intel_fw_activate_dimminfo) { + .result = result, + .state = state, + }; + + return 0; +} + +static int nd_intel_test_cmd_fw_activate_arm(struct nfit_test *t, + struct nd_intel_fw_activate_arm *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct nfit_test_fw *fw = &t->fw[dimm]; + + fw->armed = nd_cmd->activate_arm == ND_INTEL_DIMM_FWA_ARM; + nd_cmd->status = 0; + return 0; +} static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) { @@ -1192,6 +1324,29 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) return i; } +static void nfit_ctl_dbg(struct acpi_nfit_desc *acpi_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int len) +{ + struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + unsigned int func = cmd; + unsigned int family = 0; + + if (cmd == ND_CMD_CALL) { + struct nd_cmd_pkg *pkg = buf; + + len = pkg->nd_size_in; + family = pkg->nd_family; + buf = pkg->nd_payload; + func = pkg->nd_command; + } + dev_dbg(&t->pdev.dev, "%s family: %d cmd: %d: func: %d input length: %d\n", + nvdimm ? nvdimm_name(nvdimm) : "bus", family, cmd, func, + len); + print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 16, 4, + buf, min(len, 256u), true); +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -1205,6 +1360,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, cmd_rc = &__cmd_rc; *cmd_rc = 0; + nfit_ctl_dbg(acpi_desc, nvdimm, cmd, buf, buf_len); + if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -1224,6 +1381,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, i = get_dimm(nfit_mem, func); if (i < 0) return i; + if (i >= NUM_DCR) { + dev_WARN_ONCE(&t->pdev.dev, 1, + "ND_CMD_CALL only valid for nfit_test0\n"); + return -EINVAL; + } switch (func) { case NVDIMM_INTEL_GET_SECURITY_STATE: @@ -1252,11 +1414,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, break; case NVDIMM_INTEL_OVERWRITE: rc = nd_intel_test_cmd_overwrite(t, - buf, buf_len, i - t->dcr_idx); + buf, buf_len, i); break; case NVDIMM_INTEL_QUERY_OVERWRITE: rc = nd_intel_test_cmd_query_overwrite(t, - buf, buf_len, i - t->dcr_idx); + buf, buf_len, i); break; case NVDIMM_INTEL_SET_MASTER_PASSPHRASE: rc = nd_intel_test_cmd_master_set_pass(t, @@ -1266,54 +1428,59 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nd_intel_test_cmd_master_secure_erase(t, buf, buf_len, i); break; + case NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO: + rc = nd_intel_test_cmd_fw_activate_dimminfo( + t, buf, buf_len, i); + break; + case NVDIMM_INTEL_FW_ACTIVATE_ARM: + rc = nd_intel_test_cmd_fw_activate_arm( + t, buf, buf_len, i); + break; case ND_INTEL_ENABLE_LSS_STATUS: rc = nd_intel_test_cmd_set_lss_status(t, buf, buf_len); break; case ND_INTEL_FW_GET_INFO: rc = nd_intel_test_get_fw_info(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_START_UPDATE: rc = nd_intel_test_start_update(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_SEND_DATA: rc = nd_intel_test_send_data(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_FINISH_UPDATE: rc = nd_intel_test_finish_fw(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_FINISH_QUERY: rc = nd_intel_test_finish_query(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_SMART: rc = nfit_test_cmd_smart(buf, buf_len, - &t->smart[i - t->dcr_idx]); + &t->smart[i]); break; case ND_INTEL_SMART_THRESHOLD: rc = nfit_test_cmd_smart_threshold(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx]); + &t->smart_threshold[i]); break; case ND_INTEL_SMART_SET_THRESHOLD: rc = nfit_test_cmd_smart_set_threshold(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx], - &t->smart[i - t->dcr_idx], + &t->smart_threshold[i], + &t->smart[i], &t->pdev.dev, t->dimm_dev[i]); break; case ND_INTEL_SMART_INJECT: rc = nfit_test_cmd_smart_inject(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx], - &t->smart[i - t->dcr_idx], + &t->smart_threshold[i], + &t->smart[i], &t->pdev.dev, t->dimm_dev[i]); break; default: @@ -1353,9 +1520,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!nd_desc) return -ENOTTY; - if (cmd == ND_CMD_CALL) { + if (cmd == ND_CMD_CALL && call_pkg->nd_family + == NVDIMM_BUS_FAMILY_NFIT) { func = call_pkg->nd_command; - buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; buf = (void *) call_pkg->nd_payload; @@ -1379,7 +1546,26 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, default: return -ENOTTY; } - } + } else if (cmd == ND_CMD_CALL && call_pkg->nd_family + == NVDIMM_BUS_FAMILY_INTEL) { + func = call_pkg->nd_command; + buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; + buf = (void *) call_pkg->nd_payload; + + switch (func) { + case NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO: + rc = nvdimm_bus_intel_fw_activate_businfo(t, + buf, buf_len); + return rc; + case NVDIMM_BUS_INTEL_FW_ACTIVATE: + rc = nvdimm_bus_intel_fw_activate(t, buf, + buf_len); + return rc; + default: + return -ENOTTY; + } + } else if (cmd == ND_CMD_CALL) + return -ENOTTY; if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; @@ -1805,6 +1991,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_flush_address *flush; struct acpi_nfit_capabilities *pcap; unsigned int offset = 0, i; + unsigned long *acpi_mask; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -2507,10 +2694,10 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); - set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); + set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_dsm_mask); set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); @@ -2531,6 +2718,12 @@ static void nfit_test0_setup(struct nfit_test *t) &acpi_desc->dimm_cmd_force_en); set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_FW_ACTIVATE_ARM, &acpi_desc->dimm_cmd_force_en); + + acpi_mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]; + set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, acpi_mask); + set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE, acpi_mask); } static void nfit_test1_setup(struct nfit_test *t) @@ -2699,14 +2892,18 @@ static int nfit_ctl_test(struct device *dev) struct acpi_nfit_desc *acpi_desc; const u64 test_val = 0x0123456789abcdefULL; unsigned long mask, cmd_size, offset; - union { - struct nd_cmd_get_config_size cfg_size; - struct nd_cmd_clear_error clear_err; - struct nd_cmd_ars_status ars_stat; - struct nd_cmd_ars_cap ars_cap; - char buf[sizeof(struct nd_cmd_ars_status) - + sizeof(struct nd_ars_record)]; - } cmds; + struct nfit_ctl_test_cmd { + struct nd_cmd_pkg pkg; + union { + struct nd_cmd_get_config_size cfg_size; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_status ars_stat; + struct nd_cmd_ars_cap ars_cap; + struct nd_intel_bus_fw_activate_businfo fwa_info; + char buf[sizeof(struct nd_cmd_ars_status) + + sizeof(struct nd_ars_record)]; + }; + } cmd; adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL); if (!adev) @@ -2731,11 +2928,15 @@ static int nfit_ctl_test(struct device *dev) .module = THIS_MODULE, .provider_name = "ACPI.NFIT", .ndctl = acpi_nfit_ctl, - .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA - | 1UL << NFIT_CMD_ARS_INJECT_SET - | 1UL << NFIT_CMD_ARS_INJECT_CLEAR - | 1UL << NFIT_CMD_ARS_INJECT_GET, + .bus_family_mask = 1UL << NVDIMM_BUS_FAMILY_NFIT + | 1UL << NVDIMM_BUS_FAMILY_INTEL, }, + .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA + | 1UL << NFIT_CMD_ARS_INJECT_SET + | 1UL << NFIT_CMD_ARS_INJECT_CLEAR + | 1UL << NFIT_CMD_ARS_INJECT_GET, + .family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL] = + NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK, .dev = &adev->dev, }; @@ -2766,21 +2967,21 @@ static int nfit_ctl_test(struct device *dev) /* basic checkout of a typical 'get config size' command */ - cmd_size = sizeof(cmds.cfg_size); - cmds.cfg_size = (struct nd_cmd_get_config_size) { + cmd_size = sizeof(cmd.cfg_size); + cmd.cfg_size = (struct nd_cmd_get_config_size) { .status = 0, .config_size = SZ_128K, .max_xfer = SZ_4K, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); - if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0 - || cmds.cfg_size.config_size != SZ_128K - || cmds.cfg_size.max_xfer != SZ_4K) { + if (rc < 0 || cmd_rc || cmd.cfg_size.status != 0 + || cmd.cfg_size.config_size != SZ_128K + || cmd.cfg_size.max_xfer != SZ_4K) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", __func__, __LINE__, rc, cmd_rc); return -EIO; @@ -2789,14 +2990,14 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with zero output */ cmd_size = offsetof(struct nd_cmd_ars_status, address); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = 0, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2806,16 +3007,16 @@ static int nfit_ctl_test(struct device *dev) /* test ars_cap with benign extended status */ - cmd_size = sizeof(cmds.ars_cap); - cmds.ars_cap = (struct nd_cmd_ars_cap) { + cmd_size = sizeof(cmd.ars_cap); + cmd.ars_cap = (struct nd_cmd_ars_cap) { .status = ND_ARS_PERSISTENT << 16, }; offset = offsetof(struct nd_cmd_ars_cap, status); - rc = setup_result(cmds.buf + offset, cmd_size - offset); + rc = setup_result(cmd.buf + offset, cmd_size - offset); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2825,19 +3026,19 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with 'status' trimmed from 'out_length' */ - cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record); + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = cmd_size - 4, }; - record = &cmds.ars_stat.records[0]; + record = &cmd.ars_stat.records[0]; *record = (struct nd_ars_record) { .length = test_val, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc || record->length != test_val) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2847,19 +3048,19 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with 'Output (Size)' including 'status' */ - cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record); + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = cmd_size, }; - record = &cmds.ars_stat.records[0]; + record = &cmd.ars_stat.records[0]; *record = (struct nd_ars_record) { .length = test_val, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc || record->length != test_val) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2869,15 +3070,15 @@ static int nfit_ctl_test(struct device *dev) /* test extended status for get_config_size results in failure */ - cmd_size = sizeof(cmds.cfg_size); - cmds.cfg_size = (struct nd_cmd_get_config_size) { + cmd_size = sizeof(cmd.cfg_size); + cmd.cfg_size = (struct nd_cmd_get_config_size) { .status = 1 << 16, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc >= 0) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2886,16 +3087,46 @@ static int nfit_ctl_test(struct device *dev) } /* test clear error */ - cmd_size = sizeof(cmds.clear_err); - cmds.clear_err = (struct nd_cmd_clear_error) { + cmd_size = sizeof(cmd.clear_err); + cmd.clear_err = (struct nd_cmd_clear_error) { .length = 512, .cleared = 512, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); + if (rc < 0 || cmd_rc) { + dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", + __func__, __LINE__, rc, cmd_rc); + return -EIO; + } + + /* test firmware activate bus info */ + cmd_size = sizeof(cmd.fwa_info); + cmd = (struct nfit_ctl_test_cmd) { + .pkg = { + .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, + .nd_family = NVDIMM_BUS_FAMILY_INTEL, + .nd_size_out = cmd_size, + .nd_fw_size = cmd_size, + }, + .fwa_info = { + .state = ND_INTEL_FWA_IDLE, + .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE + | ND_INTEL_BUS_FWA_CAP_OSQUIESCE, + .activate_tmo = 1, + .cpu_quiesce_tmo = 1, + .io_quiesce_tmo = 1, + .max_quiesce_tmo = 1, + }, + }; + rc = setup_result(cmd.buf, cmd_size); + if (rc) + return rc; + rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CALL, + &cmd, sizeof(cmd.pkg) + cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", __func__, __LINE__, rc, cmd_rc); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index e03bc15ce7312..9018f45d631de 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -32,6 +32,7 @@ TARGETS += lkdtm TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug +TARGETS += mincore TARGETS += mount TARGETS += mqueue TARGETS += net diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e7a8cf83ba48a..a83b5827532f7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -102,7 +102,7 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) endef include ../lib.mk @@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +$(OUTPUT)/%.o: %.c + $(call msg,CC,,$@) + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) - $(CC) -c $(CFLAGS) -o $@ $< + $(Q)$(CC) -c $(CFLAGS) -o $@ $< VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -142,7 +146,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)/runqslower: $(BPFOBJ) +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool + +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ @@ -164,7 +170,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool @@ -180,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): $(call msg,MKDIR,,$@) - mkdir -p $@ + $(Q)mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) - $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else $(call msg,CP,,$@) - cp "$(VMLINUX_H)" $@ + $(Q)cp "$(VMLINUX_H)" $@ endif $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ @@ -237,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -300,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): $$(call msg,MKDIR,,$$@) - mkdir -p $$@ + $(Q)mkdir -p $$@ endif # ensure we set up BPF objects generation rule just once for a given @@ -320,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $$(BPFTOOL) gen skeleton $$< > $$@ + $(Q)$$(BPFTOOL) gen skeleton $$< > $$@ endif # ensure we set up tests.h header generation rule just once @@ -344,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ @@ -352,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) - $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - cp -a $$^ $(TRUNNER_OUTPUT)/ + $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ @@ -366,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(RESOLVE_BTFIDS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ endef @@ -420,17 +425,17 @@ verifier/tests.h: verifier/*.c ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) - $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(call msg,CC,,$@) - $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ @@ -443,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4ffefdc1130f8..7375d9a6d2427 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -468,6 +468,7 @@ static void test_bpf_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u64 val, expected_val = 0; struct bpf_link *link; struct key_t { @@ -490,13 +491,16 @@ static void test_bpf_hash_map(void) goto out; /* iterator with hashmap2 and hashmap3 should fail */ - opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap2 unexpected succeeded\n")) goto out; - opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap3 unexpected succeeded\n")) @@ -519,7 +523,7 @@ static void test_bpf_hash_map(void) goto out; } - opts.map_fd = map_fd; + linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u32 expected_val = 0; struct bpf_link *link; struct key_t { @@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -649,6 +657,7 @@ static void test_bpf_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); __u32 expected_key = 0, res_first_key; struct bpf_iter_bpf_array_map *skel; + union bpf_iter_link_info linfo; int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; @@ -673,7 +682,10 @@ static void test_bpf_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_array_map *skel; __u32 expected_key = 0, expected_val = 0; + union bpf_iter_link_info linfo; int err, i, j, map_fd, iter_fd; struct bpf_link *link; char buf[64]; @@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, i, len, map_fd, iter_fd, num_sockets; struct bpf_iter_bpf_sk_storage_map *skel; + union bpf_iter_link_info linfo; int sock_fd[3] = {-1, -1, -1}; __u32 val, expected_val = 0; struct bpf_link *link; @@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_test_kern5 *skel; + union bpf_iter_link_info linfo; struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); @@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void) "skeleton open_and_load failed\n")) return; - opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) bpf_link__destroy(link); diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 504abb7bfb959..7043e6ded0e60 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr, close(pipe_p2c[1]); /* close write */ /* notify parent signal handler is installed */ - write(pipe_c2p[1], buf, 1); + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* make sure parent enabled bpf program to send_signal */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* wait a little for signal handler */ sleep(1); - if (sigusr1_received) - write(pipe_c2p[1], "2", 1); - else - write(pipe_c2p[1], "0", 1); + buf[0] = sigusr1_received ? '2' : '0'; + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for parent notification and exit */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, } /* wait until child signal handler installed */ - read(pipe_c2p[0], buf, 1); + CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* trigger the bpf send_signal */ skel->bss->pid = pid; @@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for result */ err = read(pipe_c2p[0], buf, 1); @@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); disable_pmu: close(pmu_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f002e3090d92a..11a769e18f5d9 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void) { __u64 sample_freq = 5000; /* fallback to 5000 on error */ FILE *f; + __u32 duration = 0; f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); if (f == NULL) return sample_freq; - fscanf(f, "%llu", &sample_freq); + CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate", + "return default value: 5000,err %d\n", -errno); fclose(f); return sample_freq; } diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings new file mode 100644 index 0000000000000..e7b9417537fbc --- /dev/null +++ b/tools/testing/selftests/bpf/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 8549b31716ab7..73da7fe8c1524 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -124,17 +124,24 @@ int main(int argc, char **argv) sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } sprintf(test_script, "nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); - system(test_script); + if (system(test_script)) + printf("execute command: %s, err %d\n", test_script, -errno); sprintf(test_script, "iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index aa6de65b08384..84cfcabea8387 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -2,3 +2,4 @@ test_memcontrol test_core test_freezer +test_kmem
\ No newline at end of file diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 967f268fde74e..f027d933595b3 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -6,11 +6,13 @@ all: TEST_FILES := with_stress.sh TEST_PROGS := test_stress.sh TEST_GEN_PROGS = test_memcontrol +TEST_GEN_PROGS += test_kmem TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer include ../lib.mk $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h +$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h $(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h $(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 8a637ca7d73a4..05853b0b88318 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -106,7 +106,7 @@ int cg_read_strcmp(const char *cgroup, const char *control, /* Handle the case of comparing against empty string */ if (!expected) - size = 32; + return -1; else size = strlen(expected) + 1; diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c new file mode 100644 index 0000000000000..0941aa16157e3 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/wait.h> +#include <errno.h> +#include <sys/sysinfo.h> +#include <pthread.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + + +/* + * Memory cgroup charging and vmstat data aggregation is performed using + * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum + * discrepancy between charge and vmstat entries is number of cpus multiplied + * by 32 pages multiplied by 2. + */ +#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs()) + + +static int alloc_dcache(const char *cgroup, void *arg) +{ + unsigned long i; + struct stat st; + char buf[128]; + + for (i = 0; i < (unsigned long)arg; i++) { + snprintf(buf, sizeof(buf), + "/something-non-existent-with-a-long-name-%64lu-%d", + i, getpid()); + stat(buf, &st); + } + + return 0; +} + +/* + * This test allocates 100000 of negative dentries with long names. + * Then it checks that "slab" in memory.stat is larger than 1M. + * Then it sets memory.high to 1M and checks that at least 1/2 + * of slab memory has been reclaimed. + */ +static int test_kmem_basic(const char *root) +{ + int ret = KSFT_FAIL; + char *cg = NULL; + long slab0, slab1, current; + + cg = cg_name(root, "kmem_basic_test"); + if (!cg) + goto cleanup; + + if (cg_create(cg)) + goto cleanup; + + if (cg_run(cg, alloc_dcache, (void *)100000)) + goto cleanup; + + slab0 = cg_read_key_long(cg, "memory.stat", "slab "); + if (slab0 < (1 << 20)) + goto cleanup; + + cg_write(cg, "memory.high", "1M"); + slab1 = cg_read_key_long(cg, "memory.stat", "slab "); + if (slab1 <= 0) + goto cleanup; + + current = cg_read_long(cg, "memory.current"); + if (current <= 0) + goto cleanup; + + if (slab1 < slab0 / 2 && current < slab0 / 2) + ret = KSFT_PASS; +cleanup: + cg_destroy(cg); + free(cg); + + return ret; +} + +static void *alloc_kmem_fn(void *arg) +{ + alloc_dcache(NULL, (void *)100); + return NULL; +} + +static int alloc_kmem_smp(const char *cgroup, void *arg) +{ + int nr_threads = 2 * get_nprocs(); + pthread_t *tinfo; + unsigned long i; + int ret = -1; + + tinfo = calloc(nr_threads, sizeof(pthread_t)); + if (tinfo == NULL) + return -1; + + for (i = 0; i < nr_threads; i++) { + if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn, + (void *)i)) { + free(tinfo); + return -1; + } + } + + for (i = 0; i < nr_threads; i++) { + ret = pthread_join(tinfo[i], NULL); + if (ret) + break; + } + + free(tinfo); + return ret; +} + +static int cg_run_in_subcgroups(const char *parent, + int (*fn)(const char *cgroup, void *arg), + void *arg, int times) +{ + char *child; + int i; + + for (i = 0; i < times; i++) { + child = cg_name_indexed(parent, "child", i); + if (!child) + return -1; + + if (cg_create(child)) { + cg_destroy(child); + free(child); + return -1; + } + + if (cg_run(child, fn, NULL)) { + cg_destroy(child); + free(child); + return -1; + } + + cg_destroy(child); + free(child); + } + + return 0; +} + +/* + * The test creates and destroys a large number of cgroups. In each cgroup it + * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS + * threads. Then it checks the sanity of numbers on the parent level: + * the total size of the cgroups should be roughly equal to + * anon + file + slab + kernel_stack. + */ +static int test_kmem_memcg_deletion(const char *root) +{ + long current, slab, anon, file, kernel_stack, sum; + int ret = KSFT_FAIL; + char *parent; + + parent = cg_name(root, "kmem_memcg_deletion_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100)) + goto cleanup; + + current = cg_read_long(parent, "memory.current"); + slab = cg_read_key_long(parent, "memory.stat", "slab "); + anon = cg_read_key_long(parent, "memory.stat", "anon "); + file = cg_read_key_long(parent, "memory.stat", "file "); + kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); + if (current < 0 || slab < 0 || anon < 0 || file < 0 || + kernel_stack < 0) + goto cleanup; + + sum = slab + anon + file + kernel_stack; + if (abs(sum - current) < MAX_VMSTAT_ERROR) { + ret = KSFT_PASS; + } else { + printf("memory.current = %ld\n", current); + printf("slab + anon + file + kernel_stack = %ld\n", sum); + printf("slab = %ld\n", slab); + printf("anon = %ld\n", anon); + printf("file = %ld\n", file); + printf("kernel_stack = %ld\n", kernel_stack); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + +/* + * The test reads the entire /proc/kpagecgroup. If the operation went + * successfully (and the kernel didn't panic), the test is treated as passed. + */ +static int test_kmem_proc_kpagecgroup(const char *root) +{ + unsigned long buf[128]; + int ret = KSFT_FAIL; + ssize_t len; + int fd; + + fd = open("/proc/kpagecgroup", O_RDONLY); + if (fd < 0) + return ret; + + do { + len = read(fd, buf, sizeof(buf)); + } while (len > 0); + + if (len == 0) + ret = KSFT_PASS; + + close(fd); + return ret; +} + +static void *pthread_wait_fn(void *arg) +{ + sleep(100); + return NULL; +} + +static int spawn_1000_threads(const char *cgroup, void *arg) +{ + int nr_threads = 1000; + pthread_t *tinfo; + unsigned long i; + long stack; + int ret = -1; + + tinfo = calloc(nr_threads, sizeof(pthread_t)); + if (tinfo == NULL) + return -1; + + for (i = 0; i < nr_threads; i++) { + if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn, + (void *)i)) { + free(tinfo); + return(-1); + } + } + + stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack "); + if (stack >= 4096 * 1000) + ret = 0; + + free(tinfo); + return ret; +} + +/* + * The test spawns a process, which spawns 1000 threads. Then it checks + * that memory.stat's kernel_stack is at least 1000 pages large. + */ +static int test_kmem_kernel_stacks(const char *root) +{ + int ret = KSFT_FAIL; + char *cg = NULL; + + cg = cg_name(root, "kmem_kernel_stacks_test"); + if (!cg) + goto cleanup; + + if (cg_create(cg)) + goto cleanup; + + if (cg_run(cg, spawn_1000_threads, NULL)) + goto cleanup; + + ret = KSFT_PASS; +cleanup: + cg_destroy(cg); + free(cg); + + return ret; +} + +/* + * This test sequentionally creates 30 child cgroups, allocates some + * kernel memory in each of them, and deletes them. Then it checks + * that the number of dying cgroups on the parent level is 0. + */ +static int test_kmem_dead_cgroups(const char *root) +{ + int ret = KSFT_FAIL; + char *parent; + long dead; + int i; + + parent = cg_name(root, "kmem_dead_cgroups_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) + goto cleanup; + + for (i = 0; i < 5; i++) { + dead = cg_read_key_long(parent, "cgroup.stat", + "nr_dying_descendants "); + if (dead == 0) { + ret = KSFT_PASS; + break; + } + /* + * Reclaiming cgroups might take some time, + * let's wait a bit and repeat. + */ + sleep(1); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + +/* + * This test creates a sub-tree with 1000 memory cgroups. + * Then it checks that the memory.current on the parent level + * is greater than 0 and approximates matches the percpu value + * from memory.stat. + */ +static int test_percpu_basic(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child; + long current, percpu; + int i; + + parent = cg_name(root, "percpu_basic_test"); + if (!parent) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + if (!child) + return -1; + + if (cg_create(child)) + goto cleanup_children; + + free(child); + } + + current = cg_read_long(parent, "memory.current"); + percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + + if (current > 0 && percpu > 0 && abs(current - percpu) < + MAX_VMSTAT_ERROR) + ret = KSFT_PASS; + else + printf("memory.current %ld\npercpu %ld\n", + current, percpu); + +cleanup_children: + for (i = 0; i < 1000; i++) { + child = cg_name_indexed(parent, "child", i); + cg_destroy(child); + free(child); + } + +cleanup: + cg_destroy(parent); + free(parent); + + return ret; +} + +#define T(x) { x, #x } +struct kmem_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_kmem_basic), + T(test_kmem_memcg_deletion), + T(test_kmem_proc_kpagecgroup), + T(test_kmem_kernel_stacks), + T(test_kmem_dead_cgroups), + T(test_percpu_basic), +}; +#undef T + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + /* + * Check that memory controller is available: + * memory is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 94b02a18f230b..344a99c6da1b7 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -10,3 +10,4 @@ execveat.denatured /recursion-depth xxxxxxxx* pipe +S_I*.test diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 4453b8f8def37..0a13b110c1e66 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script +TEST_PROGS := binfmt_script non-regular TEST_GEN_PROGS := execveat TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe # Makefile is a run-time dependency, since it's accessed by the execveat test @@ -11,7 +11,8 @@ TEST_FILES := Makefile TEST_GEN_PROGS += recursion-depth -EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* +EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \ + $(OUTPUT)/S_I*.test include ../lib.mk diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c new file mode 100644 index 0000000000000..cd3a34aca93e5 --- /dev/null +++ b/tools/testing/selftests/exec/non-regular.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/types.h> + +#include "../kselftest_harness.h" + +/* Remove a file, ignoring the result if it didn't exist. */ +void rm(struct __test_metadata *_metadata, const char *pathname, + int is_dir) +{ + int rc; + + if (is_dir) + rc = rmdir(pathname); + else + rc = unlink(pathname); + + if (rc < 0) { + ASSERT_EQ(errno, ENOENT) { + TH_LOG("Not ENOENT: %s", pathname); + } + } else { + ASSERT_EQ(rc, 0) { + TH_LOG("Failed to remove: %s", pathname); + } + } +} + +FIXTURE(file) { + char *pathname; + int is_dir; +}; + +FIXTURE_VARIANT(file) +{ + const char *name; + int expected; + int is_dir; + void (*setup)(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant); + int major, minor, mode; /* for mknod() */ +}; + +void setup_link(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + const char * const paths[] = { + "/bin/true", + "/usr/bin/true", + }; + int i; + + for (i = 0; i < ARRAY_SIZE(paths); i++) { + if (access(paths[i], X_OK) == 0) { + ASSERT_EQ(symlink(paths[i], self->pathname), 0); + return; + } + } + ASSERT_EQ(1, 0) { + TH_LOG("Could not find viable 'true' binary"); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFLNK) +{ + .name = "S_IFLNK", + .expected = ELOOP, + .setup = setup_link, +}; + +void setup_dir(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkdir(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFDIR) +{ + .name = "S_IFDIR", + .is_dir = 1, + .expected = EACCES, + .setup = setup_dir, +}; + +void setup_node(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + dev_t dev; + int rc; + + dev = makedev(variant->major, variant->minor); + rc = mknod(self->pathname, 0755 | variant->mode, dev); + ASSERT_EQ(rc, 0) { + if (errno == EPERM) + SKIP(return, "Please run as root; cannot mknod(%s)", + variant->name); + } +} + +FIXTURE_VARIANT_ADD(file, S_IFBLK) +{ + .name = "S_IFBLK", + .expected = EACCES, + .setup = setup_node, + /* /dev/loop0 */ + .major = 7, + .minor = 0, + .mode = S_IFBLK, +}; + +FIXTURE_VARIANT_ADD(file, S_IFCHR) +{ + .name = "S_IFCHR", + .expected = EACCES, + .setup = setup_node, + /* /dev/zero */ + .major = 1, + .minor = 5, + .mode = S_IFCHR, +}; + +void setup_fifo(struct __test_metadata *_metadata, + FIXTURE_DATA(file) *self, + const FIXTURE_VARIANT(file) *variant) +{ + ASSERT_EQ(mkfifo(self->pathname, 0755), 0); +} + +FIXTURE_VARIANT_ADD(file, S_IFIFO) +{ + .name = "S_IFIFO", + .expected = EACCES, + .setup = setup_fifo, +}; + +FIXTURE_SETUP(file) +{ + ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6); + self->is_dir = variant->is_dir; + + rm(_metadata, self->pathname, variant->is_dir); + variant->setup(_metadata, self, variant); +} + +FIXTURE_TEARDOWN(file) +{ + rm(_metadata, self->pathname, self->is_dir); +} + +TEST_F(file, exec_errno) +{ + char * const argv[2] = { (char * const)self->pathname, NULL }; + + EXPECT_LT(execv(argv[0], argv), 0); + EXPECT_EQ(errno, variant->expected); +} + +/* S_IFSOCK */ +FIXTURE(sock) +{ + int fd; +}; + +FIXTURE_SETUP(sock) +{ + self->fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd, 0); +} + +FIXTURE_TEARDOWN(sock) +{ + if (self->fd >= 0) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(sock, exec_errno) +{ + char * const argv[2] = { " magic socket ", NULL }; + char * const envp[1] = { NULL }; + + EXPECT_LT(fexecve(self->fd, argv, envp), 0); + EXPECT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index ea2147248ebe6..afd42387e8b21 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -343,7 +343,7 @@ kmod_test_0001_driver() kmod_defaults_driver config_num_threads 1 - printf '\000' >"$DIR"/config_test_driver + printf $NAME >"$DIR"/config_test_driver config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND } @@ -354,7 +354,7 @@ kmod_test_0001_fs() kmod_defaults_fs config_num_threads 1 - printf '\000' >"$DIR"/config_test_fs + printf $NAME >"$DIR"/config_test_fs config_trigger ${FUNCNAME[0]} config_expect_result ${FUNCNAME[0]} -EINVAL } diff --git a/tools/testing/selftests/mincore/.gitignore b/tools/testing/selftests/mincore/.gitignore new file mode 100644 index 0000000000000..15c4dfc2df004 --- /dev/null +++ b/tools/testing/selftests/mincore/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +mincore_selftest diff --git a/tools/testing/selftests/mincore/Makefile b/tools/testing/selftests/mincore/Makefile new file mode 100644 index 0000000000000..38c7db1e89265 --- /dev/null +++ b/tools/testing/selftests/mincore/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0+ + +CFLAGS += -Wall + +TEST_GEN_PROGS := mincore_selftest +include ../lib.mk diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c new file mode 100644 index 0000000000000..5a1e85ff5d32a --- /dev/null +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * kselftest suite for mincore(). + * + * Copyright (C) 2020 Collabora, Ltd. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <string.h> +#include <fcntl.h> +#include <string.h> + +#include "../kselftest.h" +#include "../kselftest_harness.h" + +/* Default test file size: 4MB */ +#define MB (1UL << 20) +#define FILE_SIZE (4 * MB) + + +/* + * Tests the user interface. This test triggers most of the documented + * error conditions in mincore(). + */ +TEST(basic_interface) +{ + int retval; + int page_size; + unsigned char vec[1]; + char *addr; + + page_size = sysconf(_SC_PAGESIZE); + + /* Query a 0 byte sized range */ + retval = mincore(0, 0, vec); + EXPECT_EQ(0, retval); + + /* Addresses in the specified range are invalid or unmapped */ + errno = 0; + retval = mincore(NULL, page_size, vec); + EXPECT_EQ(-1, retval); + EXPECT_EQ(ENOMEM, errno); + + errno = 0; + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + + /* <addr> argument is not page-aligned */ + errno = 0; + retval = mincore(addr + 1, page_size, vec); + EXPECT_EQ(-1, retval); + EXPECT_EQ(EINVAL, errno); + + /* <length> argument is too large */ + errno = 0; + retval = mincore(addr, -1, vec); + EXPECT_EQ(-1, retval); + EXPECT_EQ(ENOMEM, errno); + + /* <vec> argument points to an illegal address */ + errno = 0; + retval = mincore(addr, page_size, NULL); + EXPECT_EQ(-1, retval); + EXPECT_EQ(EFAULT, errno); + munmap(addr, page_size); +} + + +/* + * Test mincore() behavior on a private anonymous page mapping. + * Check that the page is not loaded into memory right after the mapping + * but after accessing it (on-demand allocation). + * Then free the page and check that it's not memory-resident. + */ +TEST(check_anonymous_locked_pages) +{ + unsigned char vec[1]; + char *addr; + int retval; + int page_size; + + page_size = sysconf(_SC_PAGESIZE); + + /* Map one page and check it's not memory-resident */ + errno = 0; + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(0, vec[0]) { + TH_LOG("Page found in memory before use"); + } + + /* Touch the page and check again. It should now be in memory */ + addr[0] = 1; + mlock(addr, page_size); + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[0]) { + TH_LOG("Page not found in memory after use"); + } + + /* + * It shouldn't be memory-resident after unlocking it and + * marking it as unneeded. + */ + munlock(addr, page_size); + madvise(addr, page_size, MADV_DONTNEED); + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(0, vec[0]) { + TH_LOG("Page in memory after being zapped"); + } + munmap(addr, page_size); +} + + +/* + * Check mincore() behavior on huge pages. + * This test will be skipped if the mapping fails (ie. if there are no + * huge pages available). + * + * Make sure the system has at least one free huge page, check + * "HugePages_Free" in /proc/meminfo. + * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if + * needed. + */ +TEST(check_huge_pages) +{ + unsigned char vec[1]; + char *addr; + int retval; + int page_size; + + page_size = sysconf(_SC_PAGESIZE); + + errno = 0; + addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + if (addr == MAP_FAILED) { + if (errno == ENOMEM) + SKIP(return, "No huge pages available."); + else + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(0, vec[0]) { + TH_LOG("Page found in memory before use"); + } + + addr[0] = 1; + mlock(addr, page_size); + retval = mincore(addr, page_size, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[0]) { + TH_LOG("Page not found in memory after use"); + } + + munlock(addr, page_size); + munmap(addr, page_size); +} + + +/* + * Test mincore() behavior on a file-backed page. + * No pages should be loaded into memory right after the mapping. Then, + * accessing any address in the mapping range should load the page + * containing the address and a number of subsequent pages (readahead). + * + * The actual readahead settings depend on the test environment, so we + * can't make a lot of assumptions about that. This test covers the most + * general cases. + */ +TEST(check_file_mmap) +{ + unsigned char *vec; + int vec_size; + char *addr; + int retval; + int page_size; + int fd; + int i; + int ra_pages = 0; + + page_size = sysconf(_SC_PAGESIZE); + vec_size = FILE_SIZE / page_size; + if (FILE_SIZE % page_size) + vec_size++; + + vec = calloc(vec_size, sizeof(unsigned char)); + ASSERT_NE(NULL, vec) { + TH_LOG("Can't allocate array"); + } + + errno = 0; + fd = open(".", O_TMPFILE | O_RDWR, 0600); + ASSERT_NE(-1, fd) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + errno = 0; + retval = fallocate(fd, 0, 0, FILE_SIZE); + ASSERT_EQ(0, retval) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + + /* + * Map the whole file, the pages shouldn't be fetched yet. + */ + errno = 0; + addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + for (i = 0; i < vec_size; i++) { + ASSERT_EQ(0, vec[i]) { + TH_LOG("Unexpected page in memory"); + } + } + + /* + * Touch a page in the middle of the mapping. We expect the next + * few pages (the readahead window) to be populated too. + */ + addr[FILE_SIZE / 2] = 1; + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) { + TH_LOG("Page not found in memory after use"); + } + + i = FILE_SIZE / 2 / page_size + 1; + while (i < vec_size && vec[i]) { + ra_pages++; + i++; + } + EXPECT_GT(ra_pages, 0) { + TH_LOG("No read-ahead pages found in memory"); + } + + EXPECT_LT(i, vec_size) { + TH_LOG("Read-ahead pages reached the end of the file"); + } + /* + * End of the readahead window. The rest of the pages shouldn't + * be in memory. + */ + if (i < vec_size) { + while (i < vec_size && !vec[i]) + i++; + EXPECT_EQ(vec_size, i) { + TH_LOG("Unexpected page in memory beyond readahead window"); + } + } + + munmap(addr, FILE_SIZE); + close(fd); + free(vec); +} + + +/* + * Test mincore() behavior on a page backed by a tmpfs file. This test + * performs the same steps as the previous one. However, we don't expect + * any readahead in this case. + */ +TEST(check_tmpfs_mmap) +{ + unsigned char *vec; + int vec_size; + char *addr; + int retval; + int page_size; + int fd; + int i; + int ra_pages = 0; + + page_size = sysconf(_SC_PAGESIZE); + vec_size = FILE_SIZE / page_size; + if (FILE_SIZE % page_size) + vec_size++; + + vec = calloc(vec_size, sizeof(unsigned char)); + ASSERT_NE(NULL, vec) { + TH_LOG("Can't allocate array"); + } + + errno = 0; + fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600); + ASSERT_NE(-1, fd) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + errno = 0; + retval = fallocate(fd, 0, 0, FILE_SIZE); + ASSERT_EQ(0, retval) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + + /* + * Map the whole file, the pages shouldn't be fetched yet. + */ + errno = 0; + addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + ASSERT_NE(MAP_FAILED, addr) { + TH_LOG("mmap error: %s", strerror(errno)); + } + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + for (i = 0; i < vec_size; i++) { + ASSERT_EQ(0, vec[i]) { + TH_LOG("Unexpected page in memory"); + } + } + + /* + * Touch a page in the middle of the mapping. We expect only + * that page to be fetched into memory. + */ + addr[FILE_SIZE / 2] = 1; + retval = mincore(addr, FILE_SIZE, vec); + ASSERT_EQ(0, retval); + ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) { + TH_LOG("Page not found in memory after use"); + } + + i = FILE_SIZE / 2 / page_size + 1; + while (i < vec_size && vec[i]) { + ra_pages++; + i++; + } + ASSERT_EQ(ra_pages, 0) { + TH_LOG("Read-ahead pages found in memory"); + } + + munmap(addr, FILE_SIZE); + close(fd); + free(vec); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2499824d9e1cf..8df5cb8f71ff9 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,6 @@ CONFIG_MPTCP=y CONFIG_MPTCP_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cad6f73a5fd01..090620c3e10c5 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... but we still receive. * Close our write side, ev. give some time - * for address notification + * for address notification and/or checking + * the current status */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_wait) + if (cfg_join) usleep(cfg_wait); close(peerfd); diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 0453c50c949cb..55ef15184057d 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -9,7 +9,17 @@ * This selftest exercises the powerpc alignment fault handler. * * We create two sets of source and destination buffers, one in regular memory, - * the other cache-inhibited (we use /dev/fb0 for this). + * the other cache-inhibited (by default we use /dev/fb0 for this, but an + * alterative path for cache-inhibited memory may be provided). + * + * One way to get cache-inhibited memory is to use the "mem" kernel parameter + * to limit the kernel to less memory than actually exists. Addresses above + * the limit may still be accessed but will be treated as cache-inhibited. For + * example, if there is actually 4GB of memory and the parameter "mem=3GB" is + * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. + * To access this region /dev/mem is used. The kernel should be configured + * without CONFIG_STRICT_DEVMEM. In this case use: + * ./alignment_handler /dev/mem 0xc0000000 * * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the @@ -48,11 +58,14 @@ #include <asm/cputable.h> #include "utils.h" +#include "instructions.h" int bufsize; int debug; int testing; volatile int gotsig; +char *cipath = "/dev/fb0"; +long cioffset; void sighandler(int sig, siginfo_t *info, void *ctx) { @@ -84,6 +97,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx) } \ rc |= do_test(#name, test_##name) +#define TESTP(name, ld_op, st_op, ld_reg, st_reg) \ + void test_##name(char *s, char *d) \ + { \ + asm volatile( \ + ld_op(ld_reg, %0, 0, 0) \ + st_op(st_reg, %1, 0, 0) \ + :: "r"(s), "r"(d), "r"(0) \ + : "memory", "vs0", "vs32", "r31"); \ + } \ + rc |= do_test(#name, test_##name) + #define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32) #define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32) #define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32) @@ -103,6 +127,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx) #define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0) #define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0) +#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31) +#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31) + +#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31) +#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31) + +#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0) +#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0) + +#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32) +#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0) /* FIXME: Unimplemented tests: */ // STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */ @@ -195,17 +230,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) printf("\tDoing %s:\t", test_name); - fd = open("/dev/fb0", O_RDWR); + fd = open(cipath, O_RDWR); if (fd < 0) { printf("\n"); - perror("Can't open /dev/fb0 now?"); + perror("Can't open ci file now?"); return 1; } - ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, - fd, 0x0); - ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, - fd, bufsize); + ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, cioffset); + ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, cioffset + bufsize); + if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) { printf("\n"); perror("mmap failed"); @@ -270,11 +306,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) return rc; } -static bool can_open_fb0(void) +static bool can_open_cifile(void) { int fd; - fd = open("/dev/fb0", O_RDWR); + fd = open(cipath, O_RDWR); if (fd < 0) return false; @@ -286,7 +322,7 @@ int test_alignment_handler_vsx_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("VSX: 2.06B\n"); @@ -304,7 +340,7 @@ int test_alignment_handler_vsx_207(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); printf("VSX: 2.07B\n"); @@ -320,7 +356,7 @@ int test_alignment_handler_vsx_300(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); printf("VSX: 3.00B\n"); @@ -348,11 +384,30 @@ int test_alignment_handler_vsx_300(void) return rc; } +int test_alignment_handler_vsx_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("VSX: PREFIX\n"); + LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1); + STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1); + return rc; +} + int test_alignment_handler_integer(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); printf("Integer\n"); LOAD_DFORM_TEST(lbz); @@ -408,7 +463,7 @@ int test_alignment_handler_integer_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("Integer: 2.06\n"); @@ -419,11 +474,32 @@ int test_alignment_handler_integer_206(void) return rc; } +int test_alignment_handler_integer_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("Integer: PREFIX\n"); + LOAD_MLS_PREFIX_TEST(PLBZ); + LOAD_MLS_PREFIX_TEST(PLHZ); + LOAD_MLS_PREFIX_TEST(PLHA); + LOAD_MLS_PREFIX_TEST(PLWZ); + LOAD_8LS_PREFIX_TEST(PLWA); + LOAD_8LS_PREFIX_TEST(PLD); + STORE_MLS_PREFIX_TEST(PSTB); + STORE_MLS_PREFIX_TEST(PSTH); + STORE_MLS_PREFIX_TEST(PSTW); + STORE_8LS_PREFIX_TEST(PSTD); + return rc; +} + int test_alignment_handler_vmx(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)); printf("VMX\n"); @@ -451,7 +527,7 @@ int test_alignment_handler_fp(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); printf("Floating point\n"); LOAD_FLOAT_DFORM_TEST(lfd); @@ -479,7 +555,7 @@ int test_alignment_handler_fp_205(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05)); printf("Floating point: 2.05\n"); @@ -497,7 +573,7 @@ int test_alignment_handler_fp_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("Floating point: 2.06\n"); @@ -507,13 +583,32 @@ int test_alignment_handler_fp_206(void) return rc; } + +int test_alignment_handler_fp_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("Floating point: PREFIX\n"); + LOAD_FLOAT_DFORM_TEST(lfs); + LOAD_FLOAT_MLS_PREFIX_TEST(PLFS); + LOAD_FLOAT_MLS_PREFIX_TEST(PLFD); + STORE_FLOAT_MLS_PREFIX_TEST(PSTFS); + STORE_FLOAT_MLS_PREFIX_TEST(PSTFD); + return rc; +} + void usage(char *prog) { - printf("Usage: %s [options]\n", prog); + printf("Usage: %s [options] [path [offset]]\n", prog); printf(" -d Enable debug error output\n"); printf("\n"); - printf("This test requires a POWER8 or POWER9 CPU and a usable "); - printf("framebuffer at /dev/fb0.\n"); + printf("This test requires a POWER8, POWER9 or POWER10 CPU "); + printf("and either a usable framebuffer at /dev/fb0 or "); + printf("the path to usable cache inhibited memory and optional "); + printf("offset to be provided\n"); } int main(int argc, char *argv[]) @@ -533,6 +628,13 @@ int main(int argc, char *argv[]) exit(1); } } + argc -= optind; + argv += optind; + + if (argc > 0) + cipath = argv[0]; + if (argc > 1) + cioffset = strtol(argv[1], 0, 0x10); bufsize = getpagesize(); @@ -552,10 +654,14 @@ int main(int argc, char *argv[]) "test_alignment_handler_vsx_207"); rc |= test_harness(test_alignment_handler_vsx_300, "test_alignment_handler_vsx_300"); + rc |= test_harness(test_alignment_handler_vsx_prefix, + "test_alignment_handler_vsx_prefix"); rc |= test_harness(test_alignment_handler_integer, "test_alignment_handler_integer"); rc |= test_harness(test_alignment_handler_integer_206, "test_alignment_handler_integer_206"); + rc |= test_harness(test_alignment_handler_integer_prefix, + "test_alignment_handler_integer_prefix"); rc |= test_harness(test_alignment_handler_vmx, "test_alignment_handler_vmx"); rc |= test_harness(test_alignment_handler_fp, @@ -564,5 +670,7 @@ int main(int argc, char *argv[]) "test_alignment_handler_fp_205"); rc |= test_harness(test_alignment_handler_fp_206, "test_alignment_handler_fp_206"); + rc |= test_harness(test_alignment_handler_fp_prefix, + "test_alignment_handler_fp_prefix"); return rc; } diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index a2e8c9da7fa53..d50cc05df4952 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -19,6 +19,7 @@ #include <limits.h> #include <sys/time.h> #include <sys/syscall.h> +#include <sys/sysinfo.h> #include <sys/types.h> #include <sys/shm.h> #include <linux/futex.h> @@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu) static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu) { - int pid; - cpu_set_t cpuset; + int pid, ncpus; + cpu_set_t *cpuset; + size_t size; pid = fork(); if (pid == -1) { @@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu) if (pid) return; - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); + ncpus = get_nprocs(); + size = CPU_ALLOC_SIZE(ncpus); + cpuset = CPU_ALLOC(ncpus); + if (!cpuset) { + perror("malloc"); + exit(1); + } + CPU_ZERO_S(size, cpuset); + CPU_SET_S(cpu, size, cpuset); - if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + if (sched_setaffinity(0, size, cpuset)) { perror("sched_setaffinity"); + CPU_FREE(cpuset); exit(1); } + CPU_FREE(cpuset); fn(arg); exit(0); diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh index f52ed92b53e74..00dc32c0ed75c 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -5,12 +5,17 @@ pe_ok() { local dev="$1" local path="/sys/bus/pci/devices/$dev/eeh_pe_state" - if ! [ -e "$path" ] ; then + # if a driver doesn't support the error handling callbacks then the + # device is recovered by removing and re-probing it. This causes the + # sysfs directory to disappear so read the PE state once and squash + # any potential error messages + local eeh_state="$(cat $path 2>/dev/null)" + if [ -z "$eeh_state" ]; then return 1; fi - local fw_state="$(cut -d' ' -f1 < $path)" - local sw_state="$(cut -d' ' -f2 < $path)" + local fw_state="$(echo $eeh_state | cut -d' ' -f1)" + local sw_state="$(echo $eeh_state | cut -d' ' -f2)" # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an # error state or being recovered. Either way, not ok. diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h index f36061eb6f0f1..4efa6314bd963 100644 --- a/tools/testing/selftests/powerpc/include/instructions.h +++ b/tools/testing/selftests/powerpc/include/instructions.h @@ -66,4 +66,81 @@ static inline int paste_last(void *i) #define PPC_INST_PASTE __PASTE(0, 0, 0, 0) #define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1) +/* This defines the prefixed load/store instructions */ +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +#else +# define __stringify_in_c(...) #__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +#endif + +#define __PPC_RA(a) (((a) & 0x1f) << 16) +#define __PPC_RS(s) (((s) & 0x1f) << 21) +#define __PPC_RT(t) __PPC_RS(t) +#define __PPC_PREFIX_R(r) (((r) & 0x1) << 20) + +#define PPC_PREFIX_MLS 0x06000000 +#define PPC_PREFIX_8LS 0x04000000 + +#define PPC_INST_LBZ 0x88000000 +#define PPC_INST_LHZ 0xa0000000 +#define PPC_INST_LHA 0xa8000000 +#define PPC_INST_LWZ 0x80000000 +#define PPC_INST_STB 0x98000000 +#define PPC_INST_STH 0xb0000000 +#define PPC_INST_STW 0x90000000 +#define PPC_INST_STD 0xf8000000 +#define PPC_INST_LFS 0xc0000000 +#define PPC_INST_LFD 0xc8000000 +#define PPC_INST_STFS 0xd0000000 +#define PPC_INST_STFD 0xd8000000 + +#define PREFIX_MLS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \ + stringify_in_c(.long PPC_PREFIX_MLS | \ + __PPC_PREFIX_R(r) | \ + (((d) >> 16) & 0x3ffff);) \ + stringify_in_c(.long (instr) | \ + __PPC_RT(t) | \ + __PPC_RA(a) | \ + ((d) & 0xffff);\n) + +#define PREFIX_8LS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \ + stringify_in_c(.long PPC_PREFIX_8LS | \ + __PPC_PREFIX_R(r) | \ + (((d) >> 16) & 0x3ffff);) \ + stringify_in_c(.long (instr) | \ + __PPC_RT(t) | \ + __PPC_RA(a) | \ + ((d) & 0xffff);\n) + +/* Prefixed Integer Load/Store instructions */ +#define PLBZ(t, a, r, d) PREFIX_MLS(PPC_INST_LBZ, t, a, r, d) +#define PLHZ(t, a, r, d) PREFIX_MLS(PPC_INST_LHZ, t, a, r, d) +#define PLHA(t, a, r, d) PREFIX_MLS(PPC_INST_LHA, t, a, r, d) +#define PLWZ(t, a, r, d) PREFIX_MLS(PPC_INST_LWZ, t, a, r, d) +#define PLWA(t, a, r, d) PREFIX_8LS(0xa4000000, t, a, r, d) +#define PLD(t, a, r, d) PREFIX_8LS(0xe4000000, t, a, r, d) +#define PLQ(t, a, r, d) PREFIX_8LS(0xe0000000, t, a, r, d) +#define PSTB(s, a, r, d) PREFIX_MLS(PPC_INST_STB, s, a, r, d) +#define PSTH(s, a, r, d) PREFIX_MLS(PPC_INST_STH, s, a, r, d) +#define PSTW(s, a, r, d) PREFIX_MLS(PPC_INST_STW, s, a, r, d) +#define PSTD(s, a, r, d) PREFIX_8LS(0xf4000000, s, a, r, d) +#define PSTQ(s, a, r, d) PREFIX_8LS(0xf0000000, s, a, r, d) + +/* Prefixed Floating-Point Load/Store Instructions */ +#define PLFS(frt, a, r, d) PREFIX_MLS(PPC_INST_LFS, frt, a, r, d) +#define PLFD(frt, a, r, d) PREFIX_MLS(PPC_INST_LFD, frt, a, r, d) +#define PSTFS(frs, a, r, d) PREFIX_MLS(PPC_INST_STFS, frs, a, r, d) +#define PSTFD(frs, a, r, d) PREFIX_MLS(PPC_INST_STFD, frs, a, r, d) + +/* Prefixed VSX Load/Store Instructions */ +#define PLXSD(vrt, a, r, d) PREFIX_8LS(0xa8000000, vrt, a, r, d) +#define PLXSSP(vrt, a, r, d) PREFIX_8LS(0xac000000, vrt, a, r, d) +#define PLXV0(s, a, r, d) PREFIX_8LS(0xc8000000, s, a, r, d) +#define PLXV1(s, a, r, d) PREFIX_8LS(0xcc000000, s, a, r, d) +#define PSTXSD(vrs, a, r, d) PREFIX_8LS(0xb8000000, vrs, a, r, d) +#define PSTXSSP(vrs, a, r, d) PREFIX_8LS(0xbc000000, vrs, a, r, d) +#define PSTXV0(s, a, r, d) PREFIX_8LS(0xd8000000, s, a, r, d) +#define PSTXV1(s, a, r, d) PREFIX_8LS(0xdc000000, s, a, r, d) + #endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */ diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h new file mode 100644 index 0000000000000..3312cb1b058d7 --- /dev/null +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2020, Sandipan Das, IBM Corp. + */ + +#ifndef _SELFTESTS_POWERPC_PKEYS_H +#define _SELFTESTS_POWERPC_PKEYS_H + +#include <sys/mman.h> + +#include "reg.h" +#include "utils.h" + +/* + * Older versions of libc use the Intel-specific access rights. + * Hence, override the definitions as they might be incorrect. + */ +#undef PKEY_DISABLE_ACCESS +#define PKEY_DISABLE_ACCESS 0x3 + +#undef PKEY_DISABLE_WRITE +#define PKEY_DISABLE_WRITE 0x2 + +#undef PKEY_DISABLE_EXECUTE +#define PKEY_DISABLE_EXECUTE 0x4 + +/* Older versions of libc do not not define this */ +#ifndef SEGV_PKUERR +#define SEGV_PKUERR 4 +#endif + +#define SI_PKEY_OFFSET 0x20 + +#define __NR_pkey_mprotect 386 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 + +#define PKEY_BITS_PER_PKEY 2 +#define NR_PKEYS 32 +#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1) + +inline unsigned long pkeyreg_get(void) +{ + return mfspr(SPRN_AMR); +} + +inline void pkeyreg_set(unsigned long amr) +{ + set_amr(amr); +} + +void pkey_set_rights(int pkey, unsigned long rights) +{ + unsigned long amr, shift; + + shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY; + amr = pkeyreg_get(); + amr &= ~(PKEY_BITS_MASK << shift); + amr |= (rights & PKEY_BITS_MASK) << shift; + pkeyreg_set(amr); +} + +int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey) +{ + return syscall(__NR_pkey_mprotect, addr, len, prot, pkey); +} + +int sys_pkey_alloc(unsigned long flags, unsigned long rights) +{ + return syscall(__NR_pkey_alloc, flags, rights); +} + +int sys_pkey_free(int pkey) +{ + return syscall(__NR_pkey_free, pkey); +} + +int pkeys_unsupported(void) +{ + bool hash_mmu = false; + int pkey; + + /* Protection keys are currently supported on Hash MMU only */ + FAIL_IF(using_hash_mmu(&hash_mmu)); + SKIP_IF(!hash_mmu); + + /* Check if the system call is supported */ + pkey = sys_pkey_alloc(0, 0); + SKIP_IF(pkey < 0); + sys_pkey_free(pkey); + + return 0; +} + +int siginfo_pkey(siginfo_t *si) +{ + /* + * In older versions of libc, siginfo_t does not have si_pkey as + * a member. + */ +#ifdef si_pkey + return si->si_pkey; +#else + return *((int *)(((char *) si) + SI_PKEY_OFFSET)); +#endif +} + +#define pkey_rights(r) ({ \ + static char buf[4] = "rwx"; \ + unsigned int amr_bits; \ + if ((r) & PKEY_DISABLE_EXECUTE) \ + buf[2] = '-'; \ + amr_bits = (r) & PKEY_BITS_MASK; \ + if (amr_bits & PKEY_DISABLE_WRITE) \ + buf[1] = '-'; \ + if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) \ + buf[0] = '-'; \ + buf; \ +}) + +unsigned long next_pkey_rights(unsigned long rights) +{ + if (rights == PKEY_DISABLE_ACCESS) + return PKEY_DISABLE_EXECUTE; + else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE)) + return 0; + + if ((rights & PKEY_BITS_MASK) == 0) + rights |= PKEY_DISABLE_WRITE; + else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE) + rights |= PKEY_DISABLE_ACCESS; + + return rights; +} + +#endif /* _SELFTESTS_POWERPC_PKEYS_H */ diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 022c5076b2c56..c0f2742a3a59c 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -57,6 +57,12 @@ #define SPRN_PPR 896 /* Program Priority Register */ #define SPRN_AMR 13 /* Authority Mask Register - problem state */ +#define set_amr(v) asm volatile("isync;" \ + "mtspr " __stringify(SPRN_AMR) ",%0;" \ + "isync" : \ + : "r" ((unsigned long)(v)) \ + : "memory") + /* TEXASR register bits */ #define TEXASR_FC 0xFE00000000000000 #define TEXASR_FP 0x0100000000000000 diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index e089a0c30d9af..71d2924f5b8b3 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,16 @@ int perf_event_enable(int fd); int perf_event_disable(int fd); int perf_event_reset(int fd); +#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) +#include <unistd.h> +#include <sys/syscall.h> + +static inline pid_t gettid(void) +{ + return syscall(SYS_gettid); +} +#endif + static inline bool have_hwcap(unsigned long ftr) { return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr; @@ -60,6 +70,7 @@ static inline bool have_hwcap2(unsigned long ftr2) #endif bool is_ppc64le(void); +int using_hash_mmu(bool *using_hash); /* Yes, this is evil */ #define FAIL_IF(x) \ @@ -71,6 +82,15 @@ do { \ } \ } while (0) +#define FAIL_IF_EXIT(x) \ +do { \ + if ((x)) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d\n", __LINE__); \ + _exit(1); \ + } \ +} while (0) + /* The test harness uses this, yes it's gross */ #define MAGIC_SKIP_RETURN_VALUE 99 @@ -96,11 +116,20 @@ do { \ #define _str(s) #s #define str(s) _str(s) +#define sigsafe_err(msg) ({ \ + ssize_t nbytes __attribute__((unused)); \ + nbytes = write(STDERR_FILENO, msg, strlen(msg)); }) + /* POWER9 feature */ #ifndef PPC_FEATURE2_ARCH_3_00 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #endif +/* POWER10 feature */ +#ifndef PPC_FEATURE2_ARCH_3_1 +#define PPC_FEATURE2_ARCH_3_1 0x00040000 +#endif + #if defined(__powerpc64__) #define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] #define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR] diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index e31ca6f453ed4..d0c23b2e4b609 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -6,3 +6,4 @@ vmx_preempt fpu_signal vmx_signal vsx_preempt +fpu_denormal diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 11a10d7a2bbd9..fcc91c2059841 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt +TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt top_srcdir = ../../../../.. include ../../lib.mk @@ -11,9 +11,9 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S $(OUTPUT)/fpu_preempt: fpu_asm.S $(OUTPUT)/fpu_signal: fpu_asm.S -$(OUTPUT)/vmx_syscall: vmx_asm.S -$(OUTPUT)/vmx_preempt: vmx_asm.S -$(OUTPUT)/vmx_signal: vmx_asm.S +$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c +$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c +$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx -$(OUTPUT)/vsx_preempt: vsx_asm.S +$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c new file mode 100644 index 0000000000000..5f96682abaa8f --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright IBM Corp. 2020 + * + * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately + * if the denormal handler is not configured or working properly, this can cause a bad + * crash in kernel mode when the kernel tries to save FP registers when the process + * exits. + */ + +#include <stdio.h> +#include <string.h> + +#include "utils.h" + +static int test_denormal_fpu(void) +{ + unsigned int m32; + unsigned long m64; + volatile float f; + volatile double d; + + /* try to induce lfs <denormal> ; stfd */ + + m32 = 0x00715fcf; /* random denormal */ + memcpy((float *)&f, &m32, sizeof(f)); + d = f; + memcpy(&m64, (double *)&d, sizeof(d)); + + FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */ + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_denormal_fpu, "fpu_denormal"); +} diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c index 2e059f154e775..6761d6ce30eca 100644 --- a/tools/testing/selftests/powerpc/math/vmx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -57,6 +57,9 @@ int test_preempt_vmx(void) int i, rc, threads; pthread_t *tids; + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c index 785a48e0976fa..b340a5c4e79dd 100644 --- a/tools/testing/selftests/powerpc/math/vmx_signal.c +++ b/tools/testing/selftests/powerpc/math/vmx_signal.c @@ -96,6 +96,9 @@ int test_signal_vmx(void) void *rc_p; pthread_t *tids; + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c index 9ee293cc868e8..03c78dfe34440 100644 --- a/tools/testing/selftests/powerpc/math/vmx_syscall.c +++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c @@ -49,9 +49,14 @@ int test_vmx_syscall(void) * Setup an environment with much context switching */ pid_t pid2; - pid_t pid = fork(); + pid_t pid; int ret; int child_ret; + + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + + pid = fork(); FAIL_IF(pid == -1); pid2 = fork(); diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c index 63de9c6e2cd3d..d1601bb889d4c 100644 --- a/tools/testing/selftests/powerpc/math/vsx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c @@ -92,6 +92,8 @@ int test_preempt_vsx(void) int i, rc, threads; pthread_t *tids; + SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 2ca523255b1b2..91c775c23c660 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -2,9 +2,12 @@ hugetlb_vs_thp_test subpage_prot tempfile -prot_sao segv_errors wild_bctr large_vm_fork_separation bad_accesses tlbie_test +pkey_exec_prot +pkey_siginfo +stack_expansion_ldst +stack_expansion_signal diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index b9103c4bb4145..250ce172e0dac 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,23 +2,31 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ - large_vm_fork_separation bad_accesses +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \ + large_vm_fork_separation bad_accesses pkey_exec_prot \ + pkey_siginfo stack_expansion_signal stack_expansion_ldst + TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. include ../../lib.mk -$(TEST_GEN_PROGS): ../harness.c - -$(OUTPUT)/prot_sao: ../utils.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/bad_accesses: CFLAGS += -m64 +$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64 +$(OUTPUT)/pkey_siginfo: CFLAGS += -m64 + +$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c + +$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector +$(OUTPUT)/stack_expansion_ldst: ../utils.c $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 $(OUTPUT)/tlbie_test: LDLIBS += -lpthread +$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index adc465f499efa..a864ed7e20081 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -64,34 +64,6 @@ int bad_access(char *p, bool write) return 0; } -static int using_hash_mmu(bool *using_hash) -{ - char line[128]; - FILE *f; - int rc; - - f = fopen("/proc/cpuinfo", "r"); - FAIL_IF(!f); - - rc = 0; - while (fgets(line, sizeof(line), f) != NULL) { - if (strcmp(line, "MMU : Hash\n") == 0) { - *using_hash = true; - goto out; - } - - if (strcmp(line, "MMU : Radix\n") == 0) { - *using_hash = false; - goto out; - } - } - - rc = -1; -out: - fclose(f); - return rc; -} - static int test(void) { unsigned long i, j, addr, region_shift, page_shift, page_size; diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c new file mode 100644 index 0000000000000..9e5c7f3f498a7 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2020, Sandipan Das, IBM Corp. + * + * Test if applying execute protection on pages using memory + * protection keys works as expected. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> + +#include <unistd.h> + +#include "pkeys.h" + +#define PPC_INST_NOP 0x60000000 +#define PPC_INST_TRAP 0x7fe00008 +#define PPC_INST_BLR 0x4e800020 + +static volatile sig_atomic_t fault_pkey, fault_code, fault_type; +static volatile sig_atomic_t remaining_faults; +static volatile unsigned int *fault_addr; +static unsigned long pgsize, numinsns; +static unsigned int *insns; + +static void trap_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) + sigsafe_err("got a fault for an unexpected address\n"); + + _exit(1); +} + +static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + int signal_pkey; + + signal_pkey = siginfo_pkey(sinfo); + fault_code = sinfo->si_code; + + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) { + sigsafe_err("got a fault for an unexpected address\n"); + _exit(1); + } + + /* Check if too many faults have occurred for a single test case */ + if (!remaining_faults) { + sigsafe_err("got too many faults for the same address\n"); + _exit(1); + } + + + /* Restore permissions in order to continue */ + switch (fault_code) { + case SEGV_ACCERR: + if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) { + sigsafe_err("failed to set access permissions\n"); + _exit(1); + } + break; + case SEGV_PKUERR: + if (signal_pkey != fault_pkey) { + sigsafe_err("got a fault for an unexpected pkey\n"); + _exit(1); + } + + switch (fault_type) { + case PKEY_DISABLE_ACCESS: + pkey_set_rights(fault_pkey, 0); + break; + case PKEY_DISABLE_EXECUTE: + /* + * Reassociate the exec-only pkey with the region + * to be able to continue. Unlike AMR, we cannot + * set IAMR directly from userspace to restore the + * permissions. + */ + if (mprotect(insns, pgsize, PROT_EXEC)) { + sigsafe_err("failed to set execute permissions\n"); + _exit(1); + } + break; + default: + sigsafe_err("got a fault with an unexpected type\n"); + _exit(1); + } + break; + default: + sigsafe_err("got a fault with an unexpected code\n"); + _exit(1); + } + + remaining_faults--; +} + +static int test(void) +{ + struct sigaction segv_act, trap_act; + unsigned long rights; + int pkey, ret, i; + + ret = pkeys_unsupported(); + if (ret) + return ret; + + /* Setup SIGSEGV handler */ + segv_act.sa_handler = 0; + segv_act.sa_sigaction = segv_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0); + segv_act.sa_flags = SA_SIGINFO; + segv_act.sa_restorer = 0; + FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0); + + /* Setup SIGTRAP handler */ + trap_act.sa_handler = 0; + trap_act.sa_sigaction = trap_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0); + trap_act.sa_flags = SA_SIGINFO; + trap_act.sa_restorer = 0; + FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0); + + /* Setup executable region */ + pgsize = getpagesize(); + numinsns = pgsize / sizeof(unsigned int); + insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + FAIL_IF(insns == MAP_FAILED); + + /* Write the instruction words */ + for (i = 1; i < numinsns - 1; i++) + insns[i] = PPC_INST_NOP; + + /* + * Set the first instruction as an unconditional trap. If + * the last write to this address succeeds, this should + * get overwritten by a no-op. + */ + insns[0] = PPC_INST_TRAP; + + /* + * Later, to jump to the executable region, we use a branch + * and link instruction (bctrl) which sets the return address + * automatically in LR. Use that to return back. + */ + insns[numinsns - 1] = PPC_INST_BLR; + + /* Allocate a pkey that restricts execution */ + rights = PKEY_DISABLE_EXECUTE; + pkey = sys_pkey_alloc(0, rights); + FAIL_IF(pkey < 0); + + /* + * Pick the first instruction's address from the executable + * region. + */ + fault_addr = insns; + + /* The following two cases will avoid SEGV_PKUERR */ + fault_type = -1; + fault_pkey = -1; + + /* + * Read an instruction word from the address when AMR bits + * are not set i.e. the pkey permits both read and write + * access. + * + * This should not generate a fault as having PROT_EXEC + * implies PROT_READ on GNU systems. The pkey currently + * restricts execution only based on the IAMR bits. The + * AMR bits are cleared. + */ + remaining_faults = 0; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("read from %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + i = *fault_addr; + FAIL_IF(remaining_faults != 0); + + /* + * Write an instruction word to the address when AMR bits + * are not set i.e. the pkey permits both read and write + * access. + * + * This should generate an access fault as having just + * PROT_EXEC also restricts writes. The pkey currently + * restricts execution only based on the IAMR bits. The + * AMR bits are cleared. + */ + remaining_faults = 1; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("write to %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + *fault_addr = PPC_INST_TRAP; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR); + + /* The following three cases will generate SEGV_PKUERR */ + rights |= PKEY_DISABLE_ACCESS; + fault_type = PKEY_DISABLE_ACCESS; + fault_pkey = pkey; + + /* + * Read an instruction word from the address when AMR bits + * are set i.e. the pkey permits neither read nor write + * access. + * + * This should generate a pkey fault based on AMR bits only + * as having PROT_EXEC implicitly allows reads. + */ + remaining_faults = 1; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + pkey_set_rights(pkey, rights); + printf("read from %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + i = *fault_addr; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR); + + /* + * Write an instruction word to the address when AMR bits + * are set i.e. the pkey permits neither read nor write + * access. + * + * This should generate two faults. First, a pkey fault + * based on AMR bits and then an access fault since + * PROT_EXEC does not allow writes. + */ + remaining_faults = 2; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + pkey_set_rights(pkey, rights); + printf("write to %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + *fault_addr = PPC_INST_NOP; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR); + + /* Free the current pkey */ + sys_pkey_free(pkey); + + rights = 0; + do { + /* + * Allocate pkeys with all valid combinations of read, + * write and execute restrictions. + */ + pkey = sys_pkey_alloc(0, rights); + FAIL_IF(pkey < 0); + + /* + * Jump to the executable region. AMR bits may or may not + * be set but they should not affect execution. + * + * This should generate pkey faults based on IAMR bits which + * may be set to restrict execution. + * + * The first iteration also checks if the overwrite of the + * first instruction word from a trap to a no-op succeeded. + */ + fault_pkey = pkey; + fault_type = -1; + remaining_faults = 0; + if (rights & PKEY_DISABLE_EXECUTE) { + fault_type = PKEY_DISABLE_EXECUTE; + remaining_faults = 1; + } + + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("execute at %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + asm volatile("mtctr %0; bctrl" : : "r"(insns)); + FAIL_IF(remaining_faults != 0); + if (rights & PKEY_DISABLE_EXECUTE) + FAIL_IF(fault_code != SEGV_PKUERR); + + /* Free the current pkey */ + sys_pkey_free(pkey); + + /* Find next valid combination of pkey rights */ + rights = next_pkey_rights(rights); + } while (rights); + + /* Cleanup */ + munmap((void *) insns, pgsize); + + return 0; +} + +int main(void) +{ + test_harness(test, "pkey_exec_prot"); +} diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c new file mode 100644 index 0000000000000..4f815d7c12145 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020, Sandipan Das, IBM Corp. + * + * Test if the signal information reports the correct memory protection + * key upon getting a key access violation fault for a page that was + * attempted to be protected by two different keys from two competing + * threads at the same time. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> + +#include <unistd.h> +#include <pthread.h> +#include <sys/mman.h> + +#include "pkeys.h" + +#define PPC_INST_NOP 0x60000000 +#define PPC_INST_BLR 0x4e800020 +#define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC) + +#define NUM_ITERATIONS 1000000 + +static volatile sig_atomic_t perm_pkey, rest_pkey; +static volatile sig_atomic_t rights, fault_count; +static volatile unsigned int *volatile fault_addr; +static pthread_barrier_t iteration_barrier; + +static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + void *pgstart; + size_t pgsize; + int pkey; + + pkey = siginfo_pkey(sinfo); + + /* Check if this fault originated from a pkey access violation */ + if (sinfo->si_code != SEGV_PKUERR) { + sigsafe_err("got a fault for an unexpected reason\n"); + _exit(1); + } + + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) { + sigsafe_err("got a fault for an unexpected address\n"); + _exit(1); + } + + /* Check if this fault originated from the restrictive pkey */ + if (pkey != rest_pkey) { + sigsafe_err("got a fault for an unexpected pkey\n"); + _exit(1); + } + + /* Check if too many faults have occurred for the same iteration */ + if (fault_count > 0) { + sigsafe_err("got too many faults for the same address\n"); + _exit(1); + } + + pgsize = getpagesize(); + pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1)); + + /* + * If the current fault occurred due to lack of execute rights, + * reassociate the page with the exec-only pkey since execute + * rights cannot be changed directly for the faulting pkey as + * IAMR is inaccessible from userspace. + * + * Otherwise, if the current fault occurred due to lack of + * read-write rights, change the AMR permission bits for the + * pkey. + * + * This will let the test continue. + */ + if (rights == PKEY_DISABLE_EXECUTE && + mprotect(pgstart, pgsize, PROT_EXEC)) + _exit(1); + else + pkey_set_rights(pkey, 0); + + fault_count++; +} + +struct region { + unsigned long rights; + unsigned int *base; + size_t size; +}; + +static void *protect(void *p) +{ + unsigned long rights; + unsigned int *base; + size_t size; + int tid, i; + + tid = gettid(); + base = ((struct region *) p)->base; + size = ((struct region *) p)->size; + FAIL_IF_EXIT(!base); + + /* No read, write and execute restrictions */ + rights = 0; + + printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights)); + + /* Allocate the permissive pkey */ + perm_pkey = sys_pkey_alloc(0, rights); + FAIL_IF_EXIT(perm_pkey < 0); + + /* + * Repeatedly try to protect the common region with a permissive + * pkey + */ + for (i = 0; i < NUM_ITERATIONS; i++) { + /* + * Wait until the other thread has finished allocating the + * restrictive pkey or until the next iteration has begun + */ + pthread_barrier_wait(&iteration_barrier); + + /* Try to associate the permissive pkey with the region */ + FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX, + perm_pkey)); + } + + /* Free the permissive pkey */ + sys_pkey_free(perm_pkey); + + return NULL; +} + +static void *protect_access(void *p) +{ + size_t size, numinsns; + unsigned int *base; + int tid, i; + + tid = gettid(); + base = ((struct region *) p)->base; + size = ((struct region *) p)->size; + rights = ((struct region *) p)->rights; + numinsns = size / sizeof(base[0]); + FAIL_IF_EXIT(!base); + + /* Allocate the restrictive pkey */ + rest_pkey = sys_pkey_alloc(0, rights); + FAIL_IF_EXIT(rest_pkey < 0); + + printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights)); + printf("tid %d, %s randomly in range [%p, %p]\n", tid, + (rights == PKEY_DISABLE_EXECUTE) ? "execute" : + (rights == PKEY_DISABLE_WRITE) ? "write" : "read", + base, base + numinsns); + + /* + * Repeatedly try to protect the common region with a restrictive + * pkey and read, write or execute from it + */ + for (i = 0; i < NUM_ITERATIONS; i++) { + /* + * Wait until the other thread has finished allocating the + * permissive pkey or until the next iteration has begun + */ + pthread_barrier_wait(&iteration_barrier); + + /* Try to associate the restrictive pkey with the region */ + FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX, + rest_pkey)); + + /* Choose a random instruction word address from the region */ + fault_addr = base + (rand() % numinsns); + fault_count = 0; + + switch (rights) { + /* Read protection test */ + case PKEY_DISABLE_ACCESS: + /* + * Read an instruction word from the region and + * verify if it has not been overwritten to + * something unexpected + */ + FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP && + *fault_addr != PPC_INST_BLR); + break; + + /* Write protection test */ + case PKEY_DISABLE_WRITE: + /* + * Write an instruction word to the region and + * verify if the overwrite has succeeded + */ + *fault_addr = PPC_INST_BLR; + FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR); + break; + + /* Execute protection test */ + case PKEY_DISABLE_EXECUTE: + /* Jump to the region and execute instructions */ + asm volatile( + "mtctr %0; bctrl" + : : "r"(fault_addr) : "ctr", "lr"); + break; + } + + /* + * Restore the restrictions originally imposed by the + * restrictive pkey as the signal handler would have + * cleared out the corresponding AMR bits + */ + pkey_set_rights(rest_pkey, rights); + } + + /* Free restrictive pkey */ + sys_pkey_free(rest_pkey); + + return NULL; +} + +static void reset_pkeys(unsigned long rights) +{ + int pkeys[NR_PKEYS], i; + + /* Exhaustively allocate all available pkeys */ + for (i = 0; i < NR_PKEYS; i++) + pkeys[i] = sys_pkey_alloc(0, rights); + + /* Free all allocated pkeys */ + for (i = 0; i < NR_PKEYS; i++) + sys_pkey_free(pkeys[i]); +} + +static int test(void) +{ + pthread_t prot_thread, pacc_thread; + struct sigaction act; + pthread_attr_t attr; + size_t numinsns; + struct region r; + int ret, i; + + srand(time(NULL)); + ret = pkeys_unsupported(); + if (ret) + return ret; + + /* Allocate the region */ + r.size = getpagesize(); + r.base = mmap(NULL, r.size, PROT_RWX, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + FAIL_IF(r.base == MAP_FAILED); + + /* + * Fill the region with no-ops with a branch at the end + * for returning to the caller + */ + numinsns = r.size / sizeof(r.base[0]); + for (i = 0; i < numinsns - 1; i++) + r.base[i] = PPC_INST_NOP; + r.base[i] = PPC_INST_BLR; + + /* Setup SIGSEGV handler */ + act.sa_handler = 0; + act.sa_sigaction = segv_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0); + act.sa_flags = SA_SIGINFO; + act.sa_restorer = 0; + FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0); + + /* + * For these tests, the parent process should clear all bits of + * AMR and IAMR, i.e. impose no restrictions, for all available + * pkeys. This will be the base for the initial AMR and IAMR + * values for all the test thread pairs. + * + * If the AMR and IAMR bits of all available pkeys are cleared + * before running the tests and a fault is generated when + * attempting to read, write or execute instructions from a + * pkey protected region, the pkey responsible for this must be + * the one from the protect-and-access thread since the other + * one is fully permissive. Despite that, if the pkey reported + * by siginfo is not the restrictive pkey, then there must be a + * kernel bug. + */ + reset_pkeys(0); + + /* Setup barrier for protect and protect-and-access threads */ + FAIL_IF(pthread_attr_init(&attr) != 0); + FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0); + + /* Setup and start protect and protect-and-read threads */ + puts("starting thread pair (protect, protect-and-read)"); + r.rights = PKEY_DISABLE_ACCESS; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Setup and start protect and protect-and-write threads */ + puts("starting thread pair (protect, protect-and-write)"); + r.rights = PKEY_DISABLE_WRITE; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Setup and start protect and protect-and-execute threads */ + puts("starting thread pair (protect, protect-and-execute)"); + r.rights = PKEY_DISABLE_EXECUTE; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Cleanup */ + FAIL_IF(pthread_attr_destroy(&attr) != 0); + FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0); + munmap(r.base, r.size); + + return 0; +} + +int main(void) +{ + test_harness(test, "pkey_siginfo"); +} diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c deleted file mode 100644 index e2eed65b77359..0000000000000 --- a/tools/testing/selftests/powerpc/mm/prot_sao.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright 2016, Michael Ellerman, IBM Corp. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> - -#include <asm/cputable.h> - -#include "utils.h" - -#define SIZE (64 * 1024) - -int test_prot_sao(void) -{ - char *p; - - /* 2.06 or later should support SAO */ - SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - - /* - * Ensure we can ask for PROT_SAO. - * We can't really verify that it does the right thing, but at least we - * confirm the kernel will accept it. - */ - p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - FAIL_IF(p == MAP_FAILED); - - /* Write to the mapping, to at least cause a fault */ - memset(p, 0xaa, SIZE); - - return 0; -} - -int main(void) -{ - return test_harness(test_prot_sao, "prot-sao"); -} diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c new file mode 100644 index 0000000000000..ed9143990888d --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that loads/stores expand the stack segment, or trigger a SEGV, in + * various conditions. + * + * Based on test code by Tom Lane. + */ + +#undef NDEBUG +#include <assert.h> + +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#define _KB (1024) +#define _MB (1024 * 1024) + +volatile char *stack_top_ptr; +volatile unsigned long stack_top_sp; +volatile char c; + +enum access_type { + LOAD, + STORE, +}; + +/* + * Consume stack until the stack pointer is below @target_sp, then do an access + * (load or store) at offset @delta from either the base of the stack or the + * current stack pointer. + */ +__attribute__ ((noinline)) +int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type) +{ + unsigned long target; + char stack_cur; + + if ((unsigned long)&stack_cur > target_sp) + return consume_stack(target_sp, stack_high, delta, type); + else { + // We don't really need this, but without it GCC might not + // generate a recursive call above. + stack_top_ptr = &stack_cur; + +#ifdef __powerpc__ + asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp)); +#else + asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp)); +#endif + target = stack_high - delta + 1; + volatile char *p = (char *)target; + + if (type == STORE) + *p = c; + else + c = *p; + + // Do something to prevent the stack frame being popped prior to + // our access above. + getpid(); + } + + return 0; +} + +static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high) +{ + unsigned long start, end; + static char buf[4096]; + char name[128]; + FILE *f; + int rc; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(buf, sizeof(buf), f)) { + rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n", + &start, &end, name); + if (rc == 2) + continue; + + if (rc != 3) { + printf("sscanf errored\n"); + rc = -1; + break; + } + + if (strstr(name, needle)) { + *low = start; + *high = end - 1; + rc = 0; + break; + } + } + + fclose(f); + + return rc; +} + +int child(unsigned int stack_used, int delta, enum access_type type) +{ + unsigned long low, stack_high; + + assert(search_proc_maps("[stack]", &low, &stack_high) == 0); + + assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0); + + printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n", + type == LOAD ? "load" : "store", delta, stack_used, stack_high, + stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1); + + return 0; +} + +static int test_one(unsigned int stack_used, int delta, enum access_type type) +{ + pid_t pid; + int rc; + + pid = fork(); + if (pid == 0) + exit(child(stack_used, delta, type)); + + assert(waitpid(pid, &rc, 0) != -1); + + if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) + return 0; + + // We don't expect a non-zero exit that's not a signal + assert(!WIFEXITED(rc)); + + printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n", + type == LOAD ? "load" : "store", delta, stack_used, + WTERMSIG(rc)); + + return 1; +} + +// This is fairly arbitrary but is well below any of the targets below, +// so that the delta between the stack pointer and the target is large. +#define DEFAULT_SIZE (32 * _KB) + +static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur) +{ + unsigned long delta; + + // We should be able to access anywhere within the rlimit + for (delta = page_size; delta <= rlim_cur; delta += page_size) + assert(test_one(DEFAULT_SIZE, delta, type) == 0); + + assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0); + + // But if we go past the rlimit it should fail + assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0); +} + +static int test(void) +{ + unsigned long page_size; + struct rlimit rlimit; + + page_size = getpagesize(); + getrlimit(RLIMIT_STACK, &rlimit); + printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + + printf("Testing loads ...\n"); + test_one_type(LOAD, page_size, rlimit.rlim_cur); + printf("Testing stores ...\n"); + test_one_type(STORE, page_size, rlimit.rlim_cur); + + printf("All OK\n"); + + return 0; +} + +#ifdef __powerpc__ +#include "utils.h" + +int main(void) +{ + return test_harness(test, "stack_expansion_ldst"); +} +#else +int main(void) +{ + return test(); +} +#endif diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c new file mode 100644 index 0000000000000..c8b32a29e2747 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that signal delivery is able to expand the stack segment without + * triggering a SEGV. + * + * Based on test code by Tom Lane. + */ + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +#include "../pmu/lib.h" +#include "utils.h" + +#define _KB (1024) +#define _MB (1024 * 1024) + +static char *stack_base_ptr; +static char *stack_top_ptr; + +static volatile sig_atomic_t sig_occurred = 0; + +static void sigusr1_handler(int signal_arg) +{ + sig_occurred = 1; +} + +static int consume_stack(unsigned int stack_size, union pipe write_pipe) +{ + char stack_cur; + + if ((stack_base_ptr - &stack_cur) < stack_size) + return consume_stack(stack_size, write_pipe); + else { + stack_top_ptr = &stack_cur; + + FAIL_IF(notify_parent(write_pipe)); + + while (!sig_occurred) + barrier(); + } + + return 0; +} + +static int child(unsigned int stack_size, union pipe write_pipe) +{ + struct sigaction act; + char stack_base; + + act.sa_handler = sigusr1_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + if (sigaction(SIGUSR1, &act, NULL) < 0) + err(1, "sigaction"); + + stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL); + + FAIL_IF(consume_stack(stack_size, write_pipe)); + + printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n", + stack_size, stack_base_ptr, stack_top_ptr, + stack_base_ptr - stack_top_ptr); + + return 0; +} + +static int test_one_size(unsigned int stack_size) +{ + union pipe read_pipe, write_pipe; + pid_t pid; + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + close(read_pipe.read_fd); + close(write_pipe.write_fd); + exit(child(stack_size, read_pipe)); + } + + close(read_pipe.write_fd); + close(write_pipe.read_fd); + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + + kill(pid, SIGUSR1); + + FAIL_IF(wait_for_child(pid)); + + close(read_pipe.read_fd); + close(write_pipe.write_fd); + + return 0; +} + +int test(void) +{ + unsigned int i, size; + + // Test with used stack from 1MB - 64K to 1MB + 64K + // Increment by 64 to get more coverage of odd sizes + for (i = 0; i < (128 * _KB); i += 64) { + size = i + (1 * _MB) - (64 * _KB); + FAIL_IF(test_one_size(size)); + } + + return 0; +} + +int main(void) +{ + return test_harness(test, "stack_expansion_signal"); +} diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 7b4ac4537702c..2980abca31e0d 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -9,6 +9,7 @@ #include <stdbool.h> #include <string.h> #include <sys/prctl.h> +#include <asm/cputable.h> #include "event.h" #include "utils.h" @@ -104,6 +105,9 @@ static int test_body(void) struct event events[3]; u64 overhead; + // The STCX_FAIL event we use works on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions"); setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles"); setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail"); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c index a2d7b0e3dca97..a26ac122c759f 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c @@ -91,8 +91,6 @@ int back_to_back_ebbs(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c index bc893813483ee..bb9f587fa76e8 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c @@ -42,8 +42,6 @@ int cycles(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c index dcd351d203289..9ae795ce314e6 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c @@ -99,8 +99,6 @@ int cycles_with_freeze(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); printf("EBBs while frozen %d\n", ebbs_while_frozen); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c index 94c99c12c0f23..4b45a2e70f62b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c @@ -71,8 +71,6 @@ int cycles_with_mmcr2(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index dfbc5c3ad52d7..21537d6eb6b7d 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c index ca2f7d729155b..b208bf6ad58d3 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c @@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); FAIL_IF(ebb_state.stats.ebb_count == 0); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c index ac3e6e182614a..ba2681a12cc7b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c @@ -75,7 +75,6 @@ static int test_body(void) ebb_freeze_pmcs(); ebb_global_disable(); - count_pmc(4, sample_period); mtspr(SPRN_PMC4, 0xdead); dump_summary_ebb_state(); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c index b8242e9d97d2d..791d37ba327b5 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c @@ -70,13 +70,6 @@ int multi_counter(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - count_pmc(2, sample_period); - count_pmc(3, sample_period); - count_pmc(4, sample_period); - count_pmc(5, sample_period); - count_pmc(6, sample_period); - dump_ebb_state(); for (i = 0; i < 6; i++) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c index a05c0e18ded63..9b0f70d597020 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c @@ -61,8 +61,6 @@ static int cycles_child(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_summary_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c index 153ebc92234fd..2904c741e04e5 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c @@ -82,8 +82,6 @@ static int test_body(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); if (mmcr0_mismatch) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c index eadad75ed7e6f..b29f8ba22d1e6 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -76,8 +76,6 @@ int pmc56_overflow(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(2, sample_period); - dump_ebb_state(); printf("PMC5/6 overflow %d\n", pmc56_overflowed); diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h index fa12e7d0b4d39..bf1bec013bbb4 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.h +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -6,6 +6,7 @@ #ifndef __SELFTESTS_POWERPC_PMU_LIB_H #define __SELFTESTS_POWERPC_PMU_LIB_H +#include <stdbool.h> #include <stdio.h> #include <stdint.h> #include <string.h> diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c index 2756fe2efdc58..2d37942bf72b4 100644 --- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c +++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c @@ -12,6 +12,8 @@ #include <string.h> #include <sys/prctl.h> +#include <asm/cputable.h> + #include "event.h" #include "lib.h" #include "utils.h" @@ -23,12 +25,9 @@ static int per_event_excludes(void) { struct event *e, events[4]; - char *platform; int i; - platform = (char *)get_auxv_entry(AT_BASE_PLATFORM); - FAIL_IF(!platform); - SKIP_IF(strcmp(platform, "power8") != 0); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); /* * We need to create the events disabled, otherwise the running/enabled diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index d5c64fee032dc..bbc05ffc5860a 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -150,7 +150,7 @@ static int child(struct shared_info *info) printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", user_write, info->amr, pkey1, pkey2, pkey3); - mtspr(SPRN_AMR, info->amr); + set_amr(info->amr); /* * We won't use pkey3. This tests whether the kernel restores the UAMOR diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bdbbbe8431e03..bc454f8991246 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -44,7 +44,7 @@ struct shared_info { unsigned long amr2; /* AMR value that ptrace should refuse to write to the child. */ - unsigned long amr3; + unsigned long invalid_amr; /* IAMR value the parent expects to read from the child. */ unsigned long expected_iamr; @@ -57,8 +57,8 @@ struct shared_info { * (even though they're valid ones) because userspace doesn't have * access to those registers. */ - unsigned long new_iamr; - unsigned long new_uamor; + unsigned long invalid_iamr; + unsigned long invalid_uamor; }; static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) @@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) return syscall(__NR_pkey_alloc, flags, init_access_rights); } -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int child(struct shared_info *info) { unsigned long reg; @@ -100,33 +95,37 @@ static int child(struct shared_info *info) info->amr1 |= 3ul << pkeyshift(pkey1); info->amr2 |= 3ul << pkeyshift(pkey2); - info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3); + /* + * invalid amr value where we try to force write + * things which are deined by a uamor setting. + */ + info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor); + /* + * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr + */ if (disable_execute) info->expected_iamr |= 1ul << pkeyshift(pkey1); else info->expected_iamr &= ~(1ul << pkeyshift(pkey1)); - info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3)); - - info->expected_uamor |= 3ul << pkeyshift(pkey1) | - 3ul << pkeyshift(pkey2); - info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); - info->new_uamor |= 3ul << pkeyshift(pkey1); + /* + * We allocated pkey2 and pkey 3 above. Clear the IAMR bits. + */ + info->expected_iamr &= ~(1ul << pkeyshift(pkey2)); + info->expected_iamr &= ~(1ul << pkeyshift(pkey3)); /* - * We won't use pkey3. We just want a plausible but invalid key to test - * whether ptrace will let us write to AMR bits we are not supposed to. - * - * This also tests whether the kernel restores the UAMOR permissions - * after a key is freed. + * Create an IAMR value different from expected value. + * Kernel will reject an IAMR and UAMOR change. */ - sys_pkey_free(pkey3); + info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2)); + info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1)); printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", user_write, info->amr1, pkey1, pkey2, pkey3); - mtspr(SPRN_AMR, info->amr1); + set_amr(info->amr1); /* Wait for parent to read our AMR value and write a new one. */ ret = prod_parent(&info->child_sync); @@ -196,9 +195,9 @@ static int parent(struct shared_info *info, pid_t pid) PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); PARENT_FAIL_IF(ret, &info->child_sync); - info->amr1 = info->amr2 = info->amr3 = regs[0]; - info->expected_iamr = info->new_iamr = regs[1]; - info->expected_uamor = info->new_uamor = regs[2]; + info->amr1 = info->amr2 = regs[0]; + info->expected_iamr = regs[1]; + info->expected_uamor = regs[2]; /* Wake up child so that it can set itself up. */ ret = prod_child(&info->child_sync); @@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid) return ret; /* Write invalid AMR value in child. */ - ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1); + ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1); PARENT_FAIL_IF(ret, &info->child_sync); - printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3); + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr); /* Wake up child so that it can verify it didn't change. */ ret = prod_child(&info->child_sync); @@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid) /* Try to write to IAMR. */ regs[0] = info->amr1; - regs[1] = info->new_iamr; + regs[1] = info->invalid_iamr; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2); PARENT_FAIL_IF(!ret, &info->child_sync); @@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid) ptrace_write_running, regs[0], regs[1]); /* Try to write to IAMR and UAMOR. */ - regs[2] = info->new_uamor; + regs[2] = info->invalid_uamor; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3); PARENT_FAIL_IF(!ret, &info->child_sync); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c index 58cb1a860cc9b..4436ca9d3caf8 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c @@ -78,6 +78,9 @@ int ptrace_tar(void) pid_t pid; int ret, status; + // TAR was added in v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c index c4fe0e893306d..cb9875f764ca2 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c @@ -61,6 +61,8 @@ int ptrace_vsx(void) pid_t pid; int ret, status, i; + SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); for (i = 0; i < VEC_MAX; i++) diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index 8c6b982af2a89..c8d82b7841023 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -183,6 +183,16 @@ int spectre_v2_test(void) if (miss_percent > 15) { printf("Branch misses > 15%% unexpected in this configuration!\n"); printf("Possible mis-match between reported & actual mitigation\n"); + /* + * Such a mismatch may be caused by a guest system + * reporting as vulnerable when the host is mitigated. + * Return skip code to avoid detecting this as an error. + * We are not vulnerable and reporting otherwise, so + * missing such a mismatch is safe. + */ + if (state == VULNERABLE) + return 4; + return 1; } break; diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 7fc0623d85c31..9c39f55a58ff3 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null TEST_GEN_PROGS := memcmp_64 strlen -$(OUTPUT)/memcmp_64: memcmp.c +$(OUTPUT)/memcmp_64: memcmp.c ../utils.c $(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec ifeq ($(build_32bit),1) diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index b1fa7546957f7..979df3d983685 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -2,7 +2,9 @@ #include <malloc.h> #include <stdlib.h> #include <string.h> +#include <sys/mman.h> #include <time.h> +#include <asm/cputable.h> #include "utils.h" #define SIZE 256 @@ -13,6 +15,9 @@ #define LARGE_MAX_OFFSET 32 #define LARGE_SIZE_START 4096 +/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */ +#define MAP_SIZE (64 * 1024) + #define MAX_OFFSET_DIFF_S1_S2 48 int vmx_count; @@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset, static int testcase(bool islarge) { - char *s1; - char *s2; - unsigned long i; - - unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE); - unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2; - int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS; - - s1 = memalign(128, alloc_size); - if (!s1) { - perror("memalign"); - exit(1); - } + unsigned long i, comp_size, alloc_size; + char *p, *s1, *s2; + int iterations; - s2 = memalign(128, alloc_size); - if (!s2) { - perror("memalign"); - exit(1); - } + comp_size = (islarge ? LARGE_SIZE : SIZE); + alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2; + iterations = islarge ? LARGE_ITERATIONS : ITERATIONS; + + p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + FAIL_IF(p == MAP_FAILED); + + /* Put s1/s2 at the end of a page */ + s1 = p + MAP_SIZE - alloc_size; + s2 = p + 3 * MAP_SIZE - alloc_size; + + /* And unmap the subsequent page to force a fault if we overread */ + munmap(p + MAP_SIZE, MAP_SIZE); + munmap(p + 3 * MAP_SIZE, MAP_SIZE); srandom(time(0)); @@ -147,6 +152,11 @@ static int testcase(bool islarge) static int testcases(void) { +#ifdef __powerpc64__ + // vcmpequd used in memcmp_64.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); +#endif + testcase(0); testcase(1); return 0; diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 5ee0e98c48967..18b6a773d5c73 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -16,6 +16,7 @@ #include <string.h> #include <sys/ioctl.h> #include <sys/stat.h> +#include <sys/sysinfo.h> #include <sys/types.h> #include <sys/utsname.h> #include <unistd.h> @@ -88,28 +89,40 @@ void *get_auxv_entry(int type) int pick_online_cpu(void) { - cpu_set_t mask; - int cpu; + int ncpus, cpu = -1; + cpu_set_t *mask; + size_t size; + + ncpus = get_nprocs_conf(); + size = CPU_ALLOC_SIZE(ncpus); + mask = CPU_ALLOC(ncpus); + if (!mask) { + perror("malloc"); + return -1; + } - CPU_ZERO(&mask); + CPU_ZERO_S(size, mask); - if (sched_getaffinity(0, sizeof(mask), &mask)) { + if (sched_getaffinity(0, size, mask)) { perror("sched_getaffinity"); - return -1; + goto done; } /* We prefer a primary thread, but skip 0 */ - for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8) - if (CPU_ISSET(cpu, &mask)) - return cpu; + for (cpu = 8; cpu < ncpus; cpu += 8) + if (CPU_ISSET_S(cpu, size, mask)) + goto done; /* Search for anything, but in reverse */ - for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--) - if (CPU_ISSET(cpu, &mask)) - return cpu; + for (cpu = ncpus - 1; cpu >= 0; cpu--) + if (CPU_ISSET_S(cpu, size, mask)) + goto done; printf("No cpus in affinity mask?!\n"); - return -1; + +done: + CPU_FREE(mask); + return cpu; } bool is_ppc64le(void) @@ -293,3 +306,31 @@ void set_dscr(unsigned long val) asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); } + +int using_hash_mmu(bool *using_hash) +{ + char line[128]; + FILE *f; + int rc; + + f = fopen("/proc/cpuinfo", "r"); + FAIL_IF(!f); + + rc = 0; + while (fgets(line, sizeof(line), f) != NULL) { + if (strcmp(line, "MMU : Hash\n") == 0) { + *using_hash = true; + goto out; + } + + if (strcmp(line, "MMU : Radix\n") == 0) { + *using_hash = false; + goto out; + } + } + + rc = -1; +out: + fclose(f); + return rc; +} diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore index d5a2da428752a..be8266f5d04c0 100644 --- a/tools/testing/selftests/splice/.gitignore +++ b/tools/testing/selftests/splice/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only default_file_splice_read +splice_read diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile index e519b159b60d8..541cd826d5a55 100644 --- a/tools/testing/selftests/splice/Makefile +++ b/tools/testing/selftests/splice/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := default_file_splice_read.sh -TEST_GEN_PROGS_EXTENDED := default_file_splice_read +TEST_PROGS := default_file_splice_read.sh short_splice_read.sh +TEST_GEN_PROGS_EXTENDED := default_file_splice_read splice_read include ../lib.mk diff --git a/tools/testing/selftests/splice/config b/tools/testing/selftests/splice/config new file mode 100644 index 0000000000000..058c928368b89 --- /dev/null +++ b/tools/testing/selftests/splice/config @@ -0,0 +1 @@ +CONFIG_TEST_LKM=m diff --git a/tools/testing/selftests/splice/settings b/tools/testing/selftests/splice/settings new file mode 100644 index 0000000000000..89cedfc0d12b1 --- /dev/null +++ b/tools/testing/selftests/splice/settings @@ -0,0 +1 @@ +timeout=5 diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh new file mode 100755 index 0000000000000..7810d3589d9ab --- /dev/null +++ b/tools/testing/selftests/splice/short_splice_read.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +set -e + +ret=0 + +do_splice() +{ + filename="$1" + bytes="$2" + expected="$3" + + out=$(./splice_read "$filename" "$bytes" | cat) + if [ "$out" = "$expected" ] ; then + echo "ok: $filename $bytes" + else + echo "FAIL: $filename $bytes" + ret=1 + fi +} + +test_splice() +{ + filename="$1" + + full=$(cat "$filename") + two=$(echo "$full" | grep -m1 . | cut -c-2) + + # Make sure full splice has the same contents as a standard read. + do_splice "$filename" 4096 "$full" + + # Make sure a partial splice see the first two characters. + do_splice "$filename" 2 "$two" +} + +# proc_single_open(), seq_read() +test_splice /proc/$$/limits +# special open, seq_read() +test_splice /proc/$$/comm + +# proc_handler, proc_dointvec_minmax +test_splice /proc/sys/fs/nr_open +# proc_handler, proc_dostring +test_splice /proc/sys/kernel/modprobe +# proc_handler, special read +test_splice /proc/sys/kernel/version + +if ! [ -d /sys/module/test_module/sections ] ; then + modprobe test_module +fi +# kernfs, attr +test_splice /sys/module/test_module/coresize +# kernfs, binattr +test_splice /sys/module/test_module/sections/.init.text + +exit $ret diff --git a/tools/testing/selftests/splice/splice_read.c b/tools/testing/selftests/splice/splice_read.c new file mode 100644 index 0000000000000..46dae6a25cfb9 --- /dev/null +++ b/tools/testing/selftests/splice/splice_read.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> + +int main(int argc, char *argv[]) +{ + int fd; + size_t size; + ssize_t spliced; + + if (argc < 2) { + fprintf(stderr, "Usage: %s INPUT [BYTES]\n", argv[0]); + return EXIT_FAILURE; + } + + fd = open(argv[1], O_RDONLY); + if (fd < 0) { + perror(argv[1]); + return EXIT_FAILURE; + } + + if (argc == 3) + size = atol(argv[2]); + else { + struct stat statbuf; + + if (fstat(fd, &statbuf) < 0) { + perror(argv[1]); + return EXIT_FAILURE; + } + + if (statbuf.st_size > INT_MAX) { + fprintf(stderr, "%s: Too big\n", argv[1]); + return EXIT_FAILURE; + } + + size = statbuf.st_size; + } + + /* splice(2) file to stdout. */ + spliced = splice(fd, NULL, STDOUT_FILENO, NULL, + size, SPLICE_F_MOVE); + if (spliced < 0) { + perror("splice"); + return EXIT_FAILURE; + } + + close(fd); + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 91d38a29956b6..93fc5cadce619 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -942,6 +942,41 @@ TEST_F(hmm, migrate_fault) } /* + * Migrate anonymous shared memory to device private memory. + */ +TEST_F(hmm, migrate_shared) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Migrate memory to device. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, -ENOENT); + + hmm_buffer_free(buffer); +} + +/* * Try to migrate various memory types to device private memory. */ TEST_F(hmm2, migrate_mixed) diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index dbf14c1e2188d..f2640e505c4e7 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev, (__virtio_test_bit((dev), feature)) /** - * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device */ -static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) { /* * Note the reverse polarity of the quirk feature (compared to most * other features), this is for compatibility with legacy systems. */ - return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } static inline bool virtio_is_little_endian(struct virtio_device *vdev) |