diff options
Diffstat (limited to 'tools')
105 files changed, 2115 insertions, 326 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 9a5d85cfd1fb..139d5e87dc95 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -75,11 +75,13 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A76AE 0xD0E #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -119,9 +121,11 @@ #define QCOM_CPU_PART_KRYO 0x200 #define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 #define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 +#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 +#define QCOM_CPU_PART_ORYON_X1 0x001 #define NVIDIA_CPU_PART_DENVER 0x003 #define NVIDIA_CPU_PART_CARMEL 0x004 @@ -129,6 +133,7 @@ #define FUJITSU_CPU_PART_A64FX 0x001 #define HISI_CPU_PART_TSV110 0xD01 +#define HISI_CPU_PART_HIP09 0xD02 #define HISI_CPU_PART_HIP12 0xD06 #define APPLE_CPU_PART_M1_ICESTORM 0x022 @@ -159,11 +164,13 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) @@ -196,13 +203,26 @@ #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) #define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) #define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) +#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) +#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1) + +/* + * NOTES: + * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77 + * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER + * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1 + * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78 + * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55 + */ + #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) #define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) @@ -291,6 +311,14 @@ static inline u32 __attribute_const__ read_cpuid_id(void) return read_cpuid(MIDR_EL1); } +struct target_impl_cpu { + u64 midr; + u64 revidr; + u64 aidr; +}; + +bool cpu_errata_set_target_impl(u64 num, void *impl_cpus); + static inline u64 __attribute_const__ read_cpuid_mpidr(void) { return read_cpuid(MPIDR_EL1); diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 690b6ebd118f..65f2759ea27a 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -1080,9 +1080,6 @@ #define ARM64_FEATURE_FIELD_BITS 4 -/* Defined for compatibility only, do not add new users. */ -#define ARM64_FEATURE_MASK(x) (x##_MASK) - #ifdef __ASSEMBLY__ .macro mrs_s, rt, sreg diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index eaeda001784e..077c5437f521 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -1,18 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * Copyright IBM Corp. 2007 * * Authors: Hollis Blanchard <hollisb@us.ibm.com> diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index ee176236c2be..06fc0479a23f 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -218,6 +218,7 @@ #define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ #define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ #define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ +#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ @@ -456,10 +457,14 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ + #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ +#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */ + #define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ @@ -487,6 +492,9 @@ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +550,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 5cfb5d74dd5f..b65c3ba5fa14 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -419,6 +419,7 @@ #define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 #define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) +#define DEBUGCTLMSR_RTM_DEBUG BIT(15) #define MSR_PEBS_FRONTEND 0x000003f7 @@ -733,6 +734,11 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +/* AMD Hardware Feedback Support MSRs */ +#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 +#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501 +#define MSR_AMD_WORKLOAD_HRST 0xc0000502 + /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e @@ -831,6 +837,7 @@ #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) #define MSR_K7_HWCR_IRPERF_EN_BIT 30 #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) +#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35 #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 #define MSR_K7_HWCR_CPB_DIS_BIT 25 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 57c669d2aa90..55d59ed507d5 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -193,7 +193,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (stat.st_size < BOOTCONFIG_FOOTER_SIZE) return 0; - if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) + if (lseek(fd, -(off_t)BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) return pr_errno("Failed to lseek for magic", -errno); if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) @@ -203,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) + if (lseek(fd, -(off_t)BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(uint32_t)) < 0) diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index ed565eb52275..342e056c8c66 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -77,7 +77,7 @@ $(OUTPUT)gpio-watch: $(GPIO_WATCH_IN) clean: rm -f $(ALL_PROGRAMS) - rm -f $(OUTPUT)include/linux/gpio.h + rm -rf $(OUTPUT)include find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete install: $(ALL_PROGRAMS) diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..2e8e65d975c7 --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7ad056219115..a40cc861b3a7 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -2,10 +2,8 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include <linux/const.h> #include <vdso/bits.h> #include <uapi/linux/bits.h> -#include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -50,10 +48,14 @@ (type_max(t) << (l) & \ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + #define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) #define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) /* * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The @@ -79,28 +81,9 @@ * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ -#define GENMASK_INPUT_CHECK(h, l) 0 +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) #endif /* !defined(__ASSEMBLY__) */ -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - -#if !defined(__ASSEMBLY__) -/* - * Missing asm support - * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. - */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif - #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/tools/include/linux/cfi_types.h +++ b/tools/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_<func>. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 2892a45023af..04e0077fb4c9 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat) #define __NR_open_tree_attr 467 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) + #undef __NR_syscalls -#define __NR_syscalls 468 +#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7415a3863891..f0f0d49d2544 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; @@ -935,6 +961,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2 240 #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index ef032e17fec4..eb295756c3bf 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -830,7 +830,7 @@ class TypeArrayNest(Type): 'ynl_attr_for_each_nested(attr2, attr) {', '\tif (ynl_attr_validate(yarg, attr2))', '\t\treturn YNL_PARSE_CB_ERROR;', - f'\t{var}->_count.{self.c_name}++;', + f'\tn_{self.c_name}++;', '}'] return get_lines, None, local_vars diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c index e39f86d97002..a80b75f7b061 100644 --- a/tools/objtool/arch/loongarch/special.c +++ b/tools/objtool/arch/loongarch/special.c @@ -27,6 +27,7 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, struct table_info *next_table; unsigned long tmp_insn_offset; unsigned long tmp_rodata_offset; + bool is_valid_list = false; rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate"); if (!rsec) @@ -35,6 +36,12 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, INIT_LIST_HEAD(&table_list); for_each_reloc(rsec, reloc) { + if (reloc->sym->sec->rodata) + continue; + + if (strcmp(insn->sec->name, reloc->sym->sec->name)) + continue; + orig_table = malloc(sizeof(struct table_info)); if (!orig_table) { WARN("malloc failed"); @@ -49,6 +56,22 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file, if (reloc_idx(reloc) + 1 == sec_num_entries(rsec)) break; + + if (strcmp(insn->sec->name, (reloc + 1)->sym->sec->name)) { + list_for_each_entry(orig_table, &table_list, jump_info) { + if (orig_table->insn_offset == insn->offset) { + is_valid_list = true; + break; + } + } + + if (!is_valid_list) { + list_del_init(&table_list); + continue; + } + + break; + } } list_for_each_entry(orig_table, &table_list, jump_info) { diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 27c1d5ebcd91..b07e699aaa3c 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -482,3 +482,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1e8c44c7b614..7a7049c2c307 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -382,3 +382,5 @@ 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat 467 n64 open_tree_attr sys_open_tree_attr +468 n64 file_getattr sys_file_getattr +469 n64 file_setattr sys_file_setattr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 9a084bdb8926..b453e80dfc00 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -558,3 +558,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index a4569b96ef06..8a6744d658db 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -470,3 +470,5 @@ 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr sys_file_setattr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index 52a7652fcff6..5e9c9eff5539 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -471,3 +471,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 83e45eb6c095..ebb7d06d1044 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -513,3 +513,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index ac007ea00979..4877e16da69a 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -473,3 +473,5 @@ 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat 467 i386 open_tree_attr sys_open_tree_attr +468 i386 file_getattr sys_file_getattr +469 i386 file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index cfb5ca41e30d..92cf0fe2291e 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -391,6 +391,8 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/x86/tests/topdown.c b/tools/perf/arch/x86/tests/topdown.c index 8d0ea7a4bbc1..1eba3b4594ef 100644 --- a/tools/perf/arch/x86/tests/topdown.c +++ b/tools/perf/arch/x86/tests/topdown.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "arch-tests.h" #include "../util/topdown.h" +#include "debug.h" #include "evlist.h" #include "parse-events.h" #include "pmu.h" diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index f657a77314f8..374e4cb788d8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index aad572a78d7f..12387ea88b9a 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -85,7 +85,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, if (typeflag == FTW_D || typeflag == FTW_SL) return 0; - if (filename__read_build_id(fpath, &bid) < 0) + if (filename__read_build_id(fpath, &bid, /*block=*/true) < 0) return 0; dso->name = realpath(fpath, NULL); diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index c98104481c8a..2e0f2004696a 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -180,7 +180,7 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) struct nscookie nsc; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -204,7 +204,7 @@ static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -280,7 +280,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) if (!dso__build_id_filename(dso, filename, sizeof(filename), false)) return true; - if (filename__read_build_id(filename, &bid) == -1) { + if (filename__read_build_id(filename, &bid, /*block=*/true) == -1) { if (errno == ENOENT) return false; @@ -309,7 +309,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 40ba6a94f719..a114b3fa1bea 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -680,12 +680,12 @@ static int dso__read_build_id(struct dso *dso) mutex_lock(dso__lock(dso)); nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), &bid) > 0) + if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); else if (dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, &bid) > 0) + if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); free(new_name); } diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c index 30c7da79e109..8b31d1d05f90 100644 --- a/tools/perf/tests/pe-file-parsing.c +++ b/tools/perf/tests/pe-file-parsing.c @@ -37,7 +37,7 @@ static int run_dir(const char *d) size_t idx; scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d); - ret = filename__read_build_id(filename, &bid); + ret = filename__read_build_id(filename, &bid, /*block=*/true); TEST_ASSERT_VAL("Failed to read build_id", ret == sizeof(expect_build_id)); TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, @@ -49,7 +49,7 @@ static int run_dir(const char *d) !strcmp(debuglink, expect_debuglink)); scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink); - ret = filename__read_build_id(debugfile, &bid); + ret = filename__read_build_id(debugfile, &bid, /*block=*/true); TEST_ASSERT_VAL("Failed to read debug file build_id", ret == sizeof(expect_build_id)); TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 93baee2eae42..6132f1af3e22 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -31,7 +31,7 @@ static int build_id_cache__add_file(const char *filename) struct build_id bid = { .size = 0, }; int err; - err = filename__read_build_id(filename, &bid); + err = filename__read_build_id(filename, &bid, /*block=*/true); if (err < 0) { pr_debug("Failed to read build id of %s\n", filename); return err; diff --git a/tools/perf/tests/shell/test_bpf_metadata.sh b/tools/perf/tests/shell/test_bpf_metadata.sh index 69e3c2055134..be67d56e0f09 100755 --- a/tools/perf/tests/shell/test_bpf_metadata.sh +++ b/tools/perf/tests/shell/test_bpf_metadata.sh @@ -61,7 +61,7 @@ test_bpf_metadata() { /perf_version/ { if (entry) print $NF; } - ' | egrep "$VERS" > /dev/null + ' | grep -qF "$VERS" then echo "Basic BPF metadata test [Failed invalid output]" err=1 diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index a15ac2fa4b20..f291ab4f94eb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -90,10 +90,28 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +/* Reserved kernel ranges [-100], [-10000, -40000]. */ #define AT_FDCWD -100 /* Special value for dirfd used to indicate openat should use the current working directory. */ +/* + * The concept of process and threads in userland and the kernel is a confusing + * one - within the kernel every thread is a 'task' with its own individual PID, + * however from userland's point of view threads are grouped by a single PID, + * which is that of the 'thread group leader', typically the first thread + * spawned. + * + * To cut the Gideon knot, for internal kernel usage, we refer to + * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel + * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread + * group leader... + */ +#define PIDFD_SELF_THREAD -10000 /* Current thread. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ + +#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ +#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 0098b0ce8ccb..0bd678a4a10e 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -60,6 +60,17 @@ #define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ #define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ +/* + * The root inode of procfs is guaranteed to always have the same inode number. + * For programs that make heavy use of procfs, verifying that the root is a + * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}) + * will allow you to make sure you are never tricked into operating on the + * wrong procfs file. + */ +enum procfs_ino { + PROCFS_ROOT_INO = 1, +}; + struct file_clone_range { __s64 src_fd; __u64 src_offset; @@ -91,6 +102,63 @@ struct fs_sysfs_path { __u8 name[128]; }; +/* Protection info capability flags */ +#define LBMD_PI_CAP_INTEGRITY (1 << 0) +#define LBMD_PI_CAP_REFTAG (1 << 1) + +/* Checksum types for Protection Information */ +#define LBMD_PI_CSUM_NONE 0 +#define LBMD_PI_CSUM_IP 1 +#define LBMD_PI_CSUM_CRC16_T10DIF 2 +#define LBMD_PI_CSUM_CRC64_NVME 4 + +/* sizeof first published struct */ +#define LBMD_SIZE_VER0 16 + +/* + * Logical block metadata capability descriptor + * If the device does not support metadata, all the fields will be zero. + * Applications must check lbmd_flags to determine whether metadata is + * supported or not. + */ +struct logical_block_metadata_cap { + /* Bitmask of logical block metadata capability flags */ + __u32 lbmd_flags; + /* + * The amount of data described by each unit of logical block + * metadata + */ + __u16 lbmd_interval; + /* + * Size in bytes of the logical block metadata associated with each + * interval + */ + __u8 lbmd_size; + /* + * Size in bytes of the opaque block tag associated with each + * interval + */ + __u8 lbmd_opaque_size; + /* + * Offset in bytes of the opaque block tag within the logical block + * metadata + */ + __u8 lbmd_opaque_offset; + /* Size in bytes of the T10 PI tuple associated with each interval */ + __u8 lbmd_pi_size; + /* Offset in bytes of T10 PI tuple within the logical block metadata */ + __u8 lbmd_pi_offset; + /* T10 PI guard tag type */ + __u8 lbmd_guard_tag_type; + /* Size in bytes of the T10 PI application tag */ + __u8 lbmd_app_tag_size; + /* Size in bytes of the T10 PI reference tag */ + __u8 lbmd_ref_tag_size; + /* Size in bytes of the T10 PI storage tag */ + __u8 lbmd_storage_tag_size; + __u8 pad; +}; + /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1 @@ -149,6 +217,24 @@ struct fsxattr { }; /* + * Variable size structure for file_[sg]et_attr(). + * + * Note. This is alternative to the structure 'struct file_kattr'/'struct fsxattr'. + * As this structure is passed to/from userspace with its size, this can + * be versioned based on the size. + */ +struct file_attr { + __u64 fa_xflags; /* xflags field value (get/set) */ + __u32 fa_extsize; /* extsize field value (get/set)*/ + __u32 fa_nextents; /* nextents field value (get) */ + __u32 fa_projid; /* project identifier (get/set) */ + __u32 fa_cowextsize; /* CoW extsize field value (get/set) */ +}; + +#define FILE_ATTR_SIZE_VER0 24 +#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0 + +/* * Flags for the fsx_xflags field */ #define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ @@ -247,6 +333,8 @@ struct fsxattr { * also /sys/kernel/debug/ for filesystems with debugfs exports */ #define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) +/* Get logical block metadata capability details */ +#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap) /* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 3b93fb906e3c..ed3aed264aeb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -244,6 +244,8 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* MTE tag check store only */ +# define PR_MTE_STORE_ONLY (1UL << 19) /* RISC-V pointer masking tag length */ # define PR_PMLEN_SHIFT 24 # define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) @@ -255,7 +257,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 diff --git a/tools/perf/trace/beauty/include/uapi/linux/vhost.h b/tools/perf/trace/beauty/include/uapi/linux/vhost.h index d4b3e2ae1314..c57674a6aa0d 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/vhost.h +++ b/tools/perf/trace/beauty/include/uapi/linux/vhost.h @@ -235,4 +235,39 @@ */ #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) + +/* Extended features manipulation */ +#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) +#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \ + struct vhost_features_array) + +/* fork_owner values for vhost */ +#define VHOST_FORK_OWNER_KTHREAD 0 +#define VHOST_FORK_OWNER_TASK 1 + +/** + * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device, + * This ioctl must called before VHOST_SET_OWNER. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @param fork_owner: An 8-bit value that determines the vhost thread mode + * + * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value): + * - Vhost will create vhost worker as tasks forked from the owner, + * inheriting all of the owner's attributes. + * + * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: + * - Vhost will create vhost workers as kernel threads. + */ +#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8) + +/** + * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. + * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y + * + * @return: An 8-bit value indicating the current thread mode. + */ +#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8) + #endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 5b6d3e899e11..2298cd396c42 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -657,9 +657,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, info_node->info_linear = info_linear; info_node->metadata = NULL; if (!perf_env__insert_bpf_prog_info(env, info_node)) { - free(info_linear); + /* + * Insert failed, likely because of a duplicate event + * made by the sideband thread. Ignore synthesizing the + * metadata. + */ free(info_node); + goto out; } + /* info_linear is now owned by info_node and shouldn't be freed below. */ info_linear = NULL; /* @@ -827,18 +833,18 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, return err; } -static void perf_env__add_bpf_info(struct perf_env *env, u32 id) +static int perf_env__add_bpf_info(struct perf_env *env, u32 id) { struct bpf_prog_info_node *info_node; struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; - int fd; + int fd, err = 0; fd = bpf_prog_get_fd_by_id(id); if (fd < 0) - return; + return -EINVAL; arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; @@ -852,6 +858,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. aborting\n", __func__); + err = PTR_ERR(info_linear); goto out; } @@ -862,38 +869,46 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_node->info_linear = info_linear; info_node->metadata = bpf_metadata_create(&info_linear->info); if (!perf_env__insert_bpf_prog_info(env, info_node)) { + pr_debug("%s: duplicate add bpf info request for id %u\n", + __func__, btf_id); free(info_linear); free(info_node); + goto out; } - } else + } else { free(info_linear); + err = -ENOMEM; + goto out; + } if (btf_id == 0) goto out; btf = btf__load_from_kernel_by_id(btf_id); - if (libbpf_get_error(btf)) { - pr_debug("%s: failed to get BTF of id %u, aborting\n", - __func__, btf_id); - goto out; + if (!btf) { + err = -errno; + pr_debug("%s: failed to get BTF of id %u %d\n", __func__, btf_id, err); + } else { + perf_env__fetch_btf(env, btf_id, btf); } - perf_env__fetch_btf(env, btf_id, btf); out: btf__free(btf); close(fd); + return err; } static int bpf_event__sb_cb(union perf_event *event, void *data) { struct perf_env *env = data; + int ret = 0; if (event->header.type != PERF_RECORD_BPF_EVENT) return -1; switch (event->bpf.type) { case PERF_BPF_EVENT_PROG_LOAD: - perf_env__add_bpf_info(env, event->bpf.id); + ret = perf_env__add_bpf_info(env, event->bpf.id); case PERF_BPF_EVENT_PROG_UNLOAD: /* @@ -907,7 +922,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data) break; } - return 0; + return ret; } int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c index 80b1d2b3729b..5a66dc8594aa 100644 --- a/tools/perf/util/bpf-utils.c +++ b/tools/perf/util/bpf-utils.c @@ -20,7 +20,7 @@ struct bpil_array_desc { */ }; -static struct bpil_array_desc bpil_array_desc[] = { +static const struct bpil_array_desc bpil_array_desc[] = { [PERF_BPIL_JITED_INSNS] = { offsetof(struct bpf_prog_info, jited_prog_insns), offsetof(struct bpf_prog_info, jited_prog_len), @@ -115,7 +115,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) __u32 info_len = sizeof(info); __u32 data_len = 0; int i, err; - void *ptr; + __u8 *ptr; if (arrays >> PERF_BPIL_LAST_ARRAY) return ERR_PTR(-EINVAL); @@ -126,15 +126,15 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) pr_debug("can't get prog info: %s", strerror(errno)); return ERR_PTR(-EFAULT); } + if (info.type >= __MAX_BPF_PROG_TYPE) + pr_debug("%s:%d: unexpected program type %u\n", __func__, __LINE__, info.type); /* step 2: calculate total size of all arrays */ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + const struct bpil_array_desc *desc = &bpil_array_desc[i]; bool include_array = (arrays & (1UL << i)) > 0; - struct bpil_array_desc *desc; __u32 count, size; - desc = bpil_array_desc + i; - /* kernel is too old to support this field */ if (info_len < desc->array_offset + sizeof(__u32) || info_len < desc->count_offset + sizeof(__u32) || @@ -163,19 +163,20 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u32 count, size; if ((arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); bpf_prog_info_set_offset_u32(&info_linear->info, desc->count_offset, count); bpf_prog_info_set_offset_u32(&info_linear->info, desc->size_offset, size); + assert(ptr >= info_linear->data); + assert(ptr < &info_linear->data[data_len]); bpf_prog_info_set_offset_u64(&info_linear->info, desc->array_offset, ptr_to_u64(ptr)); @@ -189,27 +190,45 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) free(info_linear); return ERR_PTR(-EFAULT); } + if (info_linear->info.type >= __MAX_BPF_PROG_TYPE) { + pr_debug("%s:%d: unexpected program type %u\n", + __func__, __LINE__, info_linear->info.type); + } /* step 6: verify the data */ + ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; - __u32 v1, v2; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; + __u32 count1, count2, size1, size2; + __u64 ptr2; if ((arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + count1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + count2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->count_offset); - if (v1 != v2) - pr_warning("%s: mismatch in element count\n", __func__); + if (count1 != count2) { + pr_warning("%s: mismatch in element count %u vs %u\n", __func__, count1, count2); + free(info_linear); + return ERR_PTR(-ERANGE); + } - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + size1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + size2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->size_offset); - if (v1 != v2) - pr_warning("%s: mismatch in rec size\n", __func__); + if (size1 != size2) { + pr_warning("%s: mismatch in rec size %u vs %u\n", __func__, size1, size2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr2 = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); + if (ptr_to_u64(ptr) != ptr2) { + pr_warning("%s: mismatch in array %p vs %llx\n", __func__, ptr, ptr2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr += roundup(count1 * size1, sizeof(__u64)); } /* step 7: update info_len and data_len */ @@ -224,13 +243,12 @@ void bpil_addr_to_offs(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; addr = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); offs = addr - ptr_to_u64(info_linear->data); @@ -244,13 +262,12 @@ void bpil_offs_to_addr(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; offs = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); addr = offs + ptr_to_u64(info_linear->data); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a7018a3b0437..bf7f3268b9a2 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -115,7 +115,7 @@ int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sb struct build_id bid = { .size = 0, }; int ret; - ret = filename__read_build_id(pathname, &bid); + ret = filename__read_build_id(pathname, &bid, /*block=*/true); if (ret < 0) return ret; @@ -841,7 +841,7 @@ static int filename__read_build_id_ns(const char *filename, int ret; nsinfo__mountns_enter(nsi, &nsc); - ret = filename__read_build_id(filename, bid); + ret = filename__read_build_id(filename, bid, /*block=*/true); nsinfo__mountns_exit(&nsc); return ret; diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c index a44c70f93156..bb9ebd84ec2d 100644 --- a/tools/perf/util/debuginfo.c +++ b/tools/perf/util/debuginfo.c @@ -110,8 +110,12 @@ struct debuginfo *debuginfo__new(const char *path) if (!dso) goto out; - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + /* + * Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO. Don't block + * incase the path isn't for a regular file. + */ + assert(!dso__has_build_id(dso)); + if (filename__read_build_id(path, &bid, /*block=*/false) > 0) dso__set_build_id(dso, &bid); for (type = distro_dwarf_types; diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 0a7645c7fae7..64c1d65b0149 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -81,13 +81,13 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) return 0; } nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), &bid) > 0) { + if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } else if (errno == ENOENT && dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, &bid) > 0) { + if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6d2c280a1730..1346fd180653 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -873,13 +873,17 @@ out: #ifdef HAVE_LIBBFD_BUILDID_SUPPORT -static int read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid, bool block) { size_t size = sizeof(bid->data); - int err = -1; + int err = -1, fd; bfd *abfd; - abfd = bfd_openr(filename, NULL); + fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); + if (fd < 0) + return -1; + + abfd = bfd_fdopenr(filename, /*target=*/NULL, fd); if (!abfd) return -1; @@ -902,7 +906,7 @@ out_close: #else // HAVE_LIBBFD_BUILDID_SUPPORT -static int read_build_id(const char *filename, struct build_id *bid) +static int read_build_id(const char *filename, struct build_id *bid, bool block) { size_t size = sizeof(bid->data); int fd, err = -1; @@ -911,7 +915,7 @@ static int read_build_id(const char *filename, struct build_id *bid) if (size < BUILD_ID_SIZE) goto out; - fd = open(filename, O_RDONLY); + fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); if (fd < 0) goto out; @@ -934,7 +938,7 @@ out: #endif // HAVE_LIBBFD_BUILDID_SUPPORT -int filename__read_build_id(const char *filename, struct build_id *bid) +int filename__read_build_id(const char *filename, struct build_id *bid, bool block) { struct kmod_path m = { .name = NULL, }; char path[PATH_MAX]; @@ -958,9 +962,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) } close(fd); filename = path; + block = true; } - err = read_build_id(filename, bid); + err = read_build_id(filename, bid, block); if (m.comp) unlink(filename); diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7201494c5c20..41e4ebe5eac5 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -4,7 +4,6 @@ #include <errno.h> #include <unistd.h> -#include <stdio.h> #include <fcntl.h> #include <string.h> #include <stdlib.h> @@ -86,13 +85,10 @@ int filename__read_debuglink(const char *filename __maybe_unused, /* * Just try PT_NOTE header otherwise fails */ -int filename__read_build_id(const char *filename, struct build_id *bid) +int filename__read_build_id(const char *filename, struct build_id *bid, bool block) { - FILE *fp; - int ret = -1; + int fd, ret = -1; bool need_swap = false, elf32; - u8 e_ident[EI_NIDENT]; - int i; union { struct { Elf32_Ehdr ehdr32; @@ -103,28 +99,27 @@ int filename__read_build_id(const char *filename, struct build_id *bid) Elf64_Phdr *phdr64; }; } hdrs; - void *phdr; - size_t phdr_size; - void *buf = NULL; - size_t buf_size = 0; + void *phdr, *buf = NULL; + ssize_t phdr_size, ehdr_size, buf_size = 0; - fp = fopen(filename, "r"); - if (fp == NULL) + fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); + if (fd < 0) return -1; - if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + if (read(fd, hdrs.ehdr32.e_ident, EI_NIDENT) != EI_NIDENT) goto out; - if (memcmp(e_ident, ELFMAG, SELFMAG) || - e_ident[EI_VERSION] != EV_CURRENT) + if (memcmp(hdrs.ehdr32.e_ident, ELFMAG, SELFMAG) || + hdrs.ehdr32.e_ident[EI_VERSION] != EV_CURRENT) goto out; - need_swap = check_need_swap(e_ident[EI_DATA]); - elf32 = e_ident[EI_CLASS] == ELFCLASS32; + need_swap = check_need_swap(hdrs.ehdr32.e_ident[EI_DATA]); + elf32 = hdrs.ehdr32.e_ident[EI_CLASS] == ELFCLASS32; + ehdr_size = (elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64)) - EI_NIDENT; - if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, - elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), - 1, fp) != 1) + if (read(fd, + (elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64) + EI_NIDENT, + ehdr_size) != ehdr_size) goto out; if (need_swap) { @@ -138,14 +133,18 @@ int filename__read_build_id(const char *filename, struct build_id *bid) hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } } - phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum - : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + if ((elf32 && hdrs.ehdr32.e_phentsize != sizeof(Elf32_Phdr)) || + (!elf32 && hdrs.ehdr64.e_phentsize != sizeof(Elf64_Phdr))) + goto out; + + phdr_size = elf32 ? sizeof(Elf32_Phdr) * hdrs.ehdr32.e_phnum + : sizeof(Elf64_Phdr) * hdrs.ehdr64.e_phnum; phdr = malloc(phdr_size); if (phdr == NULL) goto out; - fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); - if (fread(phdr, phdr_size, 1, fp) != 1) + lseek(fd, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (read(fd, phdr, phdr_size) != phdr_size) goto out_free; if (elf32) @@ -153,8 +152,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) else hdrs.phdr64 = phdr; - for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) { - size_t p_filesz; + for (int i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) { + ssize_t p_filesz; if (need_swap) { if (elf32) { @@ -180,8 +179,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; buf = tmp; } - fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); - if (fread(buf, p_filesz, 1, fp) != 1) + lseek(fd, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (read(fd, buf, p_filesz) != p_filesz) goto out_free; ret = read_build_id(buf, p_filesz, bid, need_swap); @@ -194,7 +193,7 @@ out_free: free(buf); free(phdr); out: - fclose(fp); + close(fd); return ret; } @@ -324,7 +323,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, if (ret >= 0) RC_CHK_ACCESS(dso)->is_64_bit = ret; - if (filename__read_build_id(ss->name, &bid) > 0) + if (filename__read_build_id(ss->name, &bid, /*block=*/true) > 0) dso__set_build_id(dso, &bid); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e816e4220d33..3fed54de5401 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1869,14 +1869,14 @@ int dso__load(struct dso *dso, struct map *map) /* * Read the build id if possible. This is required for - * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. Don't block in case path + * isn't for a regular file. */ - if (!dso__has_build_id(dso) && - is_regular_file(dso__long_name(dso))) { + if (!dso__has_build_id(dso)) { struct build_id bid = { .size = 0, }; __symbol__join_symfs(name, PATH_MAX, dso__long_name(dso)); - if (filename__read_build_id(name, &bid) > 0) + if (filename__read_build_id(name, &bid, /*block=*/false) > 0) dso__set_build_id(dso, &bid); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3fb5d146d9b1..347106218799 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -140,7 +140,7 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -int filename__read_build_id(const char *filename, struct build_id *id); +int filename__read_build_id(const char *filename, struct build_id *id, bool block); int sysfs__read_build_id(const char *filename, struct build_id *bid); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index cb2c1ace304a..fcd1fd13c30e 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -401,7 +401,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, nsi = nsinfo__new(event->pid); nsinfo__mountns_enter(nsi, &nc); - rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1; + rc = filename__read_build_id(event->filename, &bid, /*block=*/false) > 0 ? 0 : -1; nsinfo__mountns_exit(&nc); nsinfo__put(nsi); diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index 500653ef98c7..8ac82b6f9189 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -81,10 +81,11 @@ Refer to the AMD P-State kernel documentation for further information. .RE .PP -\-\-turbo\-boost, \-t +\-\-turbo\-boost, \-\-boost, \-t .RS 4 -This option is used to enable or disable the turbo boost feature on -supported Intel and AMD processors. +This option is used to enable or disable the boost feature on +supported Intel and AMD processors, and other boost supported systems. +(The --boost option is an alias for the --turbo-boost option) This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature. diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index fc750e127404..7d3732f5f2f6 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -128,7 +128,7 @@ static int get_boost_mode_x86(unsigned int cpu) /* ToDo: Make this more global */ unsigned long pstates[MAX_HW_PSTATES] = {0,}; - ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states); + ret = cpufreq_has_x86_boost_support(cpu, &support, &active, &b_states); if (ret) { printf(_("Error while evaluating Boost Capabilities" " on CPU %d -- are you root?\n"), cpu); @@ -204,6 +204,18 @@ static int get_boost_mode_x86(unsigned int cpu) return 0; } +static int get_boost_mode_generic(unsigned int cpu) +{ + bool active; + + if (!cpufreq_has_generic_boost_support(&active)) { + printf(_(" boost state support:\n")); + printf(_(" Active: %s\n"), active ? _("yes") : _("no")); + } + + return 0; +} + /* --boost / -b */ static int get_boost_mode(unsigned int cpu) @@ -214,6 +226,8 @@ static int get_boost_mode(unsigned int cpu) cpupower_cpu_info.vendor == X86_VENDOR_HYGON || cpupower_cpu_info.vendor == X86_VENDOR_INTEL) return get_boost_mode_x86(cpu); + else + get_boost_mode_generic(cpu); freqs = cpufreq_get_boost_frequencies(cpu); if (freqs) { diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 0677b58374ab..c2117e5650dd 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -21,6 +21,7 @@ static struct option set_opts[] = { {"epp", required_argument, NULL, 'e'}, {"amd-pstate-mode", required_argument, NULL, 'm'}, {"turbo-boost", required_argument, NULL, 't'}, + {"boost", required_argument, NULL, 't'}, { }, }; @@ -62,8 +63,8 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "b:e:m:", - set_opts, NULL)) != -1) { + while ((ret = getopt_long(argc, argv, "b:e:m:t:", + set_opts, NULL)) != -1) { switch (ret) { case 'b': if (params.perf_bias) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 95749b8ee475..82ea62bdf5a2 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -103,6 +103,9 @@ extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ +int cpufreq_has_generic_boost_support(bool *active); +int cpupower_set_turbo_boost(int turbo_boost); + /* X86 ONLY ****************************************/ #if defined(__i386__) || defined(__x86_64__) @@ -118,7 +121,6 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); extern int cpupower_set_epp(unsigned int cpu, char *epp); extern int cpupower_set_amd_pstate_mode(char *mode); -extern int cpupower_set_turbo_boost(int turbo_boost); /* Read/Write msr ****************************/ @@ -139,8 +141,8 @@ extern int decode_pstates(unsigned int cpu, int boost_states, /* AMD HW pstate decoding **************************/ -extern int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states); +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states); /* AMD P-State stuff **************************/ bool cpupower_amd_pstate_enabled(void); @@ -181,13 +183,11 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp) { return -1; }; static inline int cpupower_set_amd_pstate_mode(char *mode) { return -1; }; -static inline int cpupower_set_turbo_boost(int turbo_boost) -{ return -1; }; /* Read/Write msr ****************************/ -static inline int cpufreq_has_boost_support(unsigned int cpu, int *support, - int *active, int * states) +static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, + int *active, int *states) { return -1; } static inline bool cpupower_amd_pstate_enabled(void) diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 76e461ff4f74..166dc1e470ea 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -8,15 +8,14 @@ #include "helpers/helpers.h" #include "helpers/sysfs.h" #include "cpufreq.h" +#include "cpupower_intern.h" #if defined(__i386__) || defined(__x86_64__) -#include "cpupower_intern.h" - #define MSR_AMD_HWCR 0xc0010015 -int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, - int *states) +int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active, + int *states) { int ret; unsigned long long val; @@ -124,24 +123,6 @@ int cpupower_set_amd_pstate_mode(char *mode) return 0; } -int cpupower_set_turbo_boost(int turbo_boost) -{ - char path[SYSFS_PATH_MAX]; - char linebuf[2] = {}; - - snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); - - if (!is_valid_path(path)) - return -1; - - snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); - - if (cpupower_write_sysfs(path, linebuf, 2) <= 0) - return -1; - - return 0; -} - bool cpupower_amd_pstate_enabled(void) { char *driver = cpufreq_get_driver(0); @@ -160,6 +141,39 @@ bool cpupower_amd_pstate_enabled(void) #endif /* #if defined(__i386__) || defined(__x86_64__) */ +int cpufreq_has_generic_boost_support(bool *active) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + unsigned long val; + char *endp; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -EACCES; + + if (cpupower_read_sysfs(path, linebuf, 2) <= 0) + return -EINVAL; + + val = strtoul(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -EINVAL; + + switch (val) { + case 0: + *active = false; + break; + case 1: + *active = true; + break; + default: + return -EINVAL; + } + + return 0; +} + /* get_cpustate * * Gather the information of all online CPUs into bitmask struct @@ -259,3 +273,21 @@ void print_speed(unsigned long speed, int no_rounding) } } } + +int cpupower_set_turbo_boost(int turbo_boost) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[2] = {}; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost"); + + if (!is_valid_path(path)) + return -1; + + snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost); + + if (cpupower_write_sysfs(path, linebuf, 2) <= 0) + return -1; + + return 0; +} diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index 580b4e246aec..d1ae5e92c615 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -408,3 +408,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 5a5a7a5f7e1d..a4ac80bb1003 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client) if (info.coredump_mask & PIDFD_COREDUMPED) goto out; - if (read(fd_coredump, &c, 1) < 1) - goto out; - exit_code = EXIT_SUCCESS; out: if (fd_peer_pidfd >= 0) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b230deb19e8..9a3499827d4b 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -4,6 +4,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py +TEST_FILES += drgn_dump_damon_status.py # functionality tests TEST_PROGS += sysfs.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 2b10854e4b1e..44b98f17f8ff 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh + bond_macvlan_ipvlan.sh \ + bond_passive_lacp.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh new file mode 100755 index 000000000000..9c3b089813df --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test if a bond interface works with lacp_active=off. + +# shellcheck disable=SC2034 +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2317 +check_port_state() +{ + local netns=$1 + local port=$2 + local state=$3 + + ip -n "${netns}" -d -j link show "$port" | \ + jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null +} + +check_pkt_count() +{ + RET=0 + local ns="$1" + local iface="$2" + + # wait 65s, one per 30s + slowwait_for_counter 65 2 tc_rule_handle_stats_get \ + "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null +} + +setup() { + setup_ns c_ns s_ns + + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + + # Add tc filter to count the pkts + tc -n "${c_ns}" qdisc add dev eth0 clsact + tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass + tc -n "${s_ns}" qdisc add dev eth1 clsact + tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass + + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + +} + +trap cleanup_all_ns EXIT +setup + +# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s +# after interface up +ip -n "${c_ns}" link set bond0 up +sleep 2 + +# 1. The passive side shouldn't send LACPDU. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "802.3ad lacp_active off" "init port" + +ip -n "${s_ns}" link set bond0 up +# 2. The passive side should not have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1 +log_test "802.3ad lacp_active off" "port state active" + +# 3. The active side should have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1 +log_test "802.3ad lacp_active on" "port state active" + +# 4. Make sure the connection is not expired. +RET=0 +slowwait 5 check_port_state "${s_ns}" "eth0" "distributing" +slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1 +log_test "bond 802.3ad lacp_active off" "port connection" + +# After testing, disconnect one port on each side to check the state. +ip -n "${s_ns}" link set eth0 nomaster +ip -n "${s_ns}" link set eth0 up +ip -n "${c_ns}" link set eth1 nomaster +ip -n "${c_ns}" link set eth1 up +# Due to Periodic Machine and Rx Machine state change, the bond will still +# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure +# negotiation finished +sleep 5 + +# 5. The active side should keep sending LACPDU. +check_pkt_count "${s_ns}" "eth1" || RET=1 +log_test "bond 802.3ad lacp_active on" "port pkt after disconnect" + +# 6. The passive side shouldn't send LACPDU anymore. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "bond 802.3ad lacp_active off" "port pkt after disconnect" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index dad4e5fda4db..4d16a69ffc65 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -6,6 +6,7 @@ CONFIG_MACVLAN=y CONFIG_IPVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cd23af875317..3e3a89a34afe 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -17,7 +17,7 @@ def test_receive(cfg, ipver="6", extra_args=None): ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" - tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" with bkg(rx_cmd, exit_wait=True): wait_port_listen(34000, proto="udp") @@ -37,7 +37,7 @@ def test_transmit(cfg, ipver="6", extra_args=None): if extra_args != "-U -Z": extra_args += " -r 1" - rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index b2698db39817..9699a100a87d 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -35,6 +35,8 @@ def _setup_deferred_cleanup(cfg) -> None: threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout defer(_set_threaded_state, cfg, threaded) + return combined + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ @@ -49,7 +51,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -62,7 +64,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") _assert_napi_threaded_enabled(nl, napi0_id) _assert_napi_threaded_disabled(nl, napi1_id) @@ -80,7 +82,7 @@ def change_num_queues(cfg, nl) -> None: napi0_id = napis[0]['id'] napi1_id = napis[1]['id'] - _setup_deferred_cleanup(cfg) + qcnt = _setup_deferred_cleanup(cfg) # set threaded _set_threaded_state(cfg, 1) @@ -90,7 +92,7 @@ def change_num_queues(cfg, nl) -> None: _assert_napi_threaded_enabled(nl, napi1_id) cmd(f"ethtool -L {cfg.ifname} combined 1") - cmd(f"ethtool -L {cfg.ifname} combined 2") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") # check napi threaded is set for both napis _assert_napi_threaded_enabled(nl, napi0_id) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2925e47db995..8516e8434bc4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -751,7 +751,7 @@ for (; _metadata->trigger; _metadata->trigger = \ __bail(_assert, _metadata)) -#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) +#define is_signed_var(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) #define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ @@ -759,7 +759,7 @@ __typeof__(_seen) __seen = (_seen); \ if (!(__exp _t __seen)) { \ /* Report with actual signedness to avoid weird output. */ \ - switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ + switch (is_signed_var(__exp) * 2 + is_signed_var(__seen)) { \ case 0: { \ uintmax_t __exp_print = (uintmax_t)__exp; \ uintmax_t __seen_print = (uintmax_t)__seen; \ diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f6fe7a07a0a2..41b40c676d7f 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -169,6 +169,7 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += arm64/kvm-uuid TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index cef8f7323ceb..713005b6f508 100644 --- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -146,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); return el0 == ID_AA64PFR0_EL1_EL0_IMP; } diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index e34963956fbc..1d431de8729c 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -116,12 +116,12 @@ static void reset_debug_state(void) /* Reset all bcr/bvr/wcr/wvr registers */ dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); + brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0); for (i = 0; i <= brps; i++) { write_dbgbcr(i, 0); write_dbgbvr(i, 0); } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); + wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0); for (i = 0; i <= wrps; i++) { write_dbgwcr(i, 0); write_dbgwvr(i, 0); @@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt) static int debug_version(uint64_t id_aa64dfr0) { - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); + return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) @@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0) int b, w, c; /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; + brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1; __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; + wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1; /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; + ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1; pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, brp_num, wrp_num, ctx_brp_num); diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c new file mode 100644 index 000000000000..af9581b860f1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that nobody has tampered with KVM's UID + +#include <errno.h> +#include <linux/arm-smccc.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "processor.h" + +/* + * Do NOT redefine these constants, or try to replace them with some + * "common" version. They are hardcoded here to detect any potential + * breakage happening in the rest of the kernel. + * + * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 + */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +static void guest_code(void) +{ + struct arm_smccc_res res = {}; + + smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + + __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && + res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && + res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 && + res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3, + "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3); + GUEST_DONE(); +} + +int main (int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c index ebd70430c89d..f222538e6084 100644 --- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -54,7 +54,7 @@ static void guest_code(void) * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having * hidden the feature at runtime without any other userspace action. */ - __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), + __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC, read_sysreg(id_aa64pfr0_el1)) == 0, "GICv3 wrongly advertised"); @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, NULL); pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), + __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0), "GICv3 not supported."); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index dc6559dad9d8..4ccbd389d133 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -95,14 +95,14 @@ static bool guest_check_lse(void) uint64_t isar0 = read_sysreg(id_aa64isar0_el1); uint64_t atomic; - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); + atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0); return atomic >= 2; } static bool guest_check_dc_zva(void) { uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); + uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid); return dzp == 0; } @@ -195,7 +195,7 @@ static bool guest_set_ha(void) uint64_t hadbs, tcr; /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); + hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1); if (hadbs == 0) return false; diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index d3bf9204409c..189321e96925 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -243,6 +243,7 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_REG_SYNC(SYS_MIDR_EL1); @@ -594,8 +595,8 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val); - mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); @@ -612,7 +613,7 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) } val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); else @@ -774,7 +775,7 @@ int main(void) /* Check for AARCH64 only system */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); ksft_print_header(); diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index f16b3b27e32e..a0c4ab839155 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -441,7 +441,7 @@ static void create_vpmu_vm(void *guest_code) /* Make sure that PMUv3 support is indicated in the ID register */ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); - pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); + pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 9d69904cb608..eb115123d741 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -573,15 +573,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val); *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, ID_AA64MMFR0_EL1_TGRAN4_52_BIT); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val); *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, ID_AA64MMFR0_EL1_TGRAN64_IMP); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val); *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, ID_AA64MMFR0_EL1_TGRAN16_52_BIT); diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index d30625c18259..c744c603d688 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1554,8 +1554,8 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) } /* Read from the page to populate the shared zeropage. */ - FORCE_READ(mem); - FORCE_READ(smem); + FORCE_READ(*mem); + FORCE_READ(*smem); fn(mem, smem, pagesize); munmap: diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index b0d42eb04e3a..8dd81c0a4a5a 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -145,7 +145,7 @@ static bool try_access_buf(char *ptr, bool write) if (write) *ptr = 'x'; else - FORCE_READ(ptr); + FORCE_READ(*ptr); } signal_jump_set = false; diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index 1afe14b9dc0c..c5940c0595be 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -50,8 +50,10 @@ void read_fault_pages(void *addr, unsigned long nr_pages) unsigned long i; for (i = 0; i < nr_pages; i++) { + unsigned long *addr2 = + ((unsigned long *)(addr + (i * huge_page_size))); /* Prevent the compiler from optimizing out the entire loop: */ - FORCE_READ(((unsigned long *)(addr + (i * huge_page_size)))); + FORCE_READ(*addr2); } } diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index c5a73617796a..ea945eebec2f 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -110,7 +110,7 @@ void *access_mem(void *ptr) * the memory access actually happens and prevents the compiler * from optimizing away this entire loop. */ - FORCE_READ((uint64_t *)ptr); + FORCE_READ(*(uint64_t *)ptr); } return NULL; diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index fccf9e797a0c..5bd52a951cbd 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -5,10 +5,14 @@ #define _GNU_SOURCE #include <errno.h> +#include <fcntl.h> +#include <linux/userfaultfd.h> #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <sys/ioctl.h> #include <sys/mman.h> +#include <syscall.h> #include <time.h> #include <stdbool.h> @@ -168,6 +172,7 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, if (first_val <= start && second_val >= end) { success = true; + fflush(maps_fp); break; } } @@ -175,6 +180,15 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, return success; } +/* Check if [ptr, ptr + size) mapped in /proc/self/maps. */ +static bool is_ptr_mapped(FILE *maps_fp, void *ptr, unsigned long size) +{ + unsigned long start = (unsigned long)ptr; + unsigned long end = start + size; + + return is_range_mapped(maps_fp, start, end); +} + /* * Returns the start address of the mapping on success, else returns * NULL on failure. @@ -733,6 +747,249 @@ out: dont_unmap ? " [dontunnmap]" : ""); } +#ifdef __NR_userfaultfd +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, + unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int uffd, err, i; + void *res; + struct uffdio_api api = { + .api = UFFD_API, + .features = UFFD_EVENT_PAGEFAULT, + }; + + uffd = syscall(__NR_userfaultfd, O_NONBLOCK); + if (uffd == -1) { + err = errno; + perror("userfaultfd"); + if (err == EPERM) { + ksft_test_result_skip("%s - missing uffd", test_name); + return; + } + success = false; + goto out; + } + if (ioctl(uffd, UFFDIO_API, &api)) { + perror("ioctl UFFDIO_API"); + success = false; + goto out_close_uffd; + } + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + + tgt_ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| + * + * Additionally, register each with UFFD. + */ + for (i = 0; i < 10; i += 2) { + void *unmap_ptr = &ptr[(i + 1) * page_size]; + unsigned long start = (unsigned long)&ptr[i * page_size]; + struct uffdio_register reg = { + .range = { + .start = start, + .len = page_size, + }, + .mode = UFFDIO_REGISTER_MODE_MISSING, + }; + + if (ioctl(uffd, UFFDIO_REGISTER, ®) == -1) { + perror("ioctl UFFDIO_REGISTER"); + success = false; + goto out_unmap; + } + if (munmap(unmap_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range which is invalid for multi VMA move. + * + * This will fail, and no VMA should be moved, as we check this ahead of + * time. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, + "Invalid uffd-armed VMA at start of multi range moved\n"); + success = false; + goto out_unmap; + } + + /* + * Now try to move a single VMA, this should succeed as not multi VMA + * move. + */ + res = mremap(ptr, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + if (res == MAP_FAILED) { + perror("mremap single invalid-multi VMA"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and remap a non-uffd registered (therefore, multi VMA + * move valid) VMA at the start of ptr range. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + res = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + + /* + * Now try to move the entire range, we should succeed in moving the + * first VMA, but no others, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (!is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, "Valid VMA not moved\n"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and map valid VMA at start of ptr range, and replace + * all existing multi-move invalid VMAs, except the last, with valid + * multi-move VMAs. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + if (munmap(ptr, size - 2 * page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + for (i = 0; i < 8; i += 2) { + res = mmap(&ptr[i * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range, we should succeed in moving all but + * the last VMA, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + + for (i = 0; i < 10; i += 2) { + bool is_mapped = is_ptr_mapped(maps_fp, + &tgt_ptr[i * page_size], page_size); + + if (i < 8 && !is_mapped) { + fprintf(stderr, "Valid VMA not moved at %d\n", i); + success = false; + goto out_unmap; + } else if (i == 8 && is_mapped) { + fprintf(stderr, "Invalid VMA moved at %d\n", i); + success = false; + goto out_unmap; + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out_close_uffd: + close(uffd); +out: + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} +#else +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + + ksft_test_result_skip("%s - missing uffd", test_name); +} +#endif /* __NR_userfaultfd */ + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -1074,7 +1331,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 8; + int num_misc_tests = 9; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -1197,8 +1454,6 @@ int main(int argc, char **argv) mremap_expand_merge(maps_fp, page_size); mremap_expand_merge_offset(maps_fp, page_size); - fclose(maps_fp); - mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); mremap_shrink_multiple_vmas(page_size, /* inplace= */true); @@ -1207,6 +1462,9 @@ int main(int argc, char **argv) mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multi_invalid_vmas(maps_fp, page_size); + + fclose(maps_fp); if (run_perf_tests) { ksft_print_msg("\n%s\n", diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 0d4209eef0c3..e6face7c0166 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1525,7 +1525,7 @@ void zeropfn_tests(void) ret = madvise(mem, hpage_size, MADV_HUGEPAGE); if (!ret) { - FORCE_READ(mem); + FORCE_READ(*mem); ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 05de1fc0005b..44a3f8a58806 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -439,8 +439,11 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, } madvise(*addr, fd_size, MADV_HUGEPAGE); - for (size_t i = 0; i < fd_size; i++) - FORCE_READ((*addr + i)); + for (size_t i = 0; i < fd_size; i++) { + char *addr2 = *addr + i; + + FORCE_READ(*addr2); + } if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c20298ae98ea..b55d1809debc 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -23,7 +23,7 @@ * anything with it in order to trigger a read page fault. We therefore must use * volatile to stop the compiler from optimising this away. */ -#define FORCE_READ(x) (*(volatile typeof(x) *)x) +#define FORCE_READ(x) (*(const volatile typeof(x) *)&(x)) extern unsigned int __page_size; extern unsigned int __page_shift; diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index b1e4618399be..a688871a98eb 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -107,6 +107,26 @@ #endif #endif +#ifndef __NR_open_tree_attr + #if defined __alpha__ + #define __NR_open_tree_attr 577 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree_attr (467 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree_attr (467 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree_attr (467 + 5000) + #endif + #elif defined __ia64__ + #define __NR_open_tree_attr (467 + 1024) + #else + #define __NR_open_tree_attr 467 + #endif +#endif + #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } +static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size); +} + static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) ASSERT_EQ(close(open_tree_fd), 0); } +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + /** * Validate that currently changing the idmapping of an idmapped mount fails. */ @@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping) .attr_set = MOUNT_ATTR_IDMAP, }; + ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0)); + if (!mount_setattr_supported()) SKIP(return, "mount_setattr syscall not supported"); @@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping) AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + /* Change idmapping on a detached mount that is already idmapped. */ attr.userns_fd = get_userns_fd(0, 20000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } -static bool expected_uid_gid(int dfd, const char *path, int flags, - uid_t expected_uid, gid_t expected_gid) -{ - int ret; - struct stat st; - - ret = fstatat(dfd, path, &st, flags); - if (ret < 0) - return false; - - return st.st_uid == expected_uid && st.st_gid == expected_gid; -} - TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) { int open_tree_fd = -EBADF; diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..c7e03e1d6f63 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh +TEST_PROGS += test_vxlan_nh.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += test_neigh.sh diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c index 57ff67a3751e..da04b0b19b73 100644 --- a/tools/testing/selftests/net/bind_bhash.c +++ b/tools/testing/selftests/net/bind_bhash.c @@ -75,7 +75,7 @@ static void *setup(void *arg) int *array = (int *)arg; for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + sock_fd = bind_socket(SO_REUSEPORT, setup_addr); if (sock_fd < 0) { ret = sock_fd; pthread_exit(&ret); @@ -103,7 +103,7 @@ int main(int argc, const char *argv[]) setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4; - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + listener_fd = bind_socket(SO_REUSEPORT, setup_addr); if (listen(listener_fd, 100) < 0) { perror("listen failed"); return -1; diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index b98ea9449b8b..dfb6646cb97b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -18,6 +18,8 @@ # | 2001:db8:1::1/64 2001:db8:2::1/64 | # | | # +-----------------------------------------------------------------+ +# +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ALL_TESTS=" ping_ipv4 @@ -27,6 +29,7 @@ ALL_TESTS=" ipv4_sip_equal_dip ipv6_sip_equal_dip ipv4_dip_link_local + ipv4_sip_link_local " NUM_NETIFS=4 @@ -330,6 +333,32 @@ ipv4_dip_link_local() tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower } +ipv4_sip_link_local() +{ + local sip=169.254.1.1 + + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf."$rp1".rp_filter 0 + + tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \ + flower src_ip "$sip" action pass + + $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \ + -A "$sip" -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "IPv4 source IP is link-local" + + tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf."$rp1".rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 1f6f53e284b5..6269d5e23487 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -11,6 +11,7 @@ ALL_TESTS=" ets_test_strict ets_test_mixed ets_test_dwrr + ets_test_plug classifier_mode ets_test_strict ets_test_mixed diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 08240d3e3c87..79d837a2868a 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -224,3 +224,11 @@ ets_test_dwrr() ets_set_dwrr_two_bands xfail_on_slow ets_dwrr_test_01 } + +ets_test_plug() +{ + ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514" + tc qdisc add dev $put handle 20: parent 10:4 plug + start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1" + ets_qdisc_setup $put 2 +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index ac1349c4b9e5..4f07ac9fa207 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -183,9 +183,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 3cf1e2a612ce..f3bcaa48df8f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8af65373b3a..82cae37d9c20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3842,6 +3842,7 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 9934a68df237..e934dd26a59d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2e6648a2b2c0..ac7ec6f94023 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -198,6 +198,7 @@ set_limits 1 9 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" set_limits 8 8 +flush_endpoint ## to make sure it doesn't affect the limits check "get_limits" "$(format_limits 8 8)" "set limits" flush_endpoint diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 606a43a60f73..7fc6c5dbd551 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -99,7 +99,7 @@ run_one_clash_test() local entries local cre - if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then echo "INFO: did not receive expected number of replies for $daddr:$dport" ip netns exec "$ctns" conntrack -S # don't fail: check if clash resolution triggered after all. diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 788cd56ea4a0..615fe3c6f405 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -187,7 +187,7 @@ ct_udpclash() [ -x udpclash ] || return while [ $now -lt $end ]; do - ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 now=$(date +%s) done @@ -277,6 +277,7 @@ check_taint() insert_flood() { local n="$1" + local timeout="$2" local r=0 r=$((RANDOM%$insert_count)) @@ -302,7 +303,7 @@ test_floodresize_all() read tainted_then < /proc/sys/kernel/tainted for n in "$nsclient1" "$nsclient2";do - insert_flood "$n" & + insert_flood "$n" "$timeout" & done # resize table constantly while flood/insert/dump/flushs diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a4ee5496f2a1..45832df98295 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -20,6 +20,7 @@ ret=0 SOCAT_TIMEOUT=60 nsin="" +nsin_small="" ns1out="" ns2out="" @@ -36,7 +37,7 @@ cleanup() { cleanup_all_ns - rm -f "$nsin" "$ns1out" "$ns2out" + rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out" [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } @@ -72,6 +73,7 @@ lmtu=1500 rmtu=2000 filesize=$((2 * 1024 * 1024)) +filesize_small=$((filesize / 16)) usage(){ echo "nft_flowtable.sh [OPTIONS]" @@ -89,7 +91,10 @@ do o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; - s) filesize=$OPTARG;; + s) + filesize=$OPTARG + filesize_small=$((OPTARG / 16)) + ;; *) usage;; esac done @@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then fi nsin=$(mktemp) +nsin_small=$(mktemp) ns1out=$(mktemp) ns2out=$(mktemp) @@ -265,6 +271,7 @@ check_counters() check_dscp() { local what=$1 + local pmtud="$2" local ok=1 local counter @@ -277,37 +284,39 @@ check_dscp() local pc4z=${counter%*bytes*} local pc4z=${pc4z#*packets} + local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected" + case "$what" in "dscp_none") if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_fwd") if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_ingress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_egress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; *) - echo "FAIL: Unknown DSCP check" 1>&2 + echo "$failmsg: Unknown DSCP check" 1>&2 ret=1 ok=0 esac @@ -319,9 +328,9 @@ check_dscp() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 @@ -342,25 +351,39 @@ test_tcp_forwarding_ip() { local nsa=$1 local nsb=$2 - local dstip=$3 - local dstport=$4 + local pmtu=$3 + local dstip=$4 + local dstport=$5 local lret=0 + local socatc + local socatl + local infile="$nsin" + + if [ $pmtu -eq 0 ]; then + infile="$nsin_small" + fi - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & lpid=$! busywait 1000 listener_ready - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" + socatc=$? wait $lpid + socatl=$? - if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then + if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then + rc=1 + fi + + if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then lret=1 ret=1 fi - if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then lret=1 ret=1 fi @@ -370,14 +393,16 @@ test_tcp_forwarding_ip() test_tcp_forwarding() { - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + local pmtu="$3" + + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 return $? } test_tcp_forwarding_set_dscp() { - check_dscp "dscp_none" + local pmtu="$3" ip netns exec "$nsr1" nft -f - <<EOF table netdev dscpmangle { @@ -388,8 +413,8 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_ingress" + test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345 + check_dscp "dscp_ingress" "$pmtu" ip netns exec "$nsr1" nft delete table netdev dscpmangle else @@ -405,10 +430,10 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_egress" + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 + check_dscp "dscp_egress" "$pmtu" - ip netns exec "$nsr1" nft flush table netdev dscpmangle + ip netns exec "$nsr1" nft delete table netdev dscpmangle else echo "SKIP: Could not load netdev:egress for veth1" fi @@ -416,48 +441,53 @@ fi # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 # counters should have seen packets (before and after ft offload kicks in). ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3 - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_fwd" + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 + check_dscp "dscp_fwd" "$pmtu" } test_tcp_forwarding_nat() { + local nsa="$1" + local nsb="$2" + local pmtu="$3" + local what="$4" local lret - local pmtu - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - lret=$? + [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)" - pmtu=$3 - what=$4 + test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345 + lret=$? if [ "$lret" -eq 0 ] ; then if [ "$pmtu" -eq 1 ] ;then - check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" + check_counters "flow offload for ns1/ns2 with masquerade $what" else echo "PASS: flow offload for ns1/ns2 with masquerade $what" fi - test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666 lret=$? if [ "$pmtu" -eq 1 ] ;then - check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" + check_counters "flow offload for ns1/ns2 with dnat $what" elif [ "$lret" -eq 0 ] ; then echo "PASS: flow offload for ns1/ns2 with dnat $what" fi + else + echo "FAIL: flow offload for ns1/ns2 with dnat $what" fi return $lret } make_file "$nsin" "$filesize" +make_file "$nsin_small" "$filesize_small" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. # Due to MTU mismatch in both directions, all packets (except small packets like pure # acks) have to be handled by normal forwarding path. Therefore, packet counters # are not checked. -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 0; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 @@ -489,8 +519,9 @@ table ip nat { } EOF +check_dscp "dscp_none" "0" if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then - echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 + echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2 exit 0 fi @@ -513,6 +544,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null # For earlier tests (large mtus), packets cannot be handled via flowtable # (except pure acks and other small packets). ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null +ip netns exec "$ns2" nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then + echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2 + exit 0 +fi + +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 @@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 1; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -668,7 +707,7 @@ if [ "$1" = "" ]; then fi echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" - $0 -o "$o" -l "$l" -r "$r" -s "$filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1 fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c index 85c7b906ad08..79de163d61ab 100644 --- a/tools/testing/selftests/net/netfilter/udpclash.c +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -29,7 +29,7 @@ struct thread_args { int sockfd; }; -static int wait = 1; +static volatile int wait = 1; static void *thread_main(void *varg) { diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh new file mode 100755 index 000000000000..20f3369f776b --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nh.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + basic_tx_ipv4 + basic_tx_ipv6 + learning + proxy_ipv4 + proxy_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Cleanup + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +nh_stats_get() +{ + ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]" +} + +tc_stats_get() +{ + tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1" +} + +basic_tx_common() +{ + local af_str=$1; shift + local proto=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + + RET=0 + + # Test basic Tx functionality. Check that stats are incremented on + # both the FDB nexthop group and the egress device. + + run_cmd "ip -n $ns1 link add name dummy1 up type dummy" + run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1" + run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact" + run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass" + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789" + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null + check_err $? "FDB nexthop group stats did not increase" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null + check_err $? "tc filter stats did not increase" + + log_test "VXLAN FDB nexthop: $af_str basic Tx" +} + +basic_tx_ipv4() +{ + basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2 +} + +basic_tx_ipv6() +{ + basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2 +} + +learning() +{ + RET=0 + + # When learning is enabled on the VXLAN device, an incoming packet + # might try to refresh an FDB entry that points to an FDB nexthop group + # instead of an ordinary remote destination. Check that the kernel does + # not crash in this situation. + + run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo" + run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass" + run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020" + run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q" + + log_test "VXLAN FDB nexthop: learning" +} + +proxy_common() +{ + local af_str=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + local neigh_addr=$1; shift + local ping_cmd=$1; shift + + RET=0 + + # When the "proxy" option is enabled on the VXLAN device, the device + # will suppress ARP requests and IPv6 Neighbor Solicitation messages if + # it is able to reply on behalf of the remote host. That is, if a + # matching and valid neighbor entry is configured on the VXLAN device + # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The + # FDB entry for the neighbor's MAC address might point to an FDB + # nexthop group instead of an ordinary remote destination. Check that + # the kernel does not crash in this situation. + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy" + + run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 $ping_cmd" + + log_test "VXLAN FDB nexthop: $af_str proxy" +} + +proxy_ipv4() +{ + proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \ + "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3" +} + +proxy_ipv6() +{ + proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \ + "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command mausezahn +require_command arping +require_command ndisc6 +require_command jq + +if ! ip nexthop help 2>&1 | grep -q "stats"; then + echo "SKIP: iproute2 ip too old, missing nexthop stats support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup_ns ns1; $t; cleanup_all_ns; +done diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5ded3b3a7538..0f5640d8dc7f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -181,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type, return sendmsg(fd, &msg, flags); } -static int tls_recv_cmsg(struct __test_metadata *_metadata, - int fd, unsigned char record_type, - void *data, size_t len, int flags) +static int __tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char *ctype, + void *data, size_t len, int flags) { char cbuf[CMSG_SPACE(sizeof(char))]; struct cmsghdr *cmsg; - unsigned char ctype; struct msghdr msg; struct iovec vec; int n; @@ -206,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata, EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - ctype = *((unsigned char *)CMSG_DATA(cmsg)); + if (ctype) + *ctype = *((unsigned char *)CMSG_DATA(cmsg)); + + return n; +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + unsigned char ctype; + int n; + + n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags); EXPECT_EQ(ctype, record_type); return n; @@ -2164,6 +2176,284 @@ TEST_F(tls, rekey_poll_delay) } } +struct raw_rec { + unsigned int plain_len; + unsigned char plain_data[100]; + unsigned int cipher_len; + unsigned char cipher_data[128]; +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ +static const struct raw_rec id0_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, + 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, + 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd, + 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ +static const struct raw_rec id0_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, + 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, + 0x88, 0xe1, 0xd2, 0x08, 0x4f, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ +static const struct raw_rec id0_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, + 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, + 0x68, 0x80, 0xd3, 0xd8, 0xcc, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ +static const struct raw_rec id1_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, + 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, + 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, + 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ +static const struct raw_rec id1_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, + 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, + 0xb4, 0x7e, 0xef, 0x40, 0x2b, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ +static const struct raw_rec id1_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, + 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, + 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_ctrl_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, + 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, + 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ +static const struct raw_rec id2_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e, + 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, + 0x06, 0xdb, 0x79, 0xe5, 0x5d, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ +static const struct raw_rec id2_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, + 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, + 0x30, 0x48, 0x69, 0x1a, 0x47, + }, +}; + +FIXTURE(zero_len) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_VARIANT(zero_len) +{ + const struct raw_rec *recs[4]; + ssize_t recv_ret[4]; +}; + +FIXTURE_VARIANT_ADD(zero_len, data_data_data) +{ + .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, + .recv_ret = { 33, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) +{ + .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, + .recv_ret = { 11, 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, + .recv_ret = { 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) +{ + .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) +{ + .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_SETUP(zero_len) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + if (self->notls) + return; + + /* Don't install keys on fd, we'll send raw records */ + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(zero_len) +{ + close(self->fd); + close(self->cfd); +} + +TEST_F(zero_len, test) +{ + const struct raw_rec *const *rec; + unsigned char buf[128]; + int rec_off; + int i; + + for (i = 0; i < 4 && variant->recs[i]; i++) + EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, + variant->recs[i]->cipher_len, 0), + variant->recs[i]->cipher_len); + + rec = &variant->recs[0]; + rec_off = 0; + for (i = 0; i < 4; i++) { + int j, ret; + + ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1; + EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, + buf, sizeof(buf), MSG_DONTWAIT), ret); + if (ret == -1) + EXPECT_EQ(errno, -variant->recv_ret[i]); + if (variant->recv_ret[i] == -EAGAIN) + break; + + for (j = 0; j < ret; j++) { + while (rec_off == (*rec)->plain_len) { + rec++; + rec_off = 0; + } + EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); + rec_off++; + } + } +}; + FIXTURE(tls_err) { int fd, cfd; @@ -2708,6 +2998,67 @@ TEST(prequeue) { close(cfd); } +TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL), + sizeof(buf) / 2); + exit(!__test_passed(_metadata)); + } + + usleep(10000); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f; diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 66773685a047..94bba4553130 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -202,11 +202,11 @@ static void print_first_lines(char *text, int nr) int offs = end - text; text[offs] = '\0'; - printf(text); + printf("%s", text); text[offs] = '\n'; printf("\n"); } else { - printf(text); + printf("%s", text); } } @@ -221,7 +221,7 @@ static void print_last_lines(char *text, int nr) nr--; start--; } - printf(start); + printf("%s", start); } static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 1c9ceb661c43..0cfbb111a2d0 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -6,7 +6,6 @@ #include <bpf/bpf.h> #include <sched.h> #include <scx/common.h> -#include <sched.h> #include <sys/wait.h> #include <unistd.h> diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 23a61e5b99d0..998e5a2f4579 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -186,6 +186,204 @@ ] }, { + "id": "34c0", + "name": "Test TBF with HHF Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "hhf" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 hhf limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 hhf limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "fd68", + "name": "Test TBF with CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "514e", + "name": "Test TBF with PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "6c97", + "name": "Test TBF with FQ Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "5d0b", + "name": "Test TBF with FQ_CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "21c3", + "name": "Test TBF with FQ_PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { "id": "a4bb", "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", "category": [ diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 2d93ac860bd5..cd9fe69ecce2 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -20,7 +20,7 @@ static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, struct io_uring_sqe *sqe[1]; ublk_io_alloc_sqes(t, sqe, 1); - io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); + io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); @@ -42,7 +42,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (!sqe[0]) return -ENOMEM; - io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, + io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, addr, iod->nr_sectors << 9, iod->start_sector << 9); @@ -56,19 +56,19 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); - io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0, + io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0, iod->nr_sectors << 9, iod->start_sector << 9); sqe[1]->buf_index = tag; sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1); - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); return 2; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 95188065b2e9..b71faba86c3b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -432,7 +432,7 @@ static void ublk_thread_deinit(struct ublk_thread *t) } } -static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) +static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags) { struct ublk_dev *dev = q->dev; int depth = dev->dev_info.queue_depth; @@ -446,6 +446,9 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) q->flags = dev->dev_info.flags; q->flags |= extra_flags; + /* Cache fd in queue for fast path access */ + q->ublk_fd = dev->fds[0]; + cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ, @@ -481,9 +484,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) return -ENOMEM; } -static int ublk_thread_init(struct ublk_thread *t) +static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags) { struct ublk_dev *dev = t->dev; + unsigned long long flags = dev->dev_info.flags | extra_flags; int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth; int ret; @@ -512,7 +516,17 @@ static int ublk_thread_init(struct ublk_thread *t) io_uring_register_ring_fd(&t->ring); - ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + /* Register only backing files starting from index 1, exclude ublk control device */ + if (dev->nr_fds > 1) { + ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1); + } else { + /* No backing files to register, skip file registration */ + ret = 0; + } + } else { + ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds); + } if (ret) { ublk_err("ublk dev %d thread %d register files failed %d\n", t->dev->dev_info.dev_id, t->idx, ret); @@ -626,9 +640,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) /* These fields should be written once, never change */ ublk_set_sqe_cmd_op(sqe[0], cmd_op); - sqe[0]->fd = 0; /* dev->fds[0] */ + sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */ sqe[0]->opcode = IORING_OP_URING_CMD; - sqe[0]->flags = IOSQE_FIXED_FILE; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe[0]->flags = 0; /* Use raw FD, not fixed file */ + else + sqe[0]->flags = IOSQE_FIXED_FILE; sqe[0]->rw_flags = 0; cmd->tag = io->tag; cmd->q_id = q->q_id; @@ -832,6 +849,7 @@ struct ublk_thread_info { unsigned idx; sem_t *ready; cpu_set_t *affinity; + unsigned long long extra_flags; }; static void *ublk_io_handler_fn(void *data) @@ -844,7 +862,7 @@ static void *ublk_io_handler_fn(void *data) t->dev = info->dev; t->idx = info->idx; - ret = ublk_thread_init(t); + ret = ublk_thread_init(t, info->extra_flags); if (ret) { ublk_err("ublk dev %d thread %u init failed\n", dev_id, t->idx); @@ -934,6 +952,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->auto_zc_fallback) extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; + if (ctx->no_ublk_fixed_fd) + extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD; for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; @@ -951,6 +971,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) tinfo[i].dev = dev; tinfo[i].idx = i; tinfo[i].ready = &ready; + tinfo[i].extra_flags = extra_flags; /* * If threads are not tied 1:1 to queues, setting thread @@ -1400,7 +1421,7 @@ static int cmd_dev_get_features(void) if (!((1ULL << i) & features)) continue; - if (i < sizeof(feat_map) / sizeof(feat_map[0])) + if (i < ARRAY_SIZE(feat_map)) feat = feat_map[i]; else feat = "unknown"; @@ -1471,13 +1492,13 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); - printf("\t[-e 0|1 ] [-i 0|1]\n"); + printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n"); printf("\t[--nthreads threads] [--per_io_tasks]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("\tdefault: nthreads=nr_queues"); - for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { const struct ublk_tgt_ops *ops = tgt_ops_list[i]; if (ops->usage) @@ -1534,6 +1555,7 @@ int main(int argc, char *argv[]) { "size", 1, NULL, 's'}, { "nthreads", 1, NULL, 0 }, { "per_io_tasks", 0, NULL, 0 }, + { "no_ublk_fixed_fd", 0, NULL, 0 }, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1613,6 +1635,8 @@ int main(int argc, char *argv[]) ctx.nthreads = strtol(optarg, NULL, 10); if (!strcmp(longopts[option_idx].name, "per_io_tasks")) ctx.per_io_tasks = 1; + if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd")) + ctx.no_ublk_fixed_fd = 1; break; case '?': /* diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 219233f8a053..5e55484fb0aa 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -77,6 +77,7 @@ struct dev_ctx { unsigned int recovery:1; unsigned int auto_zc_fallback:1; unsigned int per_io_tasks:1; + unsigned int no_ublk_fixed_fd:1; int _evtfd; int _shmid; @@ -166,7 +167,9 @@ struct ublk_queue { /* borrow one bit of ublk uapi flags, which may never be used */ #define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) +#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62) __u64 flags; + int ublk_fd; /* cached ublk char device fd */ struct ublk_io ios[UBLK_QUEUE_DEPTH]; }; @@ -273,34 +276,48 @@ static inline int ublk_io_alloc_sqes(struct ublk_thread *t, return nr_sqes; } -static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index) +{ + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) { + if (fd_index == 0) + /* Return the raw ublk FD for index 0 */ + return q->ublk_fd; + /* Adjust index for backing files (index 1 becomes 0, etc.) */ + return fd_index - 1; + } + return fd_index; +} + +static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + int dev_fd = ublk_get_registered_fd(q, 0); io_uring_prep_read(sqe, dev_fd, 0, 0, 0); sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; - sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; + if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) + sqe->flags &= ~IOSQE_FIXED_FILE; + else + sqe->flags |= IOSQE_FIXED_FILE; cmd->tag = tag; cmd->addr = index; cmd->q_id = q_id; } -static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, - int dev_fd, int tag, int q_id, __u64 index) +static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) { - struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd; + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); + sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF; +} - io_uring_prep_read(sqe, dev_fd, 0, 0, 0); - sqe->opcode = IORING_OP_URING_CMD; - sqe->flags |= IOSQE_FIXED_FILE; +static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, + struct ublk_queue *q, int tag, int q_id, __u64 index) +{ + __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index); sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF; - - cmd->tag = tag; - cmd->addr = index; - cmd->q_id = q_id; } static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index f0e0003a4860..280043f6b689 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -63,7 +63,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, 3); - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; @@ -71,7 +71,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, __setup_nop_io(tag, iod, sqe[1], q->q_id); sqe[1]->flags |= IOSQE_IO_HARDLINK; - io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); + io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1); // buf register is marked as IOSQE_CQE_SKIP_SUCCESS diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 1fb9b7cc281b..791fa8dc1651 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -142,7 +142,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, ublk_io_alloc_sqes(t, sqe, s->nr + extra); if (zc) { - io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; sqe[0]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1); @@ -168,7 +168,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, if (zc) { struct io_uring_sqe *unreg = sqe[s->nr + 1]; - io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index); + io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, io->buf_index); unreg->user_data = build_user_data( tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1); } diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index 40d1437ca298..3f901db4d09d 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -28,14 +28,14 @@ _create_backfile 0 256M _create_backfile 1 128M _create_backfile 2 128M -ublk_io_and_kill_daemon 8G -t null -q 4 -z & -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & if _have_feature "AUTO_BUF_REG"; then ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & - ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & fi diff --git a/tools/testing/shared/linux/idr.h b/tools/testing/shared/linux/idr.h index 4e342f2e37cf..676c5564e33f 100644 --- a/tools/testing/shared/linux/idr.h +++ b/tools/testing/shared/linux/idr.h @@ -1 +1,5 @@ +/* Avoid duplicate definitions due to system headers. */ +#ifdef __CONCAT +#undef __CONCAT +#endif #include "../../../../include/linux/idr.h" diff --git a/tools/tracing/latency/Makefile.config b/tools/tracing/latency/Makefile.config index 0fe6b50f029b..6efa13e3ca93 100644 --- a/tools/tracing/latency/Makefile.config +++ b/tools/tracing/latency/Makefile.config @@ -1,7 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-only +include $(srctree)/tools/scripts/utilities.mak + STOP_ERROR := +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 5f2231d8d626..07ff5e8f3006 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -1,10 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-only +include $(srctree)/tools/scripts/utilities.mak + STOP_ERROR := LIBTRACEEVENT_MIN_VERSION = 1.5 LIBTRACEFS_MIN_VERSION = 1.6 +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) |