diff options
Diffstat (limited to 'tools/arch')
19 files changed, 243 insertions, 559 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index b551b741653d2..0000000000000 --- a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include <asm/ptrace.h> - -typedef struct user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index f8129c624b070..f7ddd73a8c0fa 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */ +struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; +}; + #define KVM_ARM_TAGS_TO_GUEST 0 #define KVM_ARM_TAGS_FROM_GUEST 1 @@ -372,6 +381,10 @@ enum { #endif }; +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -411,6 +424,8 @@ enum { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 @@ -469,6 +484,27 @@ enum { /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) +enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + +#ifdef __KERNEL__ + NR_SMCCC_FILTER_ACTIONS +#endif +}; + +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + +/* arm64-specific KVM_EXIT_HYPERCALL flags */ +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_16BIT (1U << 1) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h index d4e32b3d48437..00b4ba1e5cdf0 100644 --- a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h +++ b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h @@ -2,7 +2,7 @@ #ifndef __ASM_LOONGARCH_BITSPERLONG_H #define __ASM_LOONGARCH_BITSPERLONG_H -#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) +#define __BITS_PER_LONG (__SIZEOF_LONG__ * 8) #include <asm-generic/bitsperlong.h> diff --git a/tools/arch/loongarch/include/uapi/asm/perf_regs.h b/tools/arch/loongarch/include/uapi/asm/perf_regs.h new file mode 100644 index 0000000000000..29d69c00fc7a6 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_LOONGARCH_PERF_REGS_H +#define _ASM_LOONGARCH_PERF_REGS_H + +enum perf_event_loongarch_regs { + PERF_REG_LOONGARCH_PC, + PERF_REG_LOONGARCH_R1, + PERF_REG_LOONGARCH_R2, + PERF_REG_LOONGARCH_R3, + PERF_REG_LOONGARCH_R4, + PERF_REG_LOONGARCH_R5, + PERF_REG_LOONGARCH_R6, + PERF_REG_LOONGARCH_R7, + PERF_REG_LOONGARCH_R8, + PERF_REG_LOONGARCH_R9, + PERF_REG_LOONGARCH_R10, + PERF_REG_LOONGARCH_R11, + PERF_REG_LOONGARCH_R12, + PERF_REG_LOONGARCH_R13, + PERF_REG_LOONGARCH_R14, + PERF_REG_LOONGARCH_R15, + PERF_REG_LOONGARCH_R16, + PERF_REG_LOONGARCH_R17, + PERF_REG_LOONGARCH_R18, + PERF_REG_LOONGARCH_R19, + PERF_REG_LOONGARCH_R20, + PERF_REG_LOONGARCH_R21, + PERF_REG_LOONGARCH_R22, + PERF_REG_LOONGARCH_R23, + PERF_REG_LOONGARCH_R24, + PERF_REG_LOONGARCH_R25, + PERF_REG_LOONGARCH_R26, + PERF_REG_LOONGARCH_R27, + PERF_REG_LOONGARCH_R28, + PERF_REG_LOONGARCH_R29, + PERF_REG_LOONGARCH_R30, + PERF_REG_LOONGARCH_R31, + PERF_REG_LOONGARCH_MAX, +}; +#endif /* _ASM_LOONGARCH_PERF_REGS_H */ diff --git a/tools/arch/loongarch/include/uapi/asm/unistd.h b/tools/arch/loongarch/include/uapi/asm/unistd.h new file mode 100644 index 0000000000000..0c743344e92de --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/unistd.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ + +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 + +#include <asm-generic/unistd.h> diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index 0a8e37a519f25..0000000000000 --- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include "ptrace.h" - -typedef user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h deleted file mode 100644 index ad64d673b5e6b..0000000000000 --- a/tools/arch/s390/include/uapi/asm/ptrace.h +++ /dev/null @@ -1,458 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * S390 version - * Copyright IBM Corp. 1999, 2000 - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) - */ - -#ifndef _UAPI_S390_PTRACE_H -#define _UAPI_S390_PTRACE_H - -/* - * Offsets in the user_regs_struct. They are used for the ptrace - * system call and in entry.S - */ -#ifndef __s390x__ - -#define PT_PSWMASK 0x00 -#define PT_PSWADDR 0x04 -#define PT_GPR0 0x08 -#define PT_GPR1 0x0C -#define PT_GPR2 0x10 -#define PT_GPR3 0x14 -#define PT_GPR4 0x18 -#define PT_GPR5 0x1C -#define PT_GPR6 0x20 -#define PT_GPR7 0x24 -#define PT_GPR8 0x28 -#define PT_GPR9 0x2C -#define PT_GPR10 0x30 -#define PT_GPR11 0x34 -#define PT_GPR12 0x38 -#define PT_GPR13 0x3C -#define PT_GPR14 0x40 -#define PT_GPR15 0x44 -#define PT_ACR0 0x48 -#define PT_ACR1 0x4C -#define PT_ACR2 0x50 -#define PT_ACR3 0x54 -#define PT_ACR4 0x58 -#define PT_ACR5 0x5C -#define PT_ACR6 0x60 -#define PT_ACR7 0x64 -#define PT_ACR8 0x68 -#define PT_ACR9 0x6C -#define PT_ACR10 0x70 -#define PT_ACR11 0x74 -#define PT_ACR12 0x78 -#define PT_ACR13 0x7C -#define PT_ACR14 0x80 -#define PT_ACR15 0x84 -#define PT_ORIGGPR2 0x88 -#define PT_FPC 0x90 -/* - * A nasty fact of life that the ptrace api - * only supports passing of longs. - */ -#define PT_FPR0_HI 0x98 -#define PT_FPR0_LO 0x9C -#define PT_FPR1_HI 0xA0 -#define PT_FPR1_LO 0xA4 -#define PT_FPR2_HI 0xA8 -#define PT_FPR2_LO 0xAC -#define PT_FPR3_HI 0xB0 -#define PT_FPR3_LO 0xB4 -#define PT_FPR4_HI 0xB8 -#define PT_FPR4_LO 0xBC -#define PT_FPR5_HI 0xC0 -#define PT_FPR5_LO 0xC4 -#define PT_FPR6_HI 0xC8 -#define PT_FPR6_LO 0xCC -#define PT_FPR7_HI 0xD0 -#define PT_FPR7_LO 0xD4 -#define PT_FPR8_HI 0xD8 -#define PT_FPR8_LO 0XDC -#define PT_FPR9_HI 0xE0 -#define PT_FPR9_LO 0xE4 -#define PT_FPR10_HI 0xE8 -#define PT_FPR10_LO 0xEC -#define PT_FPR11_HI 0xF0 -#define PT_FPR11_LO 0xF4 -#define PT_FPR12_HI 0xF8 -#define PT_FPR12_LO 0xFC -#define PT_FPR13_HI 0x100 -#define PT_FPR13_LO 0x104 -#define PT_FPR14_HI 0x108 -#define PT_FPR14_LO 0x10C -#define PT_FPR15_HI 0x110 -#define PT_FPR15_LO 0x114 -#define PT_CR_9 0x118 -#define PT_CR_10 0x11C -#define PT_CR_11 0x120 -#define PT_IEEE_IP 0x13C -#define PT_LASTOFF PT_IEEE_IP -#define PT_ENDREGS 0x140-1 - -#define GPR_SIZE 4 -#define CR_SIZE 4 - -#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */ - -#else /* __s390x__ */ - -#define PT_PSWMASK 0x00 -#define PT_PSWADDR 0x08 -#define PT_GPR0 0x10 -#define PT_GPR1 0x18 -#define PT_GPR2 0x20 -#define PT_GPR3 0x28 -#define PT_GPR4 0x30 -#define PT_GPR5 0x38 -#define PT_GPR6 0x40 -#define PT_GPR7 0x48 -#define PT_GPR8 0x50 -#define PT_GPR9 0x58 -#define PT_GPR10 0x60 -#define PT_GPR11 0x68 -#define PT_GPR12 0x70 -#define PT_GPR13 0x78 -#define PT_GPR14 0x80 -#define PT_GPR15 0x88 -#define PT_ACR0 0x90 -#define PT_ACR1 0x94 -#define PT_ACR2 0x98 -#define PT_ACR3 0x9C -#define PT_ACR4 0xA0 -#define PT_ACR5 0xA4 -#define PT_ACR6 0xA8 -#define PT_ACR7 0xAC -#define PT_ACR8 0xB0 -#define PT_ACR9 0xB4 -#define PT_ACR10 0xB8 -#define PT_ACR11 0xBC -#define PT_ACR12 0xC0 -#define PT_ACR13 0xC4 -#define PT_ACR14 0xC8 -#define PT_ACR15 0xCC -#define PT_ORIGGPR2 0xD0 -#define PT_FPC 0xD8 -#define PT_FPR0 0xE0 -#define PT_FPR1 0xE8 -#define PT_FPR2 0xF0 -#define PT_FPR3 0xF8 -#define PT_FPR4 0x100 -#define PT_FPR5 0x108 -#define PT_FPR6 0x110 -#define PT_FPR7 0x118 -#define PT_FPR8 0x120 -#define PT_FPR9 0x128 -#define PT_FPR10 0x130 -#define PT_FPR11 0x138 -#define PT_FPR12 0x140 -#define PT_FPR13 0x148 -#define PT_FPR14 0x150 -#define PT_FPR15 0x158 -#define PT_CR_9 0x160 -#define PT_CR_10 0x168 -#define PT_CR_11 0x170 -#define PT_IEEE_IP 0x1A8 -#define PT_LASTOFF PT_IEEE_IP -#define PT_ENDREGS 0x1B0-1 - -#define GPR_SIZE 8 -#define CR_SIZE 8 - -#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ - -#endif /* __s390x__ */ - -#define NUM_GPRS 16 -#define NUM_FPRS 16 -#define NUM_CRS 16 -#define NUM_ACRS 16 - -#define NUM_CR_WORDS 3 - -#define FPR_SIZE 8 -#define FPC_SIZE 4 -#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */ -#define ACR_SIZE 4 - - -#define PTRACE_OLDSETOPTIONS 21 -#define PTRACE_SYSEMU 31 -#define PTRACE_SYSEMU_SINGLESTEP 32 -#ifndef __ASSEMBLY__ -#include <linux/stddef.h> -#include <linux/types.h> - -typedef union { - float f; - double d; - __u64 ui; - struct - { - __u32 hi; - __u32 lo; - } fp; -} freg_t; - -typedef struct { - __u32 fpc; - __u32 pad; - freg_t fprs[NUM_FPRS]; -} s390_fp_regs; - -#define FPC_EXCEPTION_MASK 0xF8000000 -#define FPC_FLAGS_MASK 0x00F80000 -#define FPC_DXC_MASK 0x0000FF00 -#define FPC_RM_MASK 0x00000003 - -/* this typedef defines how a Program Status Word looks like */ -typedef struct { - unsigned long mask; - unsigned long addr; -} __attribute__ ((aligned(8))) psw_t; - -#ifndef __s390x__ - -#define PSW_MASK_PER 0x40000000UL -#define PSW_MASK_DAT 0x04000000UL -#define PSW_MASK_IO 0x02000000UL -#define PSW_MASK_EXT 0x01000000UL -#define PSW_MASK_KEY 0x00F00000UL -#define PSW_MASK_BASE 0x00080000UL /* always one */ -#define PSW_MASK_MCHECK 0x00040000UL -#define PSW_MASK_WAIT 0x00020000UL -#define PSW_MASK_PSTATE 0x00010000UL -#define PSW_MASK_ASC 0x0000C000UL -#define PSW_MASK_CC 0x00003000UL -#define PSW_MASK_PM 0x00000F00UL -#define PSW_MASK_RI 0x00000000UL -#define PSW_MASK_EA 0x00000000UL -#define PSW_MASK_BA 0x00000000UL - -#define PSW_MASK_USER 0x0000FF00UL - -#define PSW_ADDR_AMODE 0x80000000UL -#define PSW_ADDR_INSN 0x7FFFFFFFUL - -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) - -#define PSW_ASC_PRIMARY 0x00000000UL -#define PSW_ASC_ACCREG 0x00004000UL -#define PSW_ASC_SECONDARY 0x00008000UL -#define PSW_ASC_HOME 0x0000C000UL - -#else /* __s390x__ */ - -#define PSW_MASK_PER 0x4000000000000000UL -#define PSW_MASK_DAT 0x0400000000000000UL -#define PSW_MASK_IO 0x0200000000000000UL -#define PSW_MASK_EXT 0x0100000000000000UL -#define PSW_MASK_BASE 0x0000000000000000UL -#define PSW_MASK_KEY 0x00F0000000000000UL -#define PSW_MASK_MCHECK 0x0004000000000000UL -#define PSW_MASK_WAIT 0x0002000000000000UL -#define PSW_MASK_PSTATE 0x0001000000000000UL -#define PSW_MASK_ASC 0x0000C00000000000UL -#define PSW_MASK_CC 0x0000300000000000UL -#define PSW_MASK_PM 0x00000F0000000000UL -#define PSW_MASK_RI 0x0000008000000000UL -#define PSW_MASK_EA 0x0000000100000000UL -#define PSW_MASK_BA 0x0000000080000000UL - -#define PSW_MASK_USER 0x0000FF0180000000UL - -#define PSW_ADDR_AMODE 0x0000000000000000UL -#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL - -#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) - -#define PSW_ASC_PRIMARY 0x0000000000000000UL -#define PSW_ASC_ACCREG 0x0000400000000000UL -#define PSW_ASC_SECONDARY 0x0000800000000000UL -#define PSW_ASC_HOME 0x0000C00000000000UL - -#endif /* __s390x__ */ - - -/* - * The s390_regs structure is used to define the elf_gregset_t. - */ -typedef struct { - psw_t psw; - unsigned long gprs[NUM_GPRS]; - unsigned int acrs[NUM_ACRS]; - unsigned long orig_gpr2; -} s390_regs; - -/* - * The user_pt_regs structure exports the beginning of - * the in-kernel pt_regs structure to user space. - */ -typedef struct { - unsigned long args[1]; - psw_t psw; - unsigned long gprs[NUM_GPRS]; -} user_pt_regs; - -/* - * Now for the user space program event recording (trace) definitions. - * The following structures are used only for the ptrace interface, don't - * touch or even look at it if you don't want to modify the user-space - * ptrace interface. In particular stay away from it for in-kernel PER. - */ -typedef struct { - unsigned long cr[NUM_CR_WORDS]; -} per_cr_words; - -#define PER_EM_MASK 0xE8000000UL - -typedef struct { -#ifdef __s390x__ - unsigned : 32; -#endif /* __s390x__ */ - unsigned em_branching : 1; - unsigned em_instruction_fetch : 1; - /* - * Switching on storage alteration automatically fixes - * the storage alteration event bit in the users std. - */ - unsigned em_storage_alteration : 1; - unsigned em_gpr_alt_unused : 1; - unsigned em_store_real_address : 1; - unsigned : 3; - unsigned branch_addr_ctl : 1; - unsigned : 1; - unsigned storage_alt_space_ctl : 1; - unsigned : 21; - unsigned long starting_addr; - unsigned long ending_addr; -} per_cr_bits; - -typedef struct { - unsigned short perc_atmid; - unsigned long address; - unsigned char access_id; -} per_lowcore_words; - -typedef struct { - unsigned perc_branching : 1; - unsigned perc_instruction_fetch : 1; - unsigned perc_storage_alteration : 1; - unsigned perc_gpr_alt_unused : 1; - unsigned perc_store_real_address : 1; - unsigned : 3; - unsigned atmid_psw_bit_31 : 1; - unsigned atmid_validity_bit : 1; - unsigned atmid_psw_bit_32 : 1; - unsigned atmid_psw_bit_5 : 1; - unsigned atmid_psw_bit_16 : 1; - unsigned atmid_psw_bit_17 : 1; - unsigned si : 2; - unsigned long address; - unsigned : 4; - unsigned access_id : 4; -} per_lowcore_bits; - -typedef struct { - union { - per_cr_words words; - per_cr_bits bits; - } control_regs; - /* - * The single_step and instruction_fetch bits are obsolete, - * the kernel always sets them to zero. To enable single - * stepping use ptrace(PTRACE_SINGLESTEP) instead. - */ - unsigned single_step : 1; - unsigned instruction_fetch : 1; - unsigned : 30; - /* - * These addresses are copied into cr10 & cr11 if single - * stepping is switched off - */ - unsigned long starting_addr; - unsigned long ending_addr; - union { - per_lowcore_words words; - per_lowcore_bits bits; - } lowcore; -} per_struct; - -typedef struct { - unsigned int len; - unsigned long kernel_addr; - unsigned long process_addr; -} ptrace_area; - -/* - * S/390 specific non posix ptrace requests. I chose unusual values so - * they are unlikely to clash with future ptrace definitions. - */ -#define PTRACE_PEEKUSR_AREA 0x5000 -#define PTRACE_POKEUSR_AREA 0x5001 -#define PTRACE_PEEKTEXT_AREA 0x5002 -#define PTRACE_PEEKDATA_AREA 0x5003 -#define PTRACE_POKETEXT_AREA 0x5004 -#define PTRACE_POKEDATA_AREA 0x5005 -#define PTRACE_GET_LAST_BREAK 0x5006 -#define PTRACE_PEEK_SYSTEM_CALL 0x5007 -#define PTRACE_POKE_SYSTEM_CALL 0x5008 -#define PTRACE_ENABLE_TE 0x5009 -#define PTRACE_DISABLE_TE 0x5010 -#define PTRACE_TE_ABORT_RAND 0x5011 - -/* - * The numbers chosen here are somewhat arbitrary but absolutely MUST - * not overlap with any of the number assigned in <linux/ptrace.h>. - */ -#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ - -/* - * PT_PROT definition is loosely based on hppa bsd definition in - * gdb/hppab-nat.c - */ -#define PTRACE_PROT 21 - -typedef enum { - ptprot_set_access_watchpoint, - ptprot_set_write_watchpoint, - ptprot_disable_watchpoint -} ptprot_flags; - -typedef struct { - unsigned long lowaddr; - unsigned long hiaddr; - ptprot_flags prot; -} ptprot_area; - -/* Sequence of bytes for breakpoint illegal instruction. */ -#define S390_BREAKPOINT {0x0,0x1} -#define S390_BREAKPOINT_U16 ((__u16)0x0001) -#define S390_SYSCALL_OPCODE ((__u16)0x0a00) -#define S390_SYSCALL_SIZE 2 - -/* - * The user_regs_struct defines the way the user registers are - * store on the stack for signal handling. - */ -struct user_regs_struct { - psw_t psw; - unsigned long gprs[NUM_GPRS]; - unsigned int acrs[NUM_ACRS]; - unsigned long orig_gpr2; - s390_fp_regs fp_regs; - /* - * These per registers are in here so that gdb can modify them - * itself as there is no "official" ptrace interface for hardware - * watchpoints. This is the way intel does it. - */ - per_struct per_info; - unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ -}; - -#endif /* __ASSEMBLY__ */ - -#endif /* _UAPI_S390_PTRACE_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b89005819cd55..cb8ca46213bed 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -97,7 +97,7 @@ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ +/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ @@ -226,10 +226,9 @@ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ @@ -307,14 +306,21 @@ #define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ #define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ #define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -331,6 +337,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ #define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ #define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ @@ -363,6 +370,7 @@ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ #define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -427,6 +435,13 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ + /* * BUG word(s) */ @@ -467,5 +482,6 @@ #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 5dfa4fb76f4b2..fafe9be7a6f4f 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -75,6 +75,12 @@ # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) #endif +#ifdef CONFIG_ADDRESS_MASKING +# define DISABLE_LAM 0 +#else +# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -115,7 +121,7 @@ #define DISABLED_MASK10 0 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ DISABLE_CALL_DEPTH_TRACKING) -#define DISABLED_MASK12 0 +#define DISABLED_MASK12 (DISABLE_LAM) #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index ad35355ee43ea..3aedae61af4fc 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -206,6 +206,8 @@ /* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ #define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 1343a62106de9..46d7e06763c9f 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -39,6 +39,12 @@ #define ORC_REG_SP_INDIRECT 9 #define ORC_REG_MAX 15 +#define ORC_TYPE_UNDEFINED 0 +#define ORC_TYPE_END_OF_STACK 1 +#define ORC_TYPE_CALL 2 +#define ORC_TYPE_REGS 3 +#define ORC_TYPE_REGS_PARTIAL 4 + #ifndef __ASSEMBLY__ #include <asm/byteorder.h> @@ -56,16 +62,14 @@ struct orc_entry { #if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; - unsigned type:2; + unsigned type:3; unsigned signal:1; - unsigned end:1; #elif defined(__BIG_ENDIAN_BITFIELD) unsigned bp_reg:4; unsigned sp_reg:4; unsigned unused:4; - unsigned end:1; unsigned signal:1; - unsigned type:2; + unsigned type:3; #endif } __packed; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 7f467fe05d42e..1a6a1f9879496 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,4 +559,7 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ +/* x86-specific KVM_EXIT_HYPERCALL flags. */ +#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index 500b96e71f186..e8d7ebbca1a4d 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -16,8 +16,16 @@ #define ARCH_GET_XCOMP_GUEST_PERM 0x1024 #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 +#define ARCH_XCOMP_TILECFG 17 +#define ARCH_XCOMP_TILEDATA 18 + #define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_GET_UNTAG_MASK 0x4001 +#define ARCH_ENABLE_TAGGED_ADDR 0x4002 +#define ARCH_GET_MAX_TAG_BITS 0x4003 +#define ARCH_FORCE_TAGGED_SVA 0x4004 + #endif /* _ASM_X86_PRCTL_H */ diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 2712d5e03e2eb..bc48a4dabe5db 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NR_fork +#define __NR_fork 2 +#endif #ifndef __NR_execve #define __NR_execve 11 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index a6f7fe84d4df1..f70d2cada2561 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NR_fork +#define __NR_fork 57 +#endif #ifndef __NR_execve #define __NR_execve 59 #endif diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index 4f1c4b0c29e98..e0c25b75327ee 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -184,8 +184,8 @@ 7, 0, EBX, 27, avx512er, AVX512 Exponent Reciproca instr 7, 0, EBX, 28, avx512cd, AVX512 Conflict Detection instr 7, 0, EBX, 29, sha, Intel Secure Hash Algorithm Extensions instr - 7, 0, EBX, 26, avx512bw, AVX512 Byte & Word instr - 7, 0, EBX, 28, avx512vl, AVX512 Vector Length Extentions (VL) + 7, 0, EBX, 30, avx512bw, AVX512 Byte & Word instr + 7, 0, EBX, 31, avx512vl, AVX512 Vector Length Extentions (VL) 7, 0, ECX, 0, prefetchwt1, X 7, 0, ECX, 1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions 7, 0, ECX, 2, umip, User-mode Instruction Prevention @@ -340,19 +340,70 @@ # According to SDM # 40000000H - 4FFFFFFFH is invalid range - # Leaf 80000001H # Extended Processor Signature and Feature Bits +0x80000001, 0, EAX, 27:20, extfamily, Extended family +0x80000001, 0, EAX, 19:16, extmodel, Extended model +0x80000001, 0, EAX, 11:8, basefamily, Description of Family +0x80000001, 0, EAX, 11:8, basemodel, Model numbers vary with product +0x80000001, 0, EAX, 3:0, stepping, Processor stepping (revision) for a specific model + +0x80000001, 0, EBX, 31:28, pkgtype, Specifies the package type + 0x80000001, 0, ECX, 0, lahf_lm, LAHF/SAHF available in 64-bit mode +0x80000001, 0, ECX, 1, cmplegacy, Core multi-processing legacy mode +0x80000001, 0, ECX, 2, svm, Indicates support for: VMRUN, VMLOAD, VMSAVE, CLGI, VMMCALL, and INVLPGA +0x80000001, 0, ECX, 3, extapicspace, Extended APIC register space +0x80000001, 0, ECX, 4, altmovecr8, Indicates support for LOCK MOV CR0 means MOV CR8 0x80000001, 0, ECX, 5, lzcnt, LZCNT +0x80000001, 0, ECX, 6, sse4a, EXTRQ, INSERTQ, MOVNTSS, and MOVNTSD instruction support +0x80000001, 0, ECX, 7, misalignsse, Misaligned SSE Mode 0x80000001, 0, ECX, 8, prefetchw, PREFETCHW - +0x80000001, 0, ECX, 9, osvw, OS Visible Work-around support +0x80000001, 0, ECX, 10, ibs, Instruction Based Sampling +0x80000001, 0, ECX, 11, xop, Extended operation support +0x80000001, 0, ECX, 12, skinit, SKINIT and STGI support +0x80000001, 0, ECX, 13, wdt, Watchdog timer support +0x80000001, 0, ECX, 15, lwp, Lightweight profiling support +0x80000001, 0, ECX, 16, fma4, Four-operand FMA instruction support +0x80000001, 0, ECX, 17, tce, Translation cache extension +0x80000001, 0, ECX, 22, TopologyExtensions, Indicates support for Core::X86::Cpuid::CachePropEax0 and Core::X86::Cpuid::ExtApicId +0x80000001, 0, ECX, 23, perfctrextcore, Indicates support for Core::X86::Msr::PERF_CTL0 - 5 and Core::X86::Msr::PERF_CTR +0x80000001, 0, ECX, 24, perfctrextdf, Indicates support for Core::X86::Msr::DF_PERF_CTL and Core::X86::Msr::DF_PERF_CTR +0x80000001, 0, ECX, 26, databreakpointextension, Indicates data breakpoint support for Core::X86::Msr::DR0_ADDR_MASK, Core::X86::Msr::DR1_ADDR_MASK, Core::X86::Msr::DR2_ADDR_MASK and Core::X86::Msr::DR3_ADDR_MASK +0x80000001, 0, ECX, 27, perftsc, Performance time-stamp counter supported +0x80000001, 0, ECX, 28, perfctrextllc, Indicates support for L3 performance counter extensions +0x80000001, 0, ECX, 29, mwaitextended, MWAITX and MONITORX capability is supported +0x80000001, 0, ECX, 30, admskextn, Indicates support for address mask extension (to 32 bits and to all 4 DRs) for instruction breakpoints + +0x80000001, 0, EDX, 0, fpu, x87 floating point unit on-chip +0x80000001, 0, EDX, 1, vme, Virtual-mode enhancements +0x80000001, 0, EDX, 2, de, Debugging extensions, IO breakpoints, CR4.DE +0x80000001, 0, EDX, 3, pse, Page-size extensions (4 MB pages) +0x80000001, 0, EDX, 4, tsc, Time stamp counter, RDTSC/RDTSCP instructions, CR4.TSD +0x80000001, 0, EDX, 5, msr, Model-specific registers (MSRs), with RDMSR and WRMSR instructions +0x80000001, 0, EDX, 6, pae, Physical-address extensions (PAE) +0x80000001, 0, EDX, 7, mce, Machine Check Exception, CR4.MCE +0x80000001, 0, EDX, 8, cmpxchg8b, CMPXCHG8B instruction +0x80000001, 0, EDX, 9, apic, advanced programmable interrupt controller (APIC) exists and is enabled 0x80000001, 0, EDX, 11, sysret, SYSCALL/SYSRET supported +0x80000001, 0, EDX, 12, mtrr, Memory-type range registers +0x80000001, 0, EDX, 13, pge, Page global extension, CR4.PGE +0x80000001, 0, EDX, 14, mca, Machine check architecture, MCG_CAP +0x80000001, 0, EDX, 15, cmov, Conditional move instructions, CMOV, FCOMI, FCMOV +0x80000001, 0, EDX, 16, pat, Page attribute table +0x80000001, 0, EDX, 17, pse36, Page-size extensions 0x80000001, 0, EDX, 20, exec_dis, Execute Disable Bit available +0x80000001, 0, EDX, 22, mmxext, AMD extensions to MMX instructions +0x80000001, 0, EDX, 23, mmx, MMX instructions +0x80000001, 0, EDX, 24, fxsr, FXSAVE and FXRSTOR instructions +0x80000001, 0, EDX, 25, ffxsr, FXSAVE and FXRSTOR instruction optimizations 0x80000001, 0, EDX, 26, 1gb_page, 1GB page supported 0x80000001, 0, EDX, 27, rdtscp, RDTSCP and IA32_TSC_AUX are available -#0x80000001, 0, EDX, 29, 64b, 64b Architecture supported +0x80000001, 0, EDX, 29, lm, 64b Architecture supported +0x80000001, 0, EDX, 30, threednowext, AMD extensions to 3DNow! instructions +0x80000001, 0, EDX, 31, threednow, 3DNow! instructions # Leaf 80000002H/80000003H/80000004H # Processor Brand String diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index dae75511fef71..416f5b35dd8f4 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -33,7 +33,7 @@ struct reg_desc { struct bits_desc descs[32]; }; -enum { +enum cpuid_reg { R_EAX = 0, R_EBX, R_ECX, @@ -41,6 +41,10 @@ enum { NR_REGS }; +static const char * const reg_names[] = { + "EAX", "EBX", "ECX", "EDX", +}; + struct subleaf { u32 index; u32 sub; @@ -428,12 +432,18 @@ static void parse_text(void) /* Decode every eax/ebx/ecx/edx */ -static void decode_bits(u32 value, struct reg_desc *rdesc) +static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) { struct bits_desc *bdesc; int start, end, i; u32 mask; + if (!rdesc->nr) { + if (show_details) + printf("\t %s: 0x%08x\n", reg_names[reg], value); + return; + } + for (i = 0; i < rdesc->nr; i++) { bdesc = &rdesc->descs[i]; @@ -468,13 +478,21 @@ static void show_leaf(struct subleaf *leaf) if (!leaf) return; - if (show_raw) + if (show_raw) { leaf_print_raw(leaf); + } else { + if (show_details) + printf("CPUID_0x%x_ECX[0x%x]:\n", + leaf->index, leaf->sub); + } + + decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX); + decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX); + decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX); + decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX); - decode_bits(leaf->eax, &leaf->info[R_EAX]); - decode_bits(leaf->ebx, &leaf->info[R_EBX]); - decode_bits(leaf->ecx, &leaf->info[R_ECX]); - decode_bits(leaf->edx, &leaf->info[R_EDX]); + if (!show_raw && show_details) + printf("\n"); } static void show_func(struct cpuid_func *func) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index a91ac666f7582..d055b82d22ccd 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -10,13 +10,6 @@ .section .noinstr.text, "ax" /* - * We build a jump to memcpy_orig by default which gets NOPped out on - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. - */ - -/* * memcpy - Copy a memory block. * * Input: @@ -26,17 +19,21 @@ * * Output: * rax original destination + * + * The FSRM alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep movsb' itself is small enough to replace the call, but the + * two register moves blow up the code. And one of them is "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ SYM_TYPED_FUNC_START(__memcpy) - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memcpy_erms", X86_FEATURE_ERMS + ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM movq %rdi, %rax movq %rdx, %rcx - shrq $3, %rcx - andl $7, %edx - rep movsq - movl %edx, %ecx rep movsb RET SYM_FUNC_END(__memcpy) @@ -45,17 +42,6 @@ EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) -/* - * memcpy_erms() - enhanced fast string memcpy. This is faster and - * simpler than memcpy. Use memcpy_erms when possible. - */ -SYM_FUNC_START_LOCAL(memcpy_erms) - movq %rdi, %rax - movq %rdx, %rcx - rep movsb - RET -SYM_FUNC_END(memcpy_erms) - SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 6143b1a6fa2ca..7c59a704c4584 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -18,27 +18,22 @@ * rdx count (bytes) * * rax original destination + * + * The FSRS alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep stosb' itself is small enough to replace the call, but all + * the register moves blow up the code. And two of them are "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ SYM_FUNC_START(__memset) - /* - * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended - * to use it when possible. If not available, use fast string instructions. - * - * Otherwise, use original memset function. - */ - ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memset_erms", X86_FEATURE_ERMS + ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 + movb %sil,%al movq %rdx,%rcx - andl $7,%edx - shrq $3,%rcx - /* expand byte value */ - movzbl %sil,%esi - movabs $0x0101010101010101,%rax - imulq %rsi,%rax - rep stosq - movl %edx,%ecx rep stosb movq %r9,%rax RET @@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset) SYM_FUNC_ALIAS(memset, __memset) EXPORT_SYMBOL(memset) -/* - * ISO C memset - set a memory block to a byte value. This function uses - * enhanced rep stosb to override the fast string function. - * The code is simpler and shorter than the fast string function as well. - * - * rdi destination - * rsi value (char) - * rdx count (bytes) - * - * rax original destination - */ -SYM_FUNC_START_LOCAL(memset_erms) - movq %rdi,%r9 - movb %sil,%al - movq %rdx,%rcx - rep stosb - movq %r9,%rax - RET -SYM_FUNC_END(memset_erms) - SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 |