diff options
| author | Mark Brown <broonie@kernel.org> | 2020-12-10 13:30:11 +0000 | 
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2020-12-10 13:30:11 +0000 | 
| commit | 49ab19a4a51a31cb06992386cec4be82ebca5a2d (patch) | |
| tree | de7d31ec7ded2c8ab8dbdfe5a55fa283068023d0 /arch | |
| parent | b0dfd948379c79b8754e224e29b99d30ce0d79b8 (diff) | |
| parent | 3b25f337929e73232f0aa990cd68a129f53652e2 (diff) | |
Merge series "spi: spi-geni-qcom: Use gpio descriptors for CS" from Stephen Boyd <swboyd@chromium.org>:
Collected patches from the two series below and associated tags so they
can be merged in one pile through the spi tree. Merry December!
SPI: https://lore.kernel.org/r/20201202214935.1114381-1-swboyd@chromium.org
cros-ec: https://lore.kernel.org/r/20201203011649.1405292-1-swboyd@chromium.org
Cc: Akash Asthana <akashast@codeaurora.org>
Cc: Simon Glass <sjg@chromium.org>
Cc: Gwendal Grignou <gwendal@chromium.org>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: Alexandru M Stan <amstan@chromium.org>
Stephen Boyd (3):
  platform/chrome: cros_ec_spi: Don't overwrite spi::mode
  platform/chrome: cros_ec_spi: Drop bits_per_word assignment
  spi: spi-geni-qcom: Use the new method of gpio CS control
 drivers/platform/chrome/cros_ec_spi.c | 2 --
 drivers/spi/spi-geni-qcom.c           | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)
base-commit: b65054597872ce3aefbc6a666385eabdf9e288da
--
https://chromeos.dev
Diffstat (limited to 'arch')
297 files changed, 2187 insertions, 1265 deletions
| diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 7462a7911002..4c7b0414a3ff 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);  void arch_cpu_idle(void)  {  	wtint(0); -	local_irq_enable(); +	raw_local_irq_enable();  }  void arch_cpu_idle_dead(void) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index c6606f4d20d6..fb98440c0bd4 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -243,10 +243,8 @@ static inline int constant_fls(unsigned int x)  		x <<= 2;  		r -= 2;  	} -	if (!(x & 0x80000000u)) { -		x <<= 1; +	if (!(x & 0x80000000u))  		r -= 1; -	}  	return r;  } diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index f1ed17edb085..163641726a2b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -134,8 +134,10 @@  #ifdef CONFIG_ARC_HAS_PAE40  #define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +#define MAX_POSSIBLE_PHYSMEM_BITS 40  #else  #define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE) +#define MAX_POSSIBLE_PHYSMEM_BITS 32  #endif  /************************************************************************** diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 17fd1ed700cc..9152782444b5 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -67,7 +67,22 @@  	sr	r5, [ARC_REG_LPB_CTRL]  1:  #endif /* CONFIG_ARC_LPB_DISABLE */ -#endif + +	/* On HSDK, CCMs need to remapped super early */ +#ifdef CONFIG_ARC_SOC_HSDK +	mov	r6, 0x60000000 +	lr	r5, [ARC_REG_ICCM_BUILD] +	breq	r5, 0, 1f +	sr	r6, [ARC_REG_AUX_ICCM] +1: +	lr	r5, [ARC_REG_DCCM_BUILD] +	breq	r5, 0, 2f +	sr	r6, [ARC_REG_AUX_DCCM] +2: +#endif	/* CONFIG_ARC_SOC_HSDK */ + +#endif	/* CONFIG_ISA_ARCV2 */ +  	; Config DSP_CTRL properly, so kernel may use integer multiply,  	; multiply-accumulate, and divide operations  	DSP_EARLY_INIT diff 
--git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index feba91c9d969..f73da203b170 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -38,15 +38,27 @@  #ifdef CONFIG_ARC_DW2_UNWIND -static void seed_unwind_frame_info(struct task_struct *tsk, -				   struct pt_regs *regs, -				   struct unwind_frame_info *frame_info) +static int +seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, +		       struct unwind_frame_info *frame_info)  { -	/* -	 * synchronous unwinding (e.g. dump_stack) -	 *  - uses current values of SP and friends -	 */ -	if (tsk == NULL && regs == NULL) { +	if (regs) { +		/* +		 * Asynchronous unwinding of intr/exception +		 *  - Just uses the pt_regs passed +		 */ +		frame_info->task = tsk; + +		frame_info->regs.r27 = regs->fp; +		frame_info->regs.r28 = regs->sp; +		frame_info->regs.r31 = regs->blink; +		frame_info->regs.r63 = regs->ret; +		frame_info->call_frame = 0; +	} else if (tsk == NULL || tsk == current) { +		/* +		 * synchronous unwinding (e.g. 
dump_stack) +		 *  - uses current values of SP and friends +		 */  		unsigned long fp, sp, blink, ret;  		frame_info->task = current; @@ -63,13 +75,17 @@ static void seed_unwind_frame_info(struct task_struct *tsk,  		frame_info->regs.r31 = blink;  		frame_info->regs.r63 = ret;  		frame_info->call_frame = 0; -	} else if (regs == NULL) { +	} else {  		/* -		 * Asynchronous unwinding of sleeping task -		 *  - Gets SP etc from task's pt_regs (saved bottom of kernel -		 *    mode stack of task) +		 * Asynchronous unwinding of a likely sleeping task +		 *  - first ensure it is actually sleeping +		 *  - if so, it will be in __switch_to, kernel mode SP of task +		 *    is safe-kept and BLINK at a well known location in there  		 */ +		if (tsk->state == TASK_RUNNING) +			return -1; +  		frame_info->task = tsk;  		frame_info->regs.r27 = TSK_K_FP(tsk); @@ -90,19 +106,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk,  		frame_info->regs.r28 += 60;  		frame_info->call_frame = 0; -	} else { -		/* -		 * Asynchronous unwinding of intr/exception -		 *  - Just uses the pt_regs passed -		 */ -		frame_info->task = tsk; - -		frame_info->regs.r27 = regs->fp; -		frame_info->regs.r28 = regs->sp; -		frame_info->regs.r31 = regs->blink; -		frame_info->regs.r63 = regs->ret; -		frame_info->call_frame = 0;  	} +	return 0;  }  #endif @@ -112,11 +117,12 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,  		int (*consumer_fn) (unsigned int, void *), void *arg)  {  #ifdef CONFIG_ARC_DW2_UNWIND -	int ret = 0; +	int ret = 0, cnt = 0;  	unsigned int address;  	struct unwind_frame_info frame_info; -	seed_unwind_frame_info(tsk, regs, &frame_info); +	if (seed_unwind_frame_info(tsk, regs, &frame_info)) +		return 0;  	while (1) {  		address = UNW_PC(&frame_info); @@ -132,6 +138,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,  			break;  		frame_info.regs.r63 = frame_info.regs.r31; + +		if (cnt++ > 128) { +			printk("unwinder looping too long, aborting 
!\n"); +			return 0; +		}  	}  	return address;		/* return the last address it saw */ diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index c340acd989a0..9bb3c24f3677 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -30,14 +30,14 @@   *  -Changes related to MMU v2 (Rel 4.8)   *   * Vineetg: Aug 29th 2008 - *  -In TLB Flush operations (Metal Fix MMU) there is a explict command to + *  -In TLB Flush operations (Metal Fix MMU) there is a explicit command to   *    flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,   *    it fails. Thus need to load it with ANY valid value before invoking   *    TLBIVUTLB cmd   *   * Vineetg: Aug 21th 2008:   *  -Reduced the duration of IRQ lockouts in TLB Flush routines - *  -Multiple copies of TLB erase code seperated into a "single" function + *  -Multiple copies of TLB erase code separated into a "single" function   *  -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID   *       in interrupt-safe region.   * @@ -66,7 +66,7 @@   *   * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has   * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. - * Given this, the thrasing problem should never happen because once the 3 + * Given this, the thrashing problem should never happen because once the 3   * J-TLB entries are created (even though 3rd will knock out one of the prev   * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy   * @@ -127,7 +127,7 @@ static void utlb_invalidate(void)  	 * There was however an obscure hardware bug, where uTLB flush would  	 * fail when a prior probe for J-TLB (both totally unrelated) would  	 * return lkup err - because the entry didn't exist in MMU. -	 * The Workround was to set Index reg with some valid value, prior to +	 * The Workaround was to set Index reg with some valid value, prior to  	 * flush. 
This was fixed in MMU v3  	 */  	unsigned int idx; @@ -272,7 +272,7 @@ noinline void local_flush_tlb_all(void)  }  /* - * Flush the entrie MM for userland. The fastest way is to move to Next ASID + * Flush the entire MM for userland. The fastest way is to move to Next ASID   */  noinline void local_flush_tlb_mm(struct mm_struct *mm)  { @@ -303,7 +303,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)   * Difference between this and Kernel Range Flush is   *  -Here the fastest way (if range is too large) is to move to next ASID   *      without doing any explicit Shootdown - *  -In case of kernel Flush, entry has to be shot down explictly + *  -In case of kernel Flush, entry has to be shot down explicitly   */  void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,  			   unsigned long end) @@ -620,7 +620,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,   * Super Page size is configurable in hardware (4K to 16M), but fixed once   * RTL builds.   * - * The exact THP size a Linx configuration will support is a function of: + * The exact THP size a Linux configuration will support is a function of:   *  - MMU page size (typical 8K, RTL fixed)   *  - software page walker address split between PGD:PTE:PFN (typical   *    11:8:13, but can be changed with 1 line) @@ -698,7 +698,7 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,  #endif -/* Read the Cache Build Confuration Registers, Decode them and save into +/* Read the Cache Build Configuration Registers, Decode them and save into   * the cpuinfo structure for later use.   
* No Validation is done here, simply read/convert the BCRs   */ @@ -803,13 +803,13 @@ void arc_mmu_init(void)  	pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));  	/* -	 * Can't be done in processor.h due to header include depenedencies +	 * Can't be done in processor.h due to header include dependencies  	 */  	BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE));  	/*  	 * stack top size sanity check, -	 * Can't be done in processor.h due to header include depenedencies +	 * Can't be done in processor.h due to header include dependencies  	 */  	BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); @@ -881,7 +881,7 @@ void arc_mmu_init(void)   *      the duplicate one.   * -Knob to be verbose abt it.(TODO: hook them up to debugfs)   */ -volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be silent abt it or complain (default) */  void do_tlb_overlap_fault(unsigned long cause, unsigned long address,  			  struct pt_regs *regs) @@ -948,7 +948,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,  /***********************************************************************   * Diagnostic Routines - *  -Called from Low Level TLB Hanlders if things don;t look good + *  -Called from Low Level TLB Handlers if things don;t look good   **********************************************************************/  #ifdef CONFIG_ARC_DBG_TLB_PARANOIA diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 0b63fc095b99..b3ea1fa11f87 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -17,22 +17,6 @@ int arc_hsdk_axi_dmac_coherent __section(".data") = 0;  #define ARC_CCM_UNUSED_ADDR	0x60000000 -static void __init hsdk_init_per_cpu(unsigned int cpu) -{ -	/* -	 * By default ICCM is mapped to 0x7z while this area is used for -	 * kernel virtual mappings, so move it to currently unused area. 
-	 */ -	if (cpuinfo_arc700[cpu].iccm.sz) -		write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR); - -	/* -	 * By default DCCM is mapped to 0x8z while this area is used by kernel, -	 * so move it to currently unused area. -	 */ -	if (cpuinfo_arc700[cpu].dccm.sz) -		write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR); -}  #define ARC_PERIPHERAL_BASE	0xf0000000  #define CREG_BASE		(ARC_PERIPHERAL_BASE + 0x1000) @@ -339,5 +323,4 @@ static const char *hsdk_compat[] __initconst = {  MACHINE_START(SIMULATION, "hsdk")  	.dt_compat	= hsdk_compat,  	.init_early     = hsdk_init_early, -	.init_per_cpu	= hsdk_init_per_cpu,  MACHINE_END diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b5446..caa27322a0ab 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1472,6 +1472,9 @@ ENTRY(efi_enter_kernel)  		@ issued from HYP mode take us to the correct handler code. We  		@ will disable the MMU before jumping to the kernel proper.  		
@ + ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE + THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE +		mcr	p15, 4, r1, c1, c0, 0  		adr	r0, __hyp_reentry_vectors  		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)  		isb diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index c220dc3c4e0f..243e35f7a56c 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -521,7 +521,7 @@  			ranges = <0x0 0x100000 0x8000>;  			mac_sw: switch@0 { -				compatible = "ti,am4372-cpsw","ti,cpsw-switch"; +				compatible = "ti,am4372-cpsw-switch", "ti,cpsw-switch";  				reg = <0x0 0x4000>;  				ranges = <0 0 0x4000>;  				clocks = <&cpsw_125mhz_gclk>, <&dpll_clksel_mac_clk>; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index b69c7d40f5d8..2f326151116b 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -32,8 +32,8 @@  				interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,  					     <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;  				interrupt-names = "int0", "int1"; -				clocks = <&mcan_clk>, <&l3_iclk_div>; -				clock-names = "cclk", "hclk"; +				clocks = <&l3_iclk_div>, <&mcan_clk>; +				clock-names = "hclk", "cclk";  				bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;  			};  		}; diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index ab291cec650a..2983e91bc7dd 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -122,7 +122,6 @@  };  &clock { -	clocks = <&clock CLK_XUSBXTI>;  	assigned-clocks = <&clock CLK_FOUT_EPLL>;  	assigned-clock-rates = <45158401>;  }; diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts index 878e89c20190..4ea5c23f181b 100644 --- a/arch/arm/boot/dts/imx50-evk.dts +++ b/arch/arm/boot/dts/imx50-evk.dts @@ -59,7 +59,7 @@  				MX50_PAD_CSPI_MISO__CSPI_MISO		0x00  				MX50_PAD_CSPI_MOSI__CSPI_MOSI		0x00  		
		MX50_PAD_CSPI_SS0__GPIO4_11		0xc4 -				MX50_PAD_ECSPI1_MOSI__CSPI_SS1		0xf4 +				MX50_PAD_ECSPI1_MOSI__GPIO4_13		0x84  			>;  		}; diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index d112b50f8c5d..b4605edfd2ab 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -213,8 +213,8 @@  		#size-cells = <0>;  		/* Microchip KSZ9031RNX PHY */ -		rgmii_phy: ethernet-phy@4 { -			reg = <4>; +		rgmii_phy: ethernet-phy@0 { +			reg = <0>;  			interrupts-extended = <&gpio1 28 IRQ_TYPE_LEVEL_LOW>;  			reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;  			reset-assert-us = <10000>; diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 828dd20cd27d..d07d8f83456d 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -98,7 +98,7 @@  &fec {  	pinctrl-names = "default";  	pinctrl-0 = <&pinctrl_enet>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts index f1a41152e9dd..adde62d6fce7 100644 --- a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts +++ b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts @@ -227,12 +227,12 @@  	/delete-property/ #size-cells;  	spi-slave;  	status = "okay"; -	ready-gpio = <&gpio 125 GPIO_ACTIVE_HIGH>; +	ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>;  	slave {  		compatible = "olpc,xo1.75-ec";  		spi-cpha; -		cmd-gpio = <&gpio 155 GPIO_ACTIVE_HIGH>; +		cmd-gpios = <&gpio 155 GPIO_ACTIVE_HIGH>;  	};  }; diff --git a/arch/arm/boot/dts/mmp3.dtsi b/arch/arm/boot/dts/mmp3.dtsi index cc4efd0efabd..4ae630d37d09 100644 --- a/arch/arm/boot/dts/mmp3.dtsi +++ b/arch/arm/boot/dts/mmp3.dtsi @@ -296,6 +296,7 @@  				interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;  				clocks = <&soc_clocks MMP2_CLK_CCIC0>;  				clock-names = "axi"; +				power-domains = <&soc_clocks MMP3_POWER_DOMAIN_CAMERA>;  				#clock-cells = <0>;  				
clock-output-names = "mclk";  				status = "disabled"; @@ -307,6 +308,7 @@  				interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;  				clocks = <&soc_clocks MMP2_CLK_CCIC1>;  				clock-names = "axi"; +				power-domains = <&soc_clocks MMP3_POWER_DOMAIN_CAMERA>;  				#clock-cells = <0>;  				clock-output-names = "mclk";  				status = "disabled"; diff --git a/arch/arm/boot/dts/stm32mp157c-ed1.dts b/arch/arm/boot/dts/stm32mp157c-ed1.dts index ca109dc18238..2e77ccec3fc1 100644 --- a/arch/arm/boot/dts/stm32mp157c-ed1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ed1.dts @@ -89,6 +89,14 @@  		states = <1800000 0x1>,  			 <2900000 0x0>;  	}; + +	vin: vin { +		compatible = "regulator-fixed"; +		regulator-name = "vin"; +		regulator-min-microvolt = <5000000>; +		regulator-max-microvolt = <5000000>; +		regulator-always-on; +	};  };  &adc { @@ -150,11 +158,18 @@  		regulators {  			compatible = "st,stpmic1-regulators"; +			buck1-supply = <&vin>; +			buck2-supply = <&vin>; +			buck3-supply = <&vin>; +			buck4-supply = <&vin>;  			ldo1-supply = <&v3v3>;  			ldo2-supply = <&v3v3>;  			ldo3-supply = <&vdd_ddr>; +			ldo4-supply = <&vin>;  			ldo5-supply = <&v3v3>;  			ldo6-supply = <&v3v3>; +			vref_ddr-supply = <&vin>; +			boost-supply = <&vin>;  			pwr_sw1-supply = <&bst_out>;  			pwr_sw2-supply = <&bst_out>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index 5dff24e39af8..8456f172d4b1 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -46,6 +46,16 @@  			linux,code = <KEY_A>;  			gpios = <&gpiof 3 GPIO_ACTIVE_LOW>;  		}; + +		/* +		 * The EXTi IRQ line 0 is shared with PMIC, +		 * so mark this as polled GPIO key. 
+		 */ +		button-2 { +			label = "TA3-GPIO-C"; +			linux,code = <KEY_C>; +			gpios = <&gpiog 0 GPIO_ACTIVE_LOW>; +		};  	};  	gpio-keys { @@ -59,13 +69,6 @@  			wakeup-source;  		}; -		button-2 { -			label = "TA3-GPIO-C"; -			linux,code = <KEY_C>; -			gpios = <&gpioi 11 GPIO_ACTIVE_LOW>; -			wakeup-source; -		}; -  		button-3 {  			label = "TA4-GPIO-D";  			linux,code = <KEY_D>; @@ -79,7 +82,7 @@  		led-0 {  			label = "green:led5"; -			gpios = <&gpiog 2 GPIO_ACTIVE_HIGH>; +			gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>;  			default-state = "off";  		}; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index b4b52cf634af..f796a6150313 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -68,6 +68,7 @@  		gpio = <&gpiog 3 GPIO_ACTIVE_LOW>;  		regulator-always-on;  		regulator-boot-on; +		vin-supply = <&vdd>;  	};  }; @@ -202,6 +203,7 @@  			vdda: ldo1 {  				regulator-name = "vdda"; +				regulator-always-on;  				regulator-min-microvolt = <2900000>;  				regulator-max-microvolt = <2900000>;  				interrupts = <IT_CURLIM_LDO1 0>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi index 04fbb324a541..803eb8bc9c85 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi @@ -21,6 +21,10 @@  	};  }; +&dts { +	status = "okay"; +}; +  &i2c4 {  	pinctrl-names = "default";  	pinctrl-0 = <&i2c4_pins_a>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi index a5307745719a..93398cfae97e 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi @@ -80,6 +80,14 @@  		dais = <&sai2a_port &sai2b_port &i2s2_port>;  		status = "okay";  	}; + +	vin: vin { +		compatible = "regulator-fixed"; +		regulator-name = "vin"; +		regulator-min-microvolt = <5000000>; +		regulator-max-microvolt = <5000000>; +		
regulator-always-on; +	}; };  &adc { @@ -240,9 +248,18 @@  		regulators {  			compatible = "st,stpmic1-regulators"; +			buck1-supply = <&vin>; +			buck2-supply = <&vin>; +			buck3-supply = <&vin>; +			buck4-supply = <&vin>;  			ldo1-supply = <&v3v3>; +			ldo2-supply = <&vin>;  			ldo3-supply = <&vdd_ddr>; +			ldo4-supply = <&vin>; +			ldo5-supply = <&vin>;  			ldo6-supply = <&v3v3>; +			vref_ddr-supply = <&vin>; +			boost-supply = <&vin>;  			pwr_sw1-supply = <&bst_out>;  			pwr_sw2-supply = <&bst_out>; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 0f95a6ef8543..1c5a666c54b5 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -143,7 +143,7 @@  			trips {  				cpu_alert0: cpu-alert0 {  					/* milliCelsius */ -					temperature = <850000>; +					temperature = <85000>;  					hysteresis = <2000>;  					type = "passive";  				}; diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts index 049e6ab3cf56..73de34ae37fd 100644 --- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts +++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts @@ -154,7 +154,7 @@  	pinctrl-names = "default";  	pinctrl-0 = <&gmac_rgmii_pins>;  	phy-handle = <&phy1>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts index 32d5d45a35c0..8945dbb114a2 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts @@ -130,7 +130,7 @@  	pinctrl-names = "default";  	pinctrl-0 = <&gmac_rgmii_pins>;  	phy-handle = <&phy1>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-supply = <&reg_gmac_3v3>;  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts index 8c8dee6ea461..9109ca0919ad 100644 --- a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts +++ 
b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts @@ -151,7 +151,7 @@  	pinctrl-names = "default";  	pinctrl-0 = <&gmac_rgmii_pins>;  	phy-handle = <&phy1>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index 9d34eabba121..431f70234d36 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -131,7 +131,7 @@  	pinctrl-0 = <&emac_rgmii_pins>;  	phy-supply = <&reg_sw>;  	phy-handle = <&rgmii_phy>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	allwinner,rx-delay-ps = <700>;  	allwinner,tx-delay-ps = <700>;  	status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index d9be511f054f..d8326a5c681d 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -183,7 +183,7 @@  	pinctrl-0 = <&emac_rgmii_pins>;  	phy-supply = <&reg_dldo4>;  	phy-handle = <&rgmii_phy>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index 71fb73208939..babf4cf1b2f6 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@  	};  }; -&emac { -	/* LEDs changed to active high on the plus */ -	/delete-property/ allwinner,leds-active-low; -}; -  &mmc1 {  	vmmc-supply = <&reg_vcc3v3>;  	bus-width = <4>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 6dbf7b2e0c13..b6ca45d18e51 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -67,7 +67,7 @@  	pinctrl-0 = <&emac_rgmii_pins>;  	phy-supply = <&reg_gmac_3v3>;  	phy-handle = <&ext_rgmii_phy>; -	phy-mode = "rgmii"; +	
phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts index 2fc62ef0cb3e..a6a1087a0c9b 100644 --- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts +++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts @@ -129,7 +129,7 @@  	pinctrl-names = "default";  	pinctrl-0 = <&gmac_rgmii_pins>;  	phy-handle = <&phy1>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-supply = <&reg_dc1sw>;  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts index d3b337b043a1..484b93df20cb 100644 --- a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts +++ b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts @@ -129,7 +129,7 @@  	pinctrl-names = "default";  	pinctrl-0 = <&gmac_rgmii_pins>;  	phy-handle = <&phy1>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-supply = <&reg_cldo1>;  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts index bbc6335e5631..5c3580d712e4 100644 --- a/arch/arm/boot/dts/sun9i-a80-optimus.dts +++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts @@ -124,7 +124,7 @@  	pinctrl-names = "default";  	pinctrl-0 = <&gmac_rgmii_pins>;  	phy-handle = <&phy1>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-supply = <&reg_cldo1>;  	status = "okay";  }; diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi index 39263e74fbb5..8e5cb3b3fd68 100644 --- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi +++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi @@ -126,7 +126,7 @@  	pinctrl-0 = <&emac_rgmii_pins>;  	phy-supply = <&reg_gmac_3v3>;  	phy-handle = <&ext_rgmii_phy>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index e500911ce0a5..6f1e0f0d4f0a 100644 --- 
a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -406,6 +406,9 @@  	};  }; +&mdio1 { +	clock-frequency = <5000000>; +};  &iomuxc {  	pinctrl_gpio_e6185_eeprom_sel: pinctrl-gpio-e6185-eeprom-spi0 { diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index aeb1209e0804..bb70acc6b526 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -93,6 +93,7 @@ CONFIG_SPI=y  CONFIG_SPI_IMX=y  CONFIG_SPI_SPIDEV=y  CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_MXC=y  CONFIG_W1=y  CONFIG_W1_MASTER_MXC=y  CONFIG_W1_SLAVE_THERM=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 0fa79bd00219..221f5c340c86 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -217,6 +217,7 @@ CONFIG_GPIO_PCA953X=y  CONFIG_GPIO_PCF857X=y  CONFIG_GPIO_STMPE=y  CONFIG_GPIO_74X164=y +CONFIG_GPIO_MXC=y  CONFIG_POWER_RESET=y  CONFIG_POWER_RESET_SYSCON=y  CONFIG_POWER_RESET_SYSCON_POWEROFF=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index 70b709a669d2..e00be9faa23b 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -166,6 +166,7 @@ CONFIG_SPI_IMX=y  CONFIG_SPI_ORION=y  CONFIG_GPIO_ASPEED=m  CONFIG_GPIO_ASPEED_SGPIO=y +CONFIG_GPIO_MXC=y  CONFIG_POWER_RESET=y  CONFIG_POWER_RESET_GPIO=y  CONFIG_POWER_RESET_QNAP=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index e731cdf7c88c..a611b0c1e540 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -465,6 +465,7 @@ CONFIG_GPIO_PALMAS=y  CONFIG_GPIO_TPS6586X=y  CONFIG_GPIO_TPS65910=y  CONFIG_GPIO_TWL4030=y +CONFIG_GPIO_MXC=y  CONFIG_POWER_AVS=y  CONFIG_ROCKCHIP_IODOMAIN=y  CONFIG_POWER_RESET_AS3722=y diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 
213607a1f45c..e26a278d301a 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -44,20 +44,20 @@ int kprobe_exceptions_notify(struct notifier_block *self,  			     unsigned long val, void *data);  /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[];  #define MAX_OPTIMIZED_LENGTH	4  #define MAX_OPTINSN_SIZE				\ -	((unsigned long)&optprobe_template_end -	\ -	 (unsigned long)&optprobe_template_entry) +	((unsigned long)optprobe_template_end -	\ +	 (unsigned long)optprobe_template_entry)  #define RELATIVEJUMP_SIZE	4  struct arch_optimized_insn { diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 3502c2f746ca..baf7d0204eb5 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -75,6 +75,8 @@  #define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))  #define 
PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32)) +#define MAX_POSSIBLE_PHYSMEM_BITS	32 +  /*   * PMD_SHIFT determines the size of the area a second-level page table can map   * PGDIR_SHIFT determines what a third-level page table entry can map diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index fbb6693c3352..2b85d175e999 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -25,6 +25,8 @@  #define PTE_HWTABLE_OFF		(0)  #define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u64)) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 +  /*   * PGDIR_SHIFT determines the size a top-level page table entry can map.   */ diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 05fe92aa7d98..0529f90395c9 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task)  }  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  {  	regs_user->regs = task_pt_regs(current);  	regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 8e6ace03e960..9f199b1e8383 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -71,7 +71,7 @@ void arch_cpu_idle(void)  		arm_pm_idle();  	else  		cpu_do_idle(); -	local_irq_enable(); +	raw_local_irq_enable();  }  void arch_cpu_idle_prepare(void) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 5f4922e858d0..f7f4620d59c3 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -41,6 +41,10 @@ SECTIONS  #ifndef CONFIG_SMP_ON_UP  		*(.alt.smp.init)  #endif +#ifndef CONFIG_ARM_UNWIND +		*(.ARM.exidx) *(.ARM.exidx.*) +		*(.ARM.extab) *(.ARM.extab.*) +#endif  	}  	. 
= PAGE_OFFSET + TEXT_OFFSET; diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 2d962fe48821..a3a64bf97250 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -35,13 +35,8 @@ ENTRY(ll_get_coherency_base)  	/*  	 * MMU is disabled, use the physical address of the coherency -	 * base address. However, if the coherency fabric isn't mapped -	 * (i.e its virtual address is zero), it means coherency is -	 * not enabled, so we return 0. +	 * base address, (or 0x0 if the coherency fabric is not mapped)  	 */ -	ldr	r1, =coherency_base -	cmp	r1, #0 -	beq	2f  	adr	r1, 3f  	ldr	r3, [r1]  	ldr	r1, [r1, r3] diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 3ee7bdff86b2..3f62a0c9450d 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -7,7 +7,6 @@ config ARCH_OMAP2  	depends on ARCH_MULTI_V6  	select ARCH_OMAP2PLUS  	select CPU_V6 -	select PM_GENERIC_DOMAINS if PM  	select SOC_HAS_OMAP2_SDRC  config ARCH_OMAP3 @@ -106,6 +105,8 @@ config ARCH_OMAP2PLUS  	select OMAP_DM_TIMER  	select OMAP_GPMC  	select PINCTRL +	select PM_GENERIC_DOMAINS if PM +	select PM_GENERIC_DOMAINS_OF if PM  	select RESET_CONTROLLER  	select SOC_BUS  	select TI_SYSC diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index a92d277f81a0..c8d317fafe2e 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -175,8 +175,11 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,  		if (mpuss_can_lose_context) {  			error = cpu_cluster_pm_enter();  			if (error) { -				omap_set_pwrdm_state(mpu_pd, PWRDM_POWER_ON); -				goto cpu_cluster_pm_out; +				index = 0; +				cx = state_ptr + index; +				pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); +				omap_set_pwrdm_state(mpu_pd, cx->mpu_state); +				mpuss_can_lose_context = 0;  			}  		}  	} @@ -184,7 +187,6 @@ static int omap_enter_idle_coupled(struct 
cpuidle_device *dev,  	omap4_enter_lowpower(dev->cpu, cx->cpu_state);  	cpu_done[dev->cpu] = true; -cpu_cluster_pm_out:  	/* Wakeup CPU1 only if it is not offlined */  	if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) { diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d57112a276f5..c23dbf8bebee 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -354,8 +354,8 @@ static void __init free_highpages(void)  	/* set highmem page free */  	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,  				&range_start, &range_end, NULL) { -		unsigned long start = PHYS_PFN(range_start); -		unsigned long end = PHYS_PFN(range_end); +		unsigned long start = PFN_UP(range_start); +		unsigned long end = PFN_DOWN(range_end);  		/* Ignore complete lowmem entries */  		if (end <= max_low) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 7a449df0b359..c78180172120 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -85,21 +85,21 @@ asm (  			"optprobe_template_end:\n");  #define TMPL_VAL_IDX \ -	((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) +	((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry)  #define TMPL_CALL_IDX \ -	((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) +	((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry)  #define TMPL_END_IDX \ -	((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) +	((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry)  #define TMPL_ADD_SP \ -	((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) +	((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry)  #define TMPL_SUB_SP \ -	((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) +	((unsigned long 
*)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry)  #define TMPL_RESTORE_BEGIN \ -	((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) +	((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry)  #define TMPL_RESTORE_ORIGN_INSN \ -	((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) +	((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry)  #define TMPL_RESTORE_END \ -	((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) +	((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry)  /*   * ARM can always optimize an instruction when using ARM ISA, except @@ -234,7 +234,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or  	}  	/* Copy arch-dep-instance from template. */ -	memcpy(code, (unsigned long *)&optprobe_template_entry, +	memcpy(code, (unsigned long *)optprobe_template_entry,  			TMPL_END_IDX * sizeof(kprobe_opcode_t));  	/* Adjust buffer according to instruction. */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f858c352f72a..1515f6f153a0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -636,6 +636,26 @@ config ARM64_ERRATUM_1542419  	  If unsure, say Y. +config ARM64_ERRATUM_1508412 +	bool "Cortex-A77: 1508412: workaround deadlock on sequence of NC/Device load and store exclusive or PAR read" +	default y +	help +	  This option adds a workaround for Arm Cortex-A77 erratum 1508412. + +	  Affected Cortex-A77 cores (r0p0, r1p0) could deadlock on a sequence +	  of a store-exclusive or read of PAR_EL1 and a load with device or +	  non-cacheable memory attributes. The workaround depends on a firmware +	  counterpart. + +	  KVM guests must also have the workaround implemented or they can +	  deadlock the system. 
+ +	  Work around the issue by inserting DMB SY barriers around PAR_EL1 +	  register reads and warning KVM users. The DMB barrier is sufficient +	  to prevent a speculative PAR_EL1 read. + +	  If unsure, say Y. +  config CAVIUM_ERRATUM_22375  	bool "Cavium erratum 22375, 24313"  	default y @@ -982,7 +1002,7 @@ config NUMA  config NODES_SHIFT  	int "Maximum NUMA Nodes (as a power of 2)"  	range 1 10 -	default "2" +	default "4"  	depends on NEED_MULTIPLE_NODES  	help  	  Specify the maximum number of NUMA Nodes available on the target diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 6f2494dd6d60..5c4ac1c9f4e0 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -54,6 +54,7 @@ config ARCH_BCM_IPROC  config ARCH_BERLIN  	bool "Marvell Berlin SoC Family"  	select DW_APB_ICTL +	select DW_APB_TIMER_OF  	select GPIOLIB  	select PINCTRL  	help diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 3ea5182ca489..e5e840b9fbb4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -105,7 +105,7 @@  &emac {  	pinctrl-names = "default";  	pinctrl-0 = <&rgmii_pins>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-handle = <&ext_rgmii_phy>;  	phy-supply = <&reg_dc1sw>;  	status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index d894ec5fa8a1..70e31743f0ba 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -120,7 +120,7 @@  &emac {  	pinctrl-names = "default";  	pinctrl-0 = <&rgmii_pins>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-handle = <&ext_rgmii_phy>;  	phy-supply = <&reg_gmac_3v3>;  	status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts 
b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index b26181cf9095..b54099b654c8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -13,7 +13,7 @@  &emac {  	pinctrl-names = "default";  	pinctrl-0 = <&rgmii_pins>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-txid";  	phy-handle = <&ext_rgmii_phy>;  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts index 3ab0f0347bc9..0494bfaf2ffa 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts @@ -122,9 +122,6 @@  	status = "okay";  	port { -		#address-cells = <1>; -		#size-cells = <0>; -  		csi_ep: endpoint {  			remote-endpoint = <&ov5640_ep>;  			bus-width = <8>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts index df1b9263ad0e..6e30a564c87f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts @@ -36,7 +36,7 @@  	pinctrl-0 = <&emac_rgmii_pins>;  	phy-supply = <&reg_gmac_3v3>;  	phy-handle = <&ext_rgmii_phy>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	/delete-property/ allwinner,leds-active-low;  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index 7d7aad18f078..8bf2db9dcbda 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -123,7 +123,7 @@  	pinctrl-0 = <&emac_rgmii_pins>;  	phy-supply = <&reg_gmac_3v3>;  	phy-handle = <&ext_rgmii_phy>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts 
b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index cb44bfa5981f..33ab44072e6d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -124,7 +124,7 @@  	pinctrl-0 = <&emac_rgmii_pins>;  	phy-supply = <&reg_gmac_3v3>;  	phy-handle = <&ext_rgmii_phy>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	status = "okay";  }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 3f7ceeb1a767..7c9dbde645b5 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -97,7 +97,7 @@  &emac {  	pinctrl-names = "default";  	pinctrl-0 = <&ext_rgmii_pins>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-handle = <&ext_rgmii_phy>;  	phy-supply = <&reg_aldo2>;  	status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts index af85b2074867..961732c52aa0 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts @@ -100,7 +100,7 @@  &emac {  	pinctrl-names = "default";  	pinctrl-0 = <&ext_rgmii_pins>; -	phy-mode = "rgmii"; +	phy-mode = "rgmii-id";  	phy-handle = <&ext_rgmii_phy>;  	phy-supply = <&reg_gmac_3v3>;  	allwinner,rx-delay-ps = <200>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index feadd21bc0dc..46e558ab7729 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -159,7 +159,7 @@  	flash@0 {  		#address-cells = <1>;  		#size-cells = <1>; -		compatible = "n25q00a"; +		compatible = "micron,mt25qu02g", "jedec,spi-nor";  		reg = <0>;  		spi-max-frequency = <100000000>; diff --git 
a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index c07966740e14..f9b4a39683cf 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -192,7 +192,7 @@  	flash@0 {  		#address-cells = <1>;  		#size-cells = <1>; -		compatible = "n25q00a"; +		compatible = "micron,mt25qu02g", "jedec,spi-nor";  		reg = <0>;  		spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts index cb1360ae1211..7740f97c240f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts +++ b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts @@ -584,3 +584,9 @@  	pinctrl-0 = <&uart_ao_a_pins>;  	pinctrl-names = "default";  }; + +&usb { +	status = "okay"; +	dr_mode = "otg"; +	vbus-supply = <&usb_pwr>; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index b9efc8469265..724ee179b316 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -171,6 +171,46 @@  		#size-cells = <2>;  		ranges; +		usb: usb@ffe09080 { +			compatible = "amlogic,meson-axg-usb-ctrl"; +			reg = <0x0 0xffe09080 0x0 0x20>; +			interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; +			#address-cells = <2>; +			#size-cells = <2>; +			ranges; + +			clocks = <&clkc CLKID_USB>, <&clkc CLKID_USB1_DDR_BRIDGE>; +			clock-names = "usb_ctrl", "ddr"; +			resets = <&reset RESET_USB_OTG>; + +			dr_mode = "otg"; + +			phys = <&usb2_phy1>; +			phy-names = "usb2-phy1"; + +			dwc2: usb@ff400000 { +				compatible = "amlogic,meson-g12a-usb", "snps,dwc2"; +				reg = <0x0 0xff400000 0x0 0x40000>; +				interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>; +				clocks = <&clkc CLKID_USB1>; +				clock-names = "otg"; +				phys = <&usb2_phy1>; +				dr_mode = "peripheral"; +				g-rx-fifo-size = <192>; +				g-np-tx-fifo-size 
= <128>; +				g-tx-fifo-size = <128 128 16 16 16>; +			}; + +			dwc3: usb@ff500000 { +				compatible = "snps,dwc3"; +				reg = <0x0 0xff500000 0x0 0x100000>; +				interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; +				dr_mode = "host"; +				maximum-speed = "high-speed"; +				snps,dis_u2_susphy_quirk; +			}; +		}; +  		ethmac: ethernet@ff3f0000 {  			compatible = "amlogic,meson-axg-dwmac",  				     "snps,dwmac-3.70a", @@ -187,6 +227,8 @@  				      "timing-adjustment";  			rx-fifo-depth = <4096>;  			tx-fifo-depth = <2048>; +			resets = <&reset RESET_ETHERNET>; +			reset-names = "stmmaceth";  			status = "disabled";  		}; @@ -1734,6 +1776,16 @@  				clock-names = "core", "clkin0", "clkin1";  				resets = <&reset RESET_SD_EMMC_C>;  			}; + +			usb2_phy1: phy@9020 { +				compatible = "amlogic,meson-gxl-usb2-phy"; +				#phy-cells = <0>; +				reg = <0x0 0x9020 0x0 0x20>; +				clocks = <&clkc CLKID_USB>; +				clock-names = "phy"; +				resets = <&reset RESET_USB_OTG>; +				reset-names = "phy"; +			};  		};  		sram: sram@fffc0000 { diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 1e83ec5b8c91..8514fe6a275a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -209,7 +209,7 @@  		};  		ethmac: ethernet@ff3f0000 { -			compatible = "amlogic,meson-axg-dwmac", +			compatible = "amlogic,meson-g12a-dwmac",  				     "snps,dwmac-3.70a",  				     "snps,dwmac";  			reg = <0x0 0xff3f0000 0x0 0x10000>, @@ -224,6 +224,8 @@  				      "timing-adjustment";  			rx-fifo-depth = <4096>;  			tx-fifo-depth = <2048>; +			resets = <&reset RESET_ETHERNET>; +			reset-names = "stmmaceth";  			status = "disabled";  			mdio0: mdio { @@ -282,6 +284,8 @@  				hwrng: rng@218 {  					compatible = "amlogic,meson-rng";  					reg = <0x0 0x218 0x0 0x4>; +					clocks = <&clkc CLKID_RNG0>; +					clock-names = "core";  				};  			}; diff --git 
a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts index 5de2815ba99d..ce1198ad34e4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts @@ -19,7 +19,7 @@  	regulator-min-microvolt = <680000>;  	regulator-max-microvolt = <1040000>; -	pwms = <&pwm_AO_cd 1 1500 0>; +	pwms = <&pwm_ab 0 1500 0>;  };  &vddcpu_b { diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 0edd137151f8..726b91d3a905 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -13,6 +13,7 @@  #include <dt-bindings/interrupt-controller/irq.h>  #include <dt-bindings/interrupt-controller/arm-gic.h>  #include <dt-bindings/power/meson-gxbb-power.h> +#include <dt-bindings/reset/amlogic,meson-gxbb-reset.h>  #include <dt-bindings/thermal/thermal.h>  / { @@ -575,6 +576,8 @@  			interrupt-names = "macirq";  			rx-fifo-depth = <4096>;  			tx-fifo-depth = <2048>; +			resets = <&reset RESET_ETHERNET>; +			reset-names = "stmmaceth";  			power-domains = <&pwrc PWRC_GXBB_ETHERNET_MEM_ID>;  			status = "disabled";  		}; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi index 55259f973b5a..aef8f2b00778 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi @@ -5,20 +5,20 @@  	usb {  		compatible = "simple-bus";  		dma-ranges; -		#address-cells = <1>; -		#size-cells = <1>; -		ranges = <0x0 0x0 0x68500000 0x00400000>; +		#address-cells = <2>; +		#size-cells = <2>; +		ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>;  		usbphy0: usb-phy@0 {  			compatible = "brcm,sr-usb-combo-phy"; -			reg = <0x00000000 0x100>; +			reg = <0x0 0x00000000 0x0 0x100>;  			#phy-cells = <1>;  			status = "disabled";  		};  		xhci0: usb@1000 {  	
		compatible = "generic-xhci"; -			reg = <0x00001000 0x1000>; +			reg = <0x0 0x00001000 0x0 0x1000>;  			interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>;  			phys = <&usbphy0 1>, <&usbphy0 0>;  			phy-names = "phy0", "phy1"; @@ -28,7 +28,7 @@  		bdc0: usb@2000 {  			compatible = "brcm,bdc-v0.16"; -			reg = <0x00002000 0x1000>; +			reg = <0x0 0x00002000 0x0 0x1000>;  			interrupts = <GIC_SPI 259 IRQ_TYPE_LEVEL_HIGH>;  			phys = <&usbphy0 0>, <&usbphy0 1>;  			phy-names = "phy0", "phy1"; @@ -38,21 +38,21 @@  		usbphy1: usb-phy@10000 {  			compatible = "brcm,sr-usb-combo-phy"; -			reg = <0x00010000 0x100>; +			reg = <0x0 0x00010000 0x0 0x100>;  			#phy-cells = <1>;  			status = "disabled";  		};  		usbphy2: usb-phy@20000 {  			compatible = "brcm,sr-usb-hs-phy"; -			reg = <0x00020000 0x100>; +			reg = <0x0 0x00020000 0x0 0x100>;  			#phy-cells = <0>;  			status = "disabled";  		};  		xhci1: usb@11000 {  			compatible = "generic-xhci"; -			reg = <0x00011000 0x1000>; +			reg = <0x0 0x00011000 0x0 0x1000>;  			interrupts = <GIC_SPI 263 IRQ_TYPE_LEVEL_HIGH>;  			phys = <&usbphy1 1>, <&usbphy2>, <&usbphy1 0>;  			phy-names = "phy0", "phy1", "phy2"; @@ -62,7 +62,7 @@  		bdc1: usb@21000 {  			compatible = "brcm,bdc-v0.16"; -			reg = <0x00021000 0x1000>; +			reg = <0x0 0x00021000 0x0 0x1000>;  			interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;  			phys = <&usbphy2>;  			phy-names = "phy0"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index f46eb47cfa4d..8161dd237971 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -75,6 +75,7 @@  &enetc_port0 {  	phy-handle = <&phy0>;  	phy-connection-type = "sgmii"; +	managed = "in-band-status";  	status = "okay";  	mdio { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 73e4f9466887..7a6fb7e1fb82 
100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -1012,6 +1012,7 @@  			compatible = "fsl,ls1028a-rcpm", "fsl,qoriq-rcpm-2.1+";  			reg = <0x0 0x1e34040 0x0 0x1c>;  			#fsl,rcpm-wakeup-cells = <7>; +			little-endian;  		};  		ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index ff5805206a28..692d8f4a206d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -805,6 +805,7 @@  			compatible = "fsl,ls1088a-rcpm", "fsl,qoriq-rcpm-2.1+";  			reg = <0x0 0x1e34040 0x0 0x18>;  			#fsl,rcpm-wakeup-cells = <6>; +			little-endian;  		};  		ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index bf72918fe545..e7abb74bd816 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -892,6 +892,7 @@  			compatible = "fsl,ls208xa-rcpm", "fsl,qoriq-rcpm-2.1+";  			reg = <0x0 0x1e34040 0x0 0x18>;  			#fsl,rcpm-wakeup-cells = <6>; +			little-endian;  		};  		ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 6de86a4f0ec4..b88c3c99b007 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -72,6 +72,7 @@  	pmic@4b {  		compatible = "rohm,bd71847";  		reg = <0x4b>; +		pinctrl-names = "default";  		pinctrl-0 = <&pinctrl_pmic>;  		interrupt-parent = <&gpio1>;  		interrupts = <3 IRQ_TYPE_LEVEL_LOW>; @@ -210,6 +211,7 @@  		host-wakeup-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>;  		device-wakeup-gpios = <&gpio2 7 GPIO_ACTIVE_HIGH>;  		clocks = <&osc_32k>; +		max-speed = <4000000>;  		clock-names = "extclk";  	};  }; diff --git 
a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi index f305a530ff6f..521eb3a5a12e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi @@ -121,6 +121,7 @@  	pmic@4b {  		compatible = "rohm,bd71847";  		reg = <0x4b>; +		pinctrl-names = "default";  		pinctrl-0 = <&pinctrl_pmic>;  		interrupt-parent = <&gpio1>;  		interrupts = <3 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 4107fe914d08..49082529764f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -135,13 +135,10 @@  	pmic@4b {  		compatible = "rohm,bd71847";  		reg = <0x4b>; +		pinctrl-names = "default";  		pinctrl-0 = <&pinctrl_pmic>;  		interrupt-parent = <&gpio2>; -		/* -		 * The interrupt is not correct. It should be level low, -		 * however with internal pull up this causes IRQ storm. 
-		 */ -		interrupts = <8 IRQ_TYPE_EDGE_RISING>; +		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;  		rohm,reset-snvs-powered;  		#clock-cells = <0>; @@ -398,7 +395,7 @@  	pinctrl_pmic: pmicirqgrp {  		fsl,pins = < -			MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8	0x41 +			MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8	0x141  		>;  	}; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index b83f400def8b..05ee062548e4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -129,7 +129,7 @@  		opp-1600000000 {  			opp-hz = /bits/ 64 <1600000000>; -			opp-microvolt = <900000>; +			opp-microvolt = <950000>;  			opp-supported-hw = <0xc>, <0x7>;  			clock-latency-ns = <150000>;  			opp-suspend; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 46e76cf32b2f..7dfee715a2c4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -53,6 +53,7 @@  	pmic@4b {  		compatible = "rohm,bd71847";  		reg = <0x4b>; +		pinctrl-names = "default";  		pinctrl-0 = <&pinctrl_pmic>;  		interrupt-parent = <&gpio1>;  		interrupts = <3 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts index 707d8486b4d8..8311b95dee49 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts @@ -18,6 +18,7 @@  	pmic: pmic@25 {  		compatible = "nxp,pca9450b";  		reg = <0x25>; +		pinctrl-names = "default";  		pinctrl-0 = <&pinctrl_pmic>;  		interrupt-parent = <&gpio1>;  		interrupts = <3 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index a2d0190921e4..7f356edf9f91 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -116,13 +116,10 @@  	
pmic@4b {  		compatible = "rohm,bd71847";  		reg = <0x4b>; +		pinctrl-names = "default";  		pinctrl-0 = <&pinctrl_pmic>;  		interrupt-parent = <&gpio2>; -		/* -		 * The interrupt is not correct. It should be level low, -		 * however with internal pull up this causes IRQ storm. -		 */ -		interrupts = <8 IRQ_TYPE_EDGE_RISING>; +		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;  		rohm,reset-snvs-powered;  		regulators { @@ -388,7 +385,7 @@  	pinctrl_pmic: pmicirqgrp {  		fsl,pins = < -			MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8	0x101 +			MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8	0x141  		>;  	}; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 746faf1cf2fb..16c7202885d7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -790,28 +790,6 @@  				#index-cells = <1>;  				reg = <0x32e40200 0x200>;  			}; - -			usbotg2: usb@32e50000 { -				compatible = "fsl,imx8mn-usb", "fsl,imx7d-usb"; -				reg = <0x32e50000 0x200>; -				interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; -				clocks = <&clk IMX8MN_CLK_USB1_CTRL_ROOT>; -				clock-names = "usb1_ctrl_root_clk"; -				assigned-clocks = <&clk IMX8MN_CLK_USB_BUS>, -						  <&clk IMX8MN_CLK_USB_CORE_REF>; -				assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>, -							 <&clk IMX8MN_SYS_PLL1_100M>; -				fsl,usbphy = <&usbphynop2>; -				fsl,usbmisc = <&usbmisc2 0>; -				status = "disabled"; -			}; - -			usbmisc2: usbmisc@32e50200 { -				compatible = "fsl,imx8mn-usbmisc", "fsl,imx7d-usbmisc"; -				#index-cells = <1>; -				reg = <0x32e50200 0x200>; -			}; -  		};  		dma_apbh: dma-controller@33000000 { @@ -876,12 +854,4 @@  		assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>;  		clock-names = "main_clk";  	}; - -	usbphynop2: usbphynop2 { -		compatible = "usb-nop-xceiv"; -		clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; -		assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; -		assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; -		clock-names = "main_clk"; -	
};  }; diff --git a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi index 8bc6caa9167d..4338db14c5da 100644 --- a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi +++ b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi @@ -19,6 +19,7 @@ fman0: fman@1a00000 {  	clock-names = "fmanclk";  	fsl,qman-channel-range = <0x800 0x10>;  	ptimer-handle = <&ptp_timer0>; +	dma-coherent;  	muram@0 {  		compatible = "fsl,fman-muram"; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts index 96c50d48289d..a7a83f29f00b 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts +++ b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts @@ -110,7 +110,7 @@  	flash@0 {  		#address-cells = <1>;  		#size-cells = <1>; -		compatible = "mt25qu02g"; +		compatible = "micron,mt25qu02g", "jedec,spi-nor";  		reg = <0>;  		spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts index 03733fd92732..215d2f702623 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts @@ -20,17 +20,23 @@  	compatible = "globalscale,espressobin-v7-emmc", "globalscale,espressobin-v7",  		     "globalscale,espressobin", "marvell,armada3720",  		     "marvell,armada3710"; + +	aliases { +		/* ethernet1 is wan port */ +		ethernet1 = &switch0port3; +		ethernet3 = &switch0port1; +	};  };  &switch0 {  	ports { -		port@1 { +		switch0port1: port@1 {  			reg = <1>;  			label = "lan1";  			phy-handle = <&switch0phy0>;  		}; -		port@3 { +		switch0port3: port@3 {  			reg = <3>;  			label = "wan";  			phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts index 8570c5f47d7d..b6f4af8ebafb 100644 --- 
a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts @@ -19,17 +19,23 @@  	model = "Globalscale Marvell ESPRESSOBin Board V7";  	compatible = "globalscale,espressobin-v7", "globalscale,espressobin",  		     "marvell,armada3720", "marvell,armada3710"; + +	aliases { +		/* ethernet1 is wan port */ +		ethernet1 = &switch0port3; +		ethernet3 = &switch0port1; +	};  };  &switch0 {  	ports { -		port@1 { +		switch0port1: port@1 {  			reg = <1>;  			label = "lan1";  			phy-handle = <&switch0phy0>;  		}; -		port@3 { +		switch0port3: port@3 {  			reg = <3>;  			label = "wan";  			phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi index b97218c72727..0775c16e0ec8 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi @@ -13,6 +13,10 @@  / {  	aliases {  		ethernet0 = &eth0; +		/* for dsa slave device */ +		ethernet1 = &switch0port1; +		ethernet2 = &switch0port2; +		ethernet3 = &switch0port3;  		serial0 = &uart0;  		serial1 = &uart1;  	}; @@ -120,7 +124,7 @@  			#address-cells = <1>;  			#size-cells = <0>; -			port@0 { +			switch0port0: port@0 {  				reg = <0>;  				label = "cpu";  				ethernet = <&eth0>; @@ -131,19 +135,19 @@  				};  			}; -			port@1 { +			switch0port1: port@1 {  				reg = <1>;  				label = "wan";  				phy-handle = <&switch0phy0>;  			}; -			port@2 { +			switch0port2: port@2 {  				reg = <2>;  				label = "lan0";  				phy-handle = <&switch0phy1>;  			}; -			port@3 { +			switch0port3: port@3 {  				reg = <3>;  				label = "lan1";  				phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts index 381a84912ba8..c28d51cc5797 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +++ 
b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts @@ -10,18 +10,6 @@  	model = "NVIDIA Jetson TX2 Developer Kit";  	compatible = "nvidia,p2771-0000", "nvidia,tegra186"; -	aconnect { -		status = "okay"; - -		dma-controller@2930000 { -			status = "okay"; -		}; - -		interrupt-controller@2a40000 { -			status = "okay"; -		}; -	}; -  	i2c@3160000 {  		power-monitor@42 {  			compatible = "ti,ina3221"; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi index a2893be80507..0dc8304a2edd 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi @@ -54,7 +54,7 @@  			status = "okay";  		}; -		serial@c280000 { +		serial@3100000 {  			status = "okay";  		}; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index e9c90f0f44ff..93438d2b9469 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1161,7 +1161,7 @@  		hsp_aon: hsp@c150000 {  			compatible = "nvidia,tegra194-hsp", "nvidia,tegra186-hsp"; -			reg = <0x0c150000 0xa0000>; +			reg = <0x0c150000 0x90000>;  			interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,  			             <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,  			             <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi index e18e1a9a3011..a9caaf7c0d67 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi @@ -1663,16 +1663,6 @@  		vin-supply = <&vdd_5v0_sys>;  	}; -	vdd_usb_vbus_otg: regulator@11 { -		compatible = "regulator-fixed"; -		regulator-name = "USB_VBUS_EN0"; -		regulator-min-microvolt = <5000000>; -		regulator-max-microvolt = <5000000>; -		gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>; -		enable-active-high; -		vin-supply = <&vdd_5v0_sys>; -	}; -  	vdd_hdmi: regulator@10 {  		
compatible = "regulator-fixed";  		regulator-name = "VDD_HDMI_5V0"; @@ -1712,4 +1702,14 @@  		enable-active-high;  		vin-supply = <&vdd_3v3_sys>;  	}; + +	vdd_usb_vbus_otg: regulator@14 { +		compatible = "regulator-fixed"; +		regulator-name = "USB_VBUS_EN0"; +		regulator-min-microvolt = <5000000>; +		regulator-max-microvolt = <5000000>; +		gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>; +		enable-active-high; +		vin-supply = <&vdd_5v0_sys>; +	};  }; diff --git a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts index f6e6a24829af..b5d9a5526272 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts @@ -8,7 +8,7 @@  	compatible = "nvidia,tegra234-vdk", "nvidia,tegra234";  	aliases { -		sdhci3 = "/cbb@0/sdhci@3460000"; +		mmc3 = "/bus@0/mmc@3460000";  		serial0 = &uarta;  	}; @@ -17,12 +17,12 @@  		stdout-path = "serial0:115200n8";  	}; -	cbb@0 { +	bus@0 {  		serial@3100000 {  			status = "okay";  		}; -		sdhci@3460000 { +		mmc@3460000 {  			status = "okay";  			bus-width = <8>;  			non-removable; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index a94dac76bf3f..59e0cbfa2214 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -179,22 +179,22 @@  	};  	soc: soc { -		#address-cells = <1>; -		#size-cells = <1>; -		ranges = <0 0 0 0xffffffff>; +		#address-cells = <2>; +		#size-cells = <2>; +		ranges = <0 0 0 0 0x0 0xffffffff>;  		dma-ranges;  		compatible = "simple-bus";  		prng: qrng@e1000 {  			compatible = "qcom,prng-ee"; -			reg = <0xe3000 0x1000>; +			reg = <0x0 0xe3000 0x0 0x1000>;  			clocks = <&gcc GCC_PRNG_AHB_CLK>;  			clock-names = "core";  		};  		cryptobam: dma@704000 {  			compatible = "qcom,bam-v1.7.0"; -			reg = <0x00704000 0x20000>; +			reg = <0x0 0x00704000 0x0 0x20000>;  			interrupts = <GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH>;  			clocks = <&gcc 
GCC_CRYPTO_AHB_CLK>;  			clock-names = "bam_clk"; @@ -206,7 +206,7 @@  		crypto: crypto@73a000 {  			compatible = "qcom,crypto-v5.1"; -			reg = <0x0073a000 0x6000>; +			reg = <0x0 0x0073a000 0x0 0x6000>;  			clocks = <&gcc GCC_CRYPTO_AHB_CLK>,  				<&gcc GCC_CRYPTO_AXI_CLK>,  				<&gcc GCC_CRYPTO_CLK>; @@ -217,7 +217,7 @@  		tlmm: pinctrl@1000000 {  			compatible = "qcom,ipq6018-pinctrl"; -			reg = <0x01000000 0x300000>; +			reg = <0x0 0x01000000 0x0 0x300000>;  			interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;  			gpio-controller;  			#gpio-cells = <2>; @@ -235,7 +235,7 @@  		gcc: gcc@1800000 {  			compatible = "qcom,gcc-ipq6018"; -			reg = <0x01800000 0x80000>; +			reg = <0x0 0x01800000 0x0 0x80000>;  			clocks = <&xo>, <&sleep_clk>;  			clock-names = "xo", "sleep_clk";  			#clock-cells = <1>; @@ -244,17 +244,17 @@  		tcsr_mutex_regs: syscon@1905000 {  			compatible = "syscon"; -			reg = <0x01905000 0x8000>; +			reg = <0x0 0x01905000 0x0 0x8000>;  		};  		tcsr_q6: syscon@1945000 {  			compatible = "syscon"; -			reg = <0x01945000 0xe000>; +			reg = <0x0 0x01945000 0x0 0xe000>;  		};  		blsp_dma: dma@7884000 {  			compatible = "qcom,bam-v1.7.0"; -			reg = <0x07884000 0x2b000>; +			reg = <0x0 0x07884000 0x0 0x2b000>;  			interrupts = <GIC_SPI 238 IRQ_TYPE_LEVEL_HIGH>;  			clocks = <&gcc GCC_BLSP1_AHB_CLK>;  			clock-names = "bam_clk"; @@ -264,7 +264,7 @@  		blsp1_uart3: serial@78b1000 {  			compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; -			reg = <0x078b1000 0x200>; +			reg = <0x0 0x078b1000 0x0 0x200>;  			interrupts = <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;  			clocks = <&gcc GCC_BLSP1_UART3_APPS_CLK>,  				<&gcc GCC_BLSP1_AHB_CLK>; @@ -276,7 +276,7 @@  			compatible = "qcom,spi-qup-v2.2.1";  			#address-cells = <1>;  			#size-cells = <0>; -			reg = <0x078b5000 0x600>; +			reg = <0x0 0x078b5000 0x0 0x600>;  			interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;  			spi-max-frequency = <50000000>;  			clocks = <&gcc GCC_BLSP1_QUP1_SPI_APPS_CLK>, @@ -291,7 +291,7 @@ 
 			compatible = "qcom,spi-qup-v2.2.1";  			#address-cells = <1>;  			#size-cells = <0>; -			reg = <0x078b6000 0x600>; +			reg = <0x0 0x078b6000 0x0 0x600>;  			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;  			spi-max-frequency = <50000000>;  			clocks = <&gcc GCC_BLSP1_QUP2_SPI_APPS_CLK>, @@ -306,7 +306,7 @@  			compatible = "qcom,i2c-qup-v2.2.1";  			#address-cells = <1>;  			#size-cells = <0>; -			reg = <0x078b6000 0x600>; +			reg = <0x0 0x078b6000 0x0 0x600>;  			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;  			clocks = <&gcc GCC_BLSP1_AHB_CLK>,  				<&gcc GCC_BLSP1_QUP2_I2C_APPS_CLK>; @@ -321,7 +321,7 @@  			compatible = "qcom,i2c-qup-v2.2.1";  			#address-cells = <1>;  			#size-cells = <0>; -			reg = <0x078b7000 0x600>; +			reg = <0x0 0x078b7000 0x0 0x600>;  			interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;  			clocks = <&gcc GCC_BLSP1_AHB_CLK>,  				<&gcc GCC_BLSP1_QUP3_I2C_APPS_CLK>; @@ -336,24 +336,24 @@  			compatible = "qcom,msm-qgic2";  			interrupt-controller;  			#interrupt-cells = <0x3>; -			reg =   <0x0b000000 0x1000>,  /*GICD*/ -				<0x0b002000 0x1000>,  /*GICC*/ -				<0x0b001000 0x1000>,  /*GICH*/ -				<0x0b004000 0x1000>;  /*GICV*/ +			reg =   <0x0 0x0b000000 0x0 0x1000>,  /*GICD*/ +				<0x0 0x0b002000 0x0 0x1000>,  /*GICC*/ +				<0x0 0x0b001000 0x0 0x1000>,  /*GICH*/ +				<0x0 0x0b004000 0x0 0x1000>;  /*GICV*/  			interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;  		};  		watchdog@b017000 {  			compatible = "qcom,kpss-wdt";  			interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>; -			reg = <0x0b017000 0x40>; +			reg = <0x0 0x0b017000 0x0 0x40>;  			clocks = <&sleep_clk>;  			timeout-sec = <10>;  		};  		apcs_glb: mailbox@b111000 {  			compatible = "qcom,ipq6018-apcs-apps-global"; -			reg = <0x0b111000 0x1000>; +			reg = <0x0 0x0b111000 0x0 0x1000>;  			#clock-cells = <1>;  			clocks = <&a53pll>, <&xo>;  			clock-names = "pll", "xo"; @@ -362,7 +362,7 @@  		a53pll: clock@b116000 {  			compatible = "qcom,ipq6018-a53pll"; -			reg = <0x0b116000 0x40>; +			
reg = <0x0 0x0b116000 0x0 0x40>;  			#clock-cells = <0>;  			clocks = <&xo>;  			clock-names = "xo"; @@ -377,68 +377,68 @@  		};  		timer@b120000 { -			#address-cells = <1>; -			#size-cells = <1>; +			#address-cells = <2>; +			#size-cells = <2>;  			ranges;  			compatible = "arm,armv7-timer-mem"; -			reg = <0x0b120000 0x1000>; +			reg = <0x0 0x0b120000 0x0 0x1000>;  			clock-frequency = <19200000>;  			frame@b120000 {  				frame-number = <0>;  				interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,  					     <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; -				reg = <0x0b121000 0x1000>, -				      <0x0b122000 0x1000>; +				reg = <0x0 0x0b121000 0x0 0x1000>, +				      <0x0 0x0b122000 0x0 0x1000>;  			};  			frame@b123000 {  				frame-number = <1>;  				interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; -				reg = <0xb123000 0x1000>; +				reg = <0x0 0xb123000 0x0 0x1000>;  				status = "disabled";  			};  			frame@b124000 {  				frame-number = <2>;  				interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; -				reg = <0x0b124000 0x1000>; +				reg = <0x0 0x0b124000 0x0 0x1000>;  				status = "disabled";  			};  			frame@b125000 {  				frame-number = <3>;  				interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; -				reg = <0x0b125000 0x1000>; +				reg = <0x0 0x0b125000 0x0 0x1000>;  				status = "disabled";  			};  			frame@b126000 {  				frame-number = <4>;  				interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; -				reg = <0x0b126000 0x1000>; +				reg = <0x0 0x0b126000 0x0 0x1000>;  				status = "disabled";  			};  			frame@b127000 {  				frame-number = <5>;  				interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; -				reg = <0x0b127000 0x1000>; +				reg = <0x0 0x0b127000 0x0 0x1000>;  				status = "disabled";  			};  			frame@b128000 {  				frame-number = <6>;  				interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; -				reg = <0x0b128000 0x1000>; +				reg = <0x0 0x0b128000 0x0 0x1000>;  				status = "disabled";  			};  		};  		q6v5_wcss: remoteproc@cd00000 {  			compatible = "qcom,ipq8074-wcss-pil"; -			reg = 
<0x0cd00000 0x4040>, -				<0x004ab000 0x20>; +			reg = <0x0 0x0cd00000 0x0 0x4040>, +			      <0x0 0x004ab000 0x0 0x20>;  			reg-names = "qdsp6",  				    "rmb";  			interrupts-extended = <&intc GIC_SPI 325 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 9cbf963aa068..c29643442e91 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -28,6 +28,12 @@  		clock-frequency = <0>;  	}; +	audio_clk_b: audio_clk_b { +		compatible = "fixed-clock"; +		#clock-cells = <0>; +		clock-frequency = <0>; +	}; +  	audio_clk_c: audio_clk_c {  		compatible = "fixed-clock";  		#clock-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts index 35bd6b904b9c..337681038519 100644 --- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts @@ -243,7 +243,6 @@  		interrupts = <RK_PB2 IRQ_TYPE_LEVEL_LOW>;  		pinctrl-names = "default";  		pinctrl-0 = <&pmic_int>; -		rockchip,system-power-controller;  		wakeup-source;  		#clock-cells = <1>;  		clock-output-names = "rk808-clkout1", "xin32k"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts index be7a31d81632..2ee07d15a6e3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts @@ -20,7 +20,7 @@  	gmac_clk: gmac-clock {  		compatible = "fixed-clock";  		clock-frequency = <125000000>; -		clock-output-names = "gmac_clk"; +		clock-output-names = "gmac_clkin";  		#clock-cells = <0>;  	}; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index e7a459fa4322..20309076dbac 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -74,14 +74,14 @@  		
	label = "red:diy";  			gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>;  			default-state = "off"; -			linux,default-trigger = "mmc1"; +			linux,default-trigger = "mmc2";  		};  		yellow_led: led-2 {  			label = "yellow:yellow-led";  			gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;  			default-state = "off"; -			linux,default-trigger = "mmc0"; +			linux,default-trigger = "mmc1";  		};  	}; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index ada724b12f01..7a9a7aca86c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -29,6 +29,9 @@  		i2c6 = &i2c6;  		i2c7 = &i2c7;  		i2c8 = &i2c8; +		mmc0 = &sdio0; +		mmc1 = &sdmmc; +		mmc2 = &sdhci;  		serial0 = &uart0;  		serial1 = &uart1;  		serial2 = &uart2; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 17a2df6a263e..5cfe3cf6f2ac 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -500,6 +500,7 @@ CONFIG_GPIO_ALTERA=m  CONFIG_GPIO_DWAPB=y  CONFIG_GPIO_MB86S7X=y  CONFIG_GPIO_MPC8XXX=y +CONFIG_GPIO_MXC=y  CONFIG_GPIO_PL061=y  CONFIG_GPIO_RCAR=y  CONFIG_GPIO_UNIPHIER=y diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index e3d47b52161d..ec7720dbe2c8 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -10,6 +10,7 @@   * #imm16 values used for BRK instruction generation   * 0x004: for installing kprobes   * 0x005: for installing uprobes + * 0x006: for kprobe software single-step   * Allowed values for kgdb are 0x400 - 0x7ff   * 0x100: for triggering a fault on purpose (reserved)   * 0x400: for dynamic BRK instruction @@ -19,6 +20,7 @@   */  #define KPROBES_BRK_IMM			0x004  #define UPROBES_BRK_IMM			0x005 +#define KPROBES_BRK_SS_IMM		0x006  #define FAULT_BRK_IMM			0x100  #define KGDB_DYN_DBG_BRK_IMM		0x400  #define KGDB_COMPILED_DBG_BRK_IMM	0x401 diff --git a/arch/arm64/include/asm/cache.h 
b/arch/arm64/include/asm/cache.h index 0ac3e06a2118..63d43b5f82f6 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -24,6 +24,7 @@  #define CTR_L1IP(ctr)		(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)  #define ICACHE_POLICY_VPIPT	0 +#define ICACHE_POLICY_RESERVED	1  #define ICACHE_POLICY_VIPT	2  #define ICACHE_POLICY_PIPT	3 diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 42868dbd29fd..e7d98997c09c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -65,7 +65,8 @@  #define ARM64_HAS_ARMv8_4_TTL			55  #define ARM64_HAS_TLB_RANGE			56  #define ARM64_MTE				57 +#define ARM64_WORKAROUND_1508412		58 -#define ARM64_NCAPS				58 +#define ARM64_NCAPS				59  #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f7e7144af174..da250e4741bd 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -268,6 +268,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;  /*   * CPU feature detected at boot time based on feature of one or more CPUs.   * All possible conflicts for a late CPU are ignored. + * NOTE: this means that a late CPU with the feature will *not* cause the + * capability to be advertised by cpus_have_*cap()!   
*/  #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE		\  	(ARM64_CPUCAP_SCOPE_LOCAL_CPU		|	\ @@ -375,6 +377,23 @@ cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry,  	return false;  } +static __always_inline bool is_vhe_hyp_code(void) +{ +	/* Only defined for code run in VHE hyp context */ +	return __is_defined(__KVM_VHE_HYPERVISOR__); +} + +static __always_inline bool is_nvhe_hyp_code(void) +{ +	/* Only defined for code run in NVHE hyp context */ +	return __is_defined(__KVM_NVHE_HYPERVISOR__); +} + +static __always_inline bool is_hyp_code(void) +{ +	return is_vhe_hyp_code() || is_nvhe_hyp_code(); +} +  extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);  extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];  extern struct static_key_false arm64_const_caps_ready; @@ -428,35 +447,40 @@ static __always_inline bool __cpus_have_const_cap(int num)  }  /* - * Test for a capability, possibly with a runtime check. + * Test for a capability without a runtime check.   * - * Before capabilities are finalized, this behaves as cpus_have_cap(). + * Before capabilities are finalized, this will BUG().   * After capabilities are finalized, this is patched to avoid a runtime check.   *   * @num must be a compile-time constant.   */ -static __always_inline bool cpus_have_const_cap(int num) +static __always_inline bool cpus_have_final_cap(int num)  {  	if (system_capabilities_finalized())  		return __cpus_have_const_cap(num);  	else -		return cpus_have_cap(num); +		BUG();  }  /* - * Test for a capability without a runtime check. + * Test for a capability, possibly with a runtime check for non-hyp code.   * - * Before capabilities are finalized, this will BUG(). + * For hyp code, this behaves the same as cpus_have_final_cap(). + * + * For non-hyp code: + * Before capabilities are finalized, this behaves as cpus_have_cap().   * After capabilities are finalized, this is patched to avoid a runtime check.   *   * @num must be a compile-time constant.   
*/ -static __always_inline bool cpus_have_final_cap(int num) +static __always_inline bool cpus_have_const_cap(int num)  { -	if (system_capabilities_finalized()) +	if (is_hyp_code()) +		return cpus_have_final_cap(num); +	else if (system_capabilities_finalized())  		return __cpus_have_const_cap(num);  	else -		BUG(); +		return cpus_have_cap(num);  }  static inline void cpus_set_cap(unsigned int num) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7219cddeba66..ef5b040dee44 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -71,6 +71,7 @@  #define ARM_CPU_PART_CORTEX_A55		0xD05  #define ARM_CPU_PART_CORTEX_A76		0xD0B  #define ARM_CPU_PART_NEOVERSE_N1	0xD0C +#define ARM_CPU_PART_CORTEX_A77		0xD0D  #define APM_CPU_PART_POTENZA		0x000 @@ -85,6 +86,8 @@  #define QCOM_CPU_PART_FALKOR_V1		0x800  #define QCOM_CPU_PART_FALKOR		0xC00  #define QCOM_CPU_PART_KRYO		0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD	0x800 +#define QCOM_CPU_PART_KRYO_2XX_SILVER	0x801  #define QCOM_CPU_PART_KRYO_3XX_SILVER	0x803  #define QCOM_CPU_PART_KRYO_4XX_GOLD	0x804  #define QCOM_CPU_PART_KRYO_4XX_SILVER	0x805 @@ -105,6 +108,7 @@  #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)  #define MIDR_CORTEX_A76	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)  #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)  #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)  #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)  #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -114,6 +118,8 @@  #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)  #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)  #define MIDR_QCOM_KRYO 
MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)  #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)  #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)  #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 0b298f48f5bf..657c921fd784 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -53,6 +53,7 @@  /* kprobes BRK opcodes with ESR encoding  */  #define BRK64_OPCODE_KPROBES	(AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) +#define BRK64_OPCODE_KPROBES_SS	(AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5))  /* uprobes BRK opcodes with ESR encoding  */  #define BRK64_OPCODE_UPROBES	(AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 97e511d645a2..8699ce30f587 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,7 +16,7 @@  #include <linux/percpu.h>  #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE			1 +#define MAX_INSN_SIZE			2  #define flush_insn_slot(p)		do { } while (0)  #define kretprobe_blacklist_size	0 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0aecbab6a7fb..0cd9f0f75c13 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -118,6 +118,8 @@ struct kvm_arch {  	 */  	unsigned long *pmu_filter;  	unsigned int pmuver; + +	u8 pfr0_csv2;  };  struct kvm_vcpu_fault_info { @@ -239,6 +241,7 @@ enum vcpu_sysreg {  #define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)  #define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)  
#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2) +#define cp14_DBGVCR	(DBGVCR32_EL2 * 2)  #define NR_COPRO_REGS	(NR_SYS_REGS * 2) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4ff12a7adcfd..5628289b9d5e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -115,8 +115,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];  #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))  #define pte_valid_not_user(pte) \  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_young(pte) \ -	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))  #define pte_valid_user(pte) \  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) @@ -124,9 +122,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];   * Could the pte be present in the TLB? We must check mm_tlb_flush_pending   * so that we don't erroneously return false for pages that have been   * remapped as PROT_NONE but are yet to be flushed from the TLB. + * Note that we can't make any assumptions based on the state of the access + * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * TLB.   */  #define pte_accessible(mm, pte)	\ -	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) +	(mm_tlb_flush_pending(mm) ? 
pte_present(pte) : pte_valid(pte))  /*   * p??_access_permitted() is true for valid user mappings (subject to the @@ -164,13 +165,6 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)  	return pmd;  } -static inline pte_t pte_wrprotect(pte_t pte) -{ -	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); -	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); -	return pte; -} -  static inline pte_t pte_mkwrite(pte_t pte)  {  	pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -196,6 +190,20 @@ static inline pte_t pte_mkdirty(pte_t pte)  	return pte;  } +static inline pte_t pte_wrprotect(pte_t pte) +{ +	/* +	 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY +	 * clear), set the PTE_DIRTY bit. +	 */ +	if (pte_hw_dirty(pte)) +		pte = pte_mkdirty(pte); + +	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); +	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); +	return pte; +} +  static inline pte_t pte_mkold(pte_t pte)  {  	return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -845,12 +853,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres  	pte = READ_ONCE(*ptep);  	do {  		old_pte = pte; -		/* -		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY -		 * clear), set the PTE_DIRTY bit. 
-		 */ -		if (pte_hw_dirty(pte)) -			pte = pte_mkdirty(pte);  		pte = pte_wrprotect(pte);  		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),  					       pte_val(old_pte), pte_val(pte)); diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 4266262101fe..006946745352 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -7,6 +7,8 @@  #ifndef _ARM_PROBES_H  #define _ARM_PROBES_H +#include <asm/insn.h> +  typedef u32 probe_opcode_t;  typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d52c1b3ce589..e2ef4c2edf06 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -372,6 +372,8 @@  #define SYS_CONTEXTIDR_EL1		sys_reg(3, 0, 13, 0, 1)  #define SYS_TPIDR_EL1			sys_reg(3, 0, 13, 0, 4) +#define SYS_SCXTNUM_EL1			sys_reg(3, 0, 13, 0, 7) +  #define SYS_CNTKCTL_EL1			sys_reg(3, 0, 14, 1, 0)  #define SYS_CCSIDR_EL1			sys_reg(3, 1, 0, 0, 0) @@ -404,6 +406,8 @@  #define SYS_TPIDR_EL0			sys_reg(3, 3, 13, 0, 2)  #define SYS_TPIDRRO_EL0			sys_reg(3, 3, 13, 0, 3) +#define SYS_SCXTNUM_EL0			sys_reg(3, 3, 13, 0, 7) +  /* Definitions for system register interface to AMU for ARMv8.4 onwards */  #define SYS_AM_EL0(crm, op2)		sys_reg(3, 3, 13, (crm), (op2))  #define SYS_AMCR_EL0			SYS_AM_EL0(2, 0) @@ -1007,6 +1011,7 @@  #include <linux/build_bug.h>  #include <linux/types.h> +#include <asm/alternative.h>  #define __DEFINE_MRS_MSR_S_REGNUM				\  "	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ @@ -1095,6 +1100,14 @@  		write_sysreg_s(__scs_new, sysreg);			\  } while (0) +#define read_sysreg_par() ({						\ +	u64 par;							\ +	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412));	\ +	par = read_sysreg(par_el1);					\ +	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412));	\ +	par;								\ +}) +  #endif  #endif	/* 
__ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 09977acc007d..6069be50baf9 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -86,13 +86,12 @@ static inline bool is_kernel_in_hyp_mode(void)  static __always_inline bool has_vhe(void)  {  	/* -	 * The following macros are defined for code specic to VHE/nVHE. -	 * If has_vhe() is inlined into those compilation units, it can -	 * be determined statically. Otherwise fall back to caps. +	 * Code only run in VHE/NVHE hyp context can assume VHE is present or +	 * absent. Otherwise fall back to caps.  	 */ -	if (__is_defined(__KVM_VHE_HYPERVISOR__)) +	if (is_vhe_hyp_code())  		return true; -	else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) +	else if (is_nvhe_hyp_code())  		return false;  	else  		return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 24d75af344b1..cafaf0da05b7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -299,6 +299,8 @@ static const struct midr_range erratum_845719_list[] = {  	MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),  	/* Brahma-B53 r0p[0] */  	MIDR_REV(MIDR_BRAHMA_B53, 0, 0), +	/* Kryo2XX Silver rAp4 */ +	MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4),  	{},  };  #endif @@ -523,6 +525,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {  		.cpu_enable = cpu_enable_trap_ctr_access,  	},  #endif +#ifdef CONFIG_ARM64_ERRATUM_1508412 +	{ +		/* we depend on the firmware portion for correctness */ +		.desc = "ARM erratum 1508412 (kernel portion)", +		.capability = ARM64_WORKAROUND_1508412, +		ERRATA_MIDR_RANGE(MIDR_CORTEX_A77, +				  0, 0, +				  1, 0), +	}, +#endif  	{  	}  }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index dcc165b3fc04..6f36c4f62f69 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1337,6 +1337,8 @@ static bool 
unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,  		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),  		MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),  		MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), +		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD), +		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),  		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),  		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),  		{ /* sentinel */ } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 6a7bb3729d60..77605aec25fe 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -34,10 +34,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);  static struct cpuinfo_arm64 boot_cpu_data;  static const char *icache_policy_str[] = { -	[0 ... ICACHE_POLICY_PIPT]	= "RESERVED/UNKNOWN", +	[ICACHE_POLICY_VPIPT]		= "VPIPT", +	[ICACHE_POLICY_RESERVED]	= "RESERVED/UNKNOWN",  	[ICACHE_POLICY_VIPT]		= "VIPT",  	[ICACHE_POLICY_PIPT]		= "PIPT", -	[ICACHE_POLICY_VPIPT]		= "VPIPT",  };  unsigned long __icache_flags; @@ -334,10 +334,11 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)  	case ICACHE_POLICY_VPIPT:  		set_bit(ICACHEF_VPIPT, &__icache_flags);  		break; -	default: +	case ICACHE_POLICY_RESERVED:  	case ICACHE_POLICY_VIPT:  		/* Assume aliasing */  		set_bit(ICACHEF_ALIASING, &__icache_flags); +		break;  	}  	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index df67c0f2a077..a71844fb923e 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -147,6 +147,6 @@ efi_debug_entry:  	 * correctly at this alignment, we must ensure that .text is  	 * placed at a 4k boundary in the Image to begin with.  	 
*/ -	.align 12 +	.balign	SEGMENT_ALIGN  efi_header_end:  	.endm diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f30007dff35f..b295fb912b12 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -365,6 +365,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0  	br	x30  #endif  	.else +	/* Ensure any device/NC reads complete */ +	alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 +  	eret  	.endif  	sb diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 61684a500914..c615b285ff5b 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -87,7 +87,6 @@ KVM_NVHE_ALIAS(__icache_flags);  /* Kernel symbols needed for cpus_have_final/const_caps checks. */  KVM_NVHE_ALIAS(arm64_const_caps_ready);  KVM_NVHE_ALIAS(cpu_hwcap_keys); -KVM_NVHE_ALIAS(cpu_hwcaps);  /* Static keys which are set if a vGIC trap should be handled in hyp. */  KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index af9987c154ca..9ec34690e255 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -43,7 +43,7 @@ static void *image_load(struct kimage *image,  	u64 flags, value;  	bool be_image, be_kernel;  	struct kexec_buf kbuf; -	unsigned long text_offset; +	unsigned long text_offset, kernel_segment_number;  	struct kexec_segment *kernel_segment;  	int ret; @@ -88,11 +88,37 @@ static void *image_load(struct kimage *image,  	/* Adjust kernel segment with TEXT_OFFSET */  	kbuf.memsz += text_offset; -	ret = kexec_add_buffer(&kbuf); -	if (ret) +	kernel_segment_number = image->nr_segments; + +	/* +	 * The location of the kernel segment may make it impossible to satisfy +	 * the other segment requirements, so we try repeatedly to find a +	 * location that will work. 
+	 */ +	while ((ret = kexec_add_buffer(&kbuf)) == 0) { +		/* Try to load additional data */ +		kernel_segment = &image->segment[kernel_segment_number]; +		ret = load_other_segments(image, kernel_segment->mem, +					  kernel_segment->memsz, initrd, +					  initrd_len, cmdline); +		if (!ret) +			break; + +		/* +		 * We couldn't find space for the other segments; erase the +		 * kernel segment and try the next available hole. +		 */ +		image->nr_segments -= 1; +		kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; +		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; +	} + +	if (ret) { +		pr_err("Could not find any suitable kernel location!");  		return ERR_PTR(ret); +	} -	kernel_segment = &image->segment[image->nr_segments - 1]; +	kernel_segment = &image->segment[kernel_segment_number];  	kernel_segment->mem += text_offset;  	kernel_segment->memsz -= text_offset;  	image->start = kernel_segment->mem; @@ -101,12 +127,7 @@ static void *image_load(struct kimage *image,  				kernel_segment->mem, kbuf.bufsz,  				kernel_segment->memsz); -	/* Load additional data */ -	ret = load_other_segments(image, -				kernel_segment->mem, kernel_segment->memsz, -				initrd, initrd_len, cmdline); - -	return ERR_PTR(ret); +	return NULL;  }  #ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 5b0e67b93cdc..03210f644790 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -240,6 +240,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)  	return ret;  } +/* + * Tries to add the initrd and DTB to the image. If it is not possible to find + * valid locations, this function will undo changes to the image and return non + * zero. 
+ */  int load_other_segments(struct kimage *image,  			unsigned long kernel_load_addr,  			unsigned long kernel_size, @@ -248,7 +253,8 @@ int load_other_segments(struct kimage *image,  {  	struct kexec_buf kbuf;  	void *headers, *dtb = NULL; -	unsigned long headers_sz, initrd_load_addr = 0, dtb_len; +	unsigned long headers_sz, initrd_load_addr = 0, dtb_len, +		      orig_segments = image->nr_segments;  	int ret = 0;  	kbuf.image = image; @@ -334,6 +340,7 @@ int load_other_segments(struct kimage *image,  	return 0;  out_err: +	image->nr_segments = orig_segments;  	vfree(dtb);  	return ret;  } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 94e8718e7229..f6f58e6265df 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -73,8 +73,7 @@ u64 perf_reg_abi(struct task_struct *task)  }  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  {  	regs_user->regs = task_pt_regs(current);  	regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index deba738142ed..f11a1a1f7026 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);  static void __kprobes  post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) -{ -	void *addrs[1]; -	u32 insns[1]; - -	addrs[0] = addr; -	insns[0] = opcode; - -	return aarch64_insn_patch_text(addrs, insns, 1); -} -  static void __kprobes arch_prepare_ss_slot(struct kprobe *p)  { +	kprobe_opcode_t *addr = p->ainsn.api.insn; +	void *addrs[] = {addr, addr + 1}; +	u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; +  	/* prepare insn slot */ -	patch_text(p->ainsn.api.insn, p->opcode); +	aarch64_insn_patch_text(addrs, insns, 2); -	
flush_icache_range((uintptr_t) (p->ainsn.api.insn), -			   (uintptr_t) (p->ainsn.api.insn) + -			   MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); +	flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE));  	/*  	 * Needs restoring of return address after stepping xol. @@ -128,13 +119,18 @@ void *alloc_insn_page(void)  /* arm kprobe: install breakpoint in text */  void __kprobes arch_arm_kprobe(struct kprobe *p)  { -	patch_text(p->addr, BRK64_OPCODE_KPROBES); +	void *addr = p->addr; +	u32 insn = BRK64_OPCODE_KPROBES; + +	aarch64_insn_patch_text(&addr, &insn, 1);  }  /* disarm kprobe: remove breakpoint from text */  void __kprobes arch_disarm_kprobe(struct kprobe *p)  { -	patch_text(p->addr, p->opcode); +	void *addr = p->addr; + +	aarch64_insn_patch_text(&addr, &p->opcode, 1);  }  void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -163,20 +159,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p)  }  /* - * Interrupts need to be disabled before single-step mode is set, and not - * reenabled until after single-step mode ends. - * Without disabling interrupt on local CPU, there is a chance of - * interrupt occurrence in the period of exception return and  start of - * out-of-line single-step, that result in wrongly single stepping - * into the interrupt handler. + * Mask all of DAIF while executing the instruction out-of-line, to keep things + * simple and avoid nesting exceptions. Interrupts do have to be disabled since + * the kprobe state is per-CPU and doesn't get migrated.   */  static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,  						struct pt_regs *regs)  {  	kcb->saved_irqflag = regs->pstate & DAIF_MASK; -	regs->pstate |= PSR_I_BIT; -	/* Unmask PSTATE.D for enabling software step exceptions. 
*/ -	regs->pstate &= ~PSR_D_BIT; +	regs->pstate |= DAIF_MASK;  }  static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, @@ -219,10 +210,7 @@ static void __kprobes setup_singlestep(struct kprobe *p,  		slot = (unsigned long)p->ainsn.api.insn;  		set_ss_context(kcb, slot);	/* mark pending ss */ - -		/* IRQs and single stepping do not mix well. */  		kprobes_save_local_irqflag(kcb, regs); -		kernel_enable_single_step(regs);  		instruction_pointer_set(regs, slot);  	} else {  		/* insn simulation */ @@ -273,12 +261,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)  	}  	/* call post handler */  	kcb->kprobe_status = KPROBE_HIT_SSDONE; -	if (cur->post_handler)	{ -		/* post_handler can hit breakpoint and single step -		 * again, so we enable D-flag for recursive exception. -		 */ +	if (cur->post_handler)  		cur->post_handler(cur, regs, 0); -	}  	reset_current_kprobe();  } @@ -302,8 +286,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)  		if (!instruction_pointer(regs))  			BUG(); -		kernel_disable_single_step(); -  		if (kcb->kprobe_status == KPROBE_REENTER)  			restore_previous_kprobe(kcb);  		else @@ -365,10 +347,6 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)  			 * pre-handler and it returned non-zero, it will  			 * modify the execution path and no need to single  			 * stepping. Let's just reset current kprobe and exit. -			 * -			 * pre_handler can hit a breakpoint and can step thru -			 * before return, keep PSTATE D-flag enabled until -			 * pre_handler return back.  			 
*/  			if (!p->pre_handler || !p->pre_handler(p, regs)) {  				setup_singlestep(p, regs, kcb, 0); @@ -399,7 +377,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)  }  static int __kprobes -kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)  {  	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();  	int retval; @@ -409,16 +387,15 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)  	if (retval == DBG_HOOK_HANDLED) {  		kprobes_restore_local_irqflag(kcb, regs); -		kernel_disable_single_step(); -  		post_kprobe_handler(kcb, regs);  	}  	return retval;  } -static struct step_hook kprobes_step_hook = { -	.fn = kprobe_single_step_handler, +static struct break_hook kprobes_break_ss_hook = { +	.imm = KPROBES_BRK_SS_IMM, +	.fn = kprobe_breakpoint_ss_handler,  };  static int __kprobes @@ -486,7 +463,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)  int __init arch_init_kprobes(void)  {  	register_kernel_break_hook(&kprobes_break_hook); -	register_kernel_step_hook(&kprobes_step_hook); +	register_kernel_break_hook(&kprobes_break_ss_hook);  	return 0;  } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4784011cecac..7697a4b48b7c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -126,7 +126,7 @@ void arch_cpu_idle(void)  	 * tricks  	 */  	cpu_do_idle(); -	local_irq_enable(); +	raw_local_irq_enable();  }  #ifdef CONFIG_HOTPLUG_CPU @@ -522,14 +522,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,  	bool prev32, next32;  	u64 val; -	if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && -	      cpus_have_const_cap(ARM64_WORKAROUND_1418040))) +	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))  		return;  	prev32 = is_compat_thread(task_thread_info(prev));  	next32 = is_compat_thread(task_thread_info(next)); -	if (prev32 == next32) +	if (prev32 == next32 || 
!this_cpu_has_cap(ARM64_WORKAROUND_1418040))  		return;  	val = read_sysreg(cntkctl_el1); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 25f3c80b5ffe..f6e4e3737405 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -118,6 +118,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)  		MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),  		MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),  		MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), +		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),  		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),  		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),  		{ /* sentinel */ } @@ -135,8 +136,6 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)  	return SPECTRE_VULNERABLE;  } -#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED	(1) -  static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void)  {  	int ret; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 43ae4e0c968f..62d2bda7adb8 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -66,7 +66,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu)  static void cpu_psci_cpu_die(unsigned int cpu)  { -	int ret;  	/*  	 * There are no known implementations of PSCI actually using the  	 * power state field, pass a sensible default for now. 
@@ -74,9 +73,7 @@ static void cpu_psci_cpu_die(unsigned int cpu)  	u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN <<  		    PSCI_0_2_POWER_STATE_TYPE_SHIFT; -	ret = psci_ops.cpu_off(state); - -	pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); +	psci_ops.cpu_off(state);  }  static int cpu_psci_cpu_kill(unsigned int cpu) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 82e75fc2c903..18e9727d3f64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -222,6 +222,7 @@ asmlinkage notrace void secondary_start_kernel(void)  	if (system_uses_irq_prio_masking())  		init_gic_priority_masking(); +	rcu_cpu_starting(cpu);  	preempt_disable();  	trace_hardirqs_off(); @@ -412,6 +413,7 @@ void cpu_die_early(void)  	/* Mark this CPU absent */  	set_cpu_present(cpu, 0); +	rcu_report_dead(cpu);  	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {  		update_cpu_boot_status(CPU_KILL_ME); diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 7f96a1a9f68c..79280c53b9a6 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -22,16 +22,21 @@ endif  CC_COMPAT ?= $(CC)  CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS) + +ifneq ($(LLVM),) +LD_COMPAT ?= $(LD) +else +LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld +endif  else  CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc +LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld  endif  cc32-option = $(call try-run,\          $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))  cc32-disable-warning = $(call try-run,\  	$(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) -cc32-ldoption = $(call try-run,\ -        $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))  cc32-as-instr = $(call try-run,\  	printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) @@ -122,14 +127,10 @@ dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1)  VDSO_CFLAGS += $(dmbinstr)  VDSO_AFLAGS += $(dmbinstr) -VDSO_LDFLAGS := 
$(VDSO_CPPFLAGS)  # From arm vDSO Makefile -VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 -VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 -VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft -VDSO_LDFLAGS += -Wl,--hash-style=sysv -VDSO_LDFLAGS += -Wl,--build-id=sha1 -VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) +VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 +VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared --hash-style=sysv --build-id=sha1  # Borrow vdsomunge.c from the arm vDSO @@ -189,8 +190,8 @@ quiet_cmd_vdsold_and_vdso_check = LD32    $@        cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)  quiet_cmd_vdsold = LD32    $@ -      cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ -                   -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ +      cmd_vdsold = $(LD_COMPAT) $(VDSO_LDFLAGS) \ +                   -T $(filter %.lds,$^) $(filter %.o,$^) -o $@  quiet_cmd_vdsocc = CC32    $@        cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<  quiet_cmd_vdsocc_gettimeofday = CC32    $@ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6d78c041fdf6..1bda604f4c70 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -278,7 +278,7 @@ SECTIONS  	 * explicitly check instead of blindly discarding.  	 */  	.plt : { -		*(.plt) *(.plt.*) *(.iplt) *(.igot) +		*(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt)  	}  	ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f56122eedffc..c0ffb019ca8b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -102,6 +102,20 @@ static int kvm_arm_default_max_vcpus(void)  	return vgic_present ? 
kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;  } +static void set_default_csv2(struct kvm *kvm) +{ +	/* +	 * The default is to expose CSV2 == 1 if the HW isn't affected. +	 * Although this is a per-CPU feature, we make it global because +	 * asymmetric systems are just a nuisance. +	 * +	 * Userspace can override this as long as it doesn't promise +	 * the impossible. +	 */ +	if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) +		kvm->arch.pfr0_csv2 = 1; +} +  /**   * kvm_arch_init_vm - initializes a VM data structure   * @kvm:	pointer to the KVM struct @@ -127,6 +141,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)  	/* The maximum number of VCPUs is limited by the host's GIC model */  	kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); +	set_default_csv2(kvm); +  	return ret;  out_free_stage2_pgd:  	kvm_free_stage2_pgd(&kvm->arch.mmu); @@ -808,6 +824,25 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)  		preempt_enable(); +		/* +		 * The ARMv8 architecture doesn't give the hypervisor +		 * a mechanism to prevent a guest from dropping to AArch32 EL0 +		 * if implemented by the CPU. If we spot the guest in such +		 * state and that we decided it wasn't supposed to do so (like +		 * with the asymmetric AArch32 case), return to userspace with +		 * a fatal error. +		 */ +		if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) { +			/* +			 * As we have caught the guest red-handed, decide that +			 * it isn't fit for purpose anymore by making the vcpu +			 * invalid. The VMM can try and fix it by issuing  a +			 * KVM_ARM_VCPU_INIT if it really wants to. 
+			 */ +			vcpu->arch.target = -1; +			ret = ARM_EXCEPTION_IL; +		} +  		ret = handle_exit(vcpu, ret);  	} @@ -1719,7 +1754,8 @@ int kvm_arch_init(void *opaque)  		return -ENODEV;  	} -	if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)) +	if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || +	    cpus_have_final_cap(ARM64_WORKAROUND_1508412))  		kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \  			 "Only trusted guests should be used on this system.\n"); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 313a8fa3c721..1f875a8f20c4 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -140,9 +140,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)  	 * We do need to save/restore PAR_EL1 though, as we haven't  	 * saved the guest context yet, and we may return early...  	 */ -	par = read_sysreg(par_el1); +	par = read_sysreg_par();  	if (!__kvm_at("s1e1r", far)) -		tmp = read_sysreg(par_el1); +		tmp = read_sysreg_par();  	else  		tmp = SYS_PAR_EL1_F; /* back to the guest */  	write_sysreg(par, par_el1); @@ -421,7 +421,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)  	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&  	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&  	    handle_tx2_tvm(vcpu)) -		return true; +		goto guest;  	/*  	 * We trap the first access to the FP/SIMD to save the host context @@ -431,13 +431,13 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)  	 * Similarly for trapped SVE accesses.  	 
*/  	if (__hyp_handle_fpsimd(vcpu)) -		return true; +		goto guest;  	if (__hyp_handle_ptrauth(vcpu)) -		return true; +		goto guest;  	if (!__populate_fault_info(vcpu)) -		return true; +		goto guest;  	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {  		bool valid; @@ -452,7 +452,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)  			int ret = __vgic_v2_perform_cpuif_access(vcpu);  			if (ret == 1) -				return true; +				goto guest;  			/* Promote an illegal access to an SError.*/  			if (ret == -1) @@ -468,12 +468,17 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)  		int ret = __vgic_v3_perform_cpuif_access(vcpu);  		if (ret == 1) -			return true; +			goto guest;  	}  exit:  	/* Return to the host kernel and handle the exit */  	return false; + +guest: +	/* Re-enter the guest */ +	asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); +	return true;  }  static inline void __kvm_unexpected_el2_exception(void) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 7a986030145f..cce43bfe158f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -43,7 +43,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)  	ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);  	ctxt_sys_reg(ctxt, AMAIR_EL1)	= read_sysreg_el1(SYS_AMAIR);  	ctxt_sys_reg(ctxt, CNTKCTL_EL1)	= read_sysreg_el1(SYS_CNTKCTL); -	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg(par_el1); +	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();  	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);  	ctxt_sys_reg(ctxt, SP_EL1)	= read_sysreg(sp_el1); diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ff9a0f547b9f..ed27f06a31ba 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -17,8 +17,6 @@ SYM_FUNC_START(__host_exit)  	get_host_ctxt	x0, x1 -	
ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) -  	/* Store the host regs x2 and x3 */  	stp	x2, x3,   [x0, #CPU_XREG_OFFSET(2)] diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 47224dc62c51..b11a9d7db677 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,16 +57,25 @@ __do_hyp_init:  	cmp	x0, #HVC_STUB_HCALL_NR  	b.lo	__kvm_handle_stub_hvc -	/* Set tpidr_el2 for use by HYP to free a register */ -	msr	tpidr_el2, x2 - -	mov	x2, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) -	cmp	x0, x2 -	b.eq	1f +	// We only actively check bits [24:31], and everything +	// else has to be zero, which we check at build time. +#if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF) +#error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value +#endif + +	ror	x0, x0, #24 +	eor	x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF) +	ror	x0, x0, #4 +	eor	x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF) +	cbz	x0, 1f  	mov	x0, #SMCCC_RET_NOT_SUPPORTED  	eret -1:	phys_to_ttbr x0, x1 +1: +	/* Set tpidr_el2 for use by HYP to free a register */ +	msr	tpidr_el2, x2 + +	phys_to_ttbr x0, x1  alternative_if ARM64_HAS_CNP  	orr	x0, x0, #TTBR_CNP_BIT  alternative_else_nop_endif diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index bb2d986ff696..a797abace13f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -13,6 +13,11 @@  SECTIONS {  	HYP_SECTION(.text) +	/* +	 * .hyp..data..percpu needs to be page aligned to maintain the same +	 * alignment for when linking into vmlinux. +	 */ +	. 
= ALIGN(PAGE_SIZE);  	HYP_SECTION_NAME(.data..percpu) : {  		PERCPU_INPUT(L1_CACHE_BYTES)  	} diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a457a0306e03..8ae8160bc93a 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -250,7 +250,7 @@ void __noreturn hyp_panic(void)  {  	u64 spsr = read_sysreg_el2(SYS_SPSR);  	u64 elr = read_sysreg_el2(SYS_ELR); -	u64 par = read_sysreg(par_el1); +	u64 par = read_sysreg_par();  	bool restore_host = true;  	struct kvm_cpu_context *host_ctxt;  	struct kvm_vcpu *vcpu; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 39ca71ab8866..fbde89a2c6e8 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -128,7 +128,6 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)  	struct tlb_inv_context cxt;  	/* Switch to requested VMID */ -	mmu = kern_hyp_va(mmu);  	__tlb_switch_to_guest(mmu, &cxt);  	__tlbi(vmalle1); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0cdf6e461cbd..0271b4a3b9fe 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -635,7 +635,7 @@ static void stage2_flush_dcache(void *addr, u64 size)  static bool stage2_pte_cacheable(kvm_pte_t pte)  { -	u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte); +	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;  	return memattr == PAGE_S2_MEMATTR(NORMAL);  } @@ -846,7 +846,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)  	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;  	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; -	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO); +	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);  	if (!pgt->pgd)  		return -ENOMEM; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index fe69de16dadc..62546e20b251 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c 
+++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -215,7 +215,7 @@ void __noreturn hyp_panic(void)  {  	u64 spsr = read_sysreg_el2(SYS_SPSR);  	u64 elr = read_sysreg_el2(SYS_ELR); -	u64 par = read_sysreg(par_el1); +	u64 par = read_sysreg_par();  	__hyp_call_panic(spsr, elr, par);  	unreachable(); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 9824025ccc5c..25ea4ecb6449 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -31,7 +31,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)  				val = SMCCC_RET_SUCCESS;  				break;  			case SPECTRE_UNAFFECTED: -				val = SMCCC_RET_NOT_REQUIRED; +				val = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED;  				break;  			}  			break; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 19aacc7d64de..1a01da9fdc99 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -787,14 +787,28 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,  		vma_shift = PAGE_SHIFT;  	} -	if (vma_shift == PUD_SHIFT && -	    !fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) -	       vma_shift = PMD_SHIFT; - -	if (vma_shift == PMD_SHIFT && -	    !fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { -		force_pte = true; +	switch (vma_shift) { +#ifndef __PAGETABLE_PMD_FOLDED +	case PUD_SHIFT: +		if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) +			break; +		fallthrough; +#endif +	case CONT_PMD_SHIFT: +		vma_shift = PMD_SHIFT; +		fallthrough; +	case PMD_SHIFT: +		if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) +			break; +		fallthrough; +	case CONT_PTE_SHIFT:  		vma_shift = PAGE_SHIFT; +		force_pte = true; +		fallthrough; +	case PAGE_SHIFT: +		break; +	default: +		WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);  	}  	vma_pagesize = 1UL << vma_shift; @@ -839,6 +853,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,  	if (kvm_is_device_pfn(pfn)) {  		device = true; +		force_pte = true;  	} else if 
(logging_active && !write_fault) {  		/*  		 * Only actually map the page as writable if this was a write diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d9117bc56237..c1fac9836af1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -95,7 +95,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)  	case AMAIR_EL1:		*val = read_sysreg_s(SYS_AMAIR_EL12);	break;  	case CNTKCTL_EL1:	*val = read_sysreg_s(SYS_CNTKCTL_EL12);	break;  	case ELR_EL1:		*val = read_sysreg_s(SYS_ELR_EL12);	break; -	case PAR_EL1:		*val = read_sysreg_s(SYS_PAR_EL1);	break; +	case PAR_EL1:		*val = read_sysreg_par();		break;  	case DACR32_EL2:	*val = read_sysreg_s(SYS_DACR32_EL2);	break;  	case IFSR32_EL2:	*val = read_sysreg_s(SYS_IFSR32_EL2);	break;  	case DBGVCR32_EL2:	*val = read_sysreg_s(SYS_DBGVCR32_EL2);	break; @@ -1038,8 +1038,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,  	{ SYS_DESC(SYS_PMEVTYPERn_EL0(n)),					\  	  access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } -static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p, -			     const struct sys_reg_desc *r) +static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, +			 const struct sys_reg_desc *r)  {  	kvm_inject_undefined(vcpu); @@ -1047,33 +1047,25 @@ static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p,  }  /* Macro to expand the AMU counter and type registers*/ -#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), access_amu } -#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), access_amu } -#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), access_amu } -#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), access_amu } - -static bool trap_ptrauth(struct kvm_vcpu *vcpu, -			 struct sys_reg_params *p, -			 const struct sys_reg_desc *rd) -{ -	/* -	 * If we land here, that is because we didn't fixup the access on exit -	 * by allowing 
the PtrAuth sysregs. The only way this happens is when -	 * the guest does not have PtrAuth support enabled. -	 */ -	kvm_inject_undefined(vcpu); - -	return false; -} +#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access } +#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access } +#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), undef_access } +#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), undef_access }  static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu,  			const struct sys_reg_desc *rd)  { -	return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST; +	return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN;  } +/* + * If we land here on a PtrAuth access, that is because we didn't + * fixup the access on exit by allowing the PtrAuth sysregs. The only + * way this happens is when the guest does not have PtrAuth support + * enabled. + */  #define __PTRAUTH_KEY(k)						\ -	{ SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k,		\ +	{ SYS_DESC(SYS_## k), undef_access, reset_unknown, k,		\  	.visibility = ptrauth_visibility}  #define PTRAUTH_KEY(k)							\ @@ -1128,9 +1120,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,  		if (!vcpu_has_sve(vcpu))  			val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);  		val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); -		if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) && -		    arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) -			val |= (1UL << ID_AA64PFR0_CSV2_SHIFT); +		val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); +		val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT);  	} else if (id == SYS_ID_AA64PFR1_EL1) {  		val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);  	} else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { @@ -1153,6 +1144,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,  	return val;  } +static unsigned int id_visibility(const struct kvm_vcpu *vcpu, +				  const struct sys_reg_desc *r) +{ +	u32 id = 
sys_reg((u32)r->Op0, (u32)r->Op1, +			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + +	switch (id) { +	case SYS_ID_AA64ZFR0_EL1: +		if (!vcpu_has_sve(vcpu)) +			return REG_RAZ; +		break; +	} + +	return 0; +} +  /* cpufeature ID register access trap handlers */  static bool __access_id_reg(struct kvm_vcpu *vcpu, @@ -1171,7 +1178,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,  			  struct sys_reg_params *p,  			  const struct sys_reg_desc *r)  { -	return __access_id_reg(vcpu, p, r, false); +	bool raz = sysreg_visible_as_raz(vcpu, r); + +	return __access_id_reg(vcpu, p, r, raz);  }  static bool access_raz_id_reg(struct kvm_vcpu *vcpu, @@ -1192,71 +1201,40 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,  	if (vcpu_has_sve(vcpu))  		return 0; -	return REG_HIDDEN_USER | REG_HIDDEN_GUEST; -} - -/* Visibility overrides for SVE-specific ID registers */ -static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu, -				      const struct sys_reg_desc *rd) -{ -	if (vcpu_has_sve(vcpu)) -		return 0; - -	return REG_HIDDEN_USER; +	return REG_HIDDEN;  } -/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ -static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu) -{ -	if (!vcpu_has_sve(vcpu)) -		return 0; - -	return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1); -} - -static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, -				   struct sys_reg_params *p, -				   const struct sys_reg_desc *rd) -{ -	if (p->is_write) -		return write_to_read_only(vcpu, p, rd); - -	p->regval = guest_id_aa64zfr0_el1(vcpu); -	return true; -} - -static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, -		const struct sys_reg_desc *rd, -		const struct kvm_one_reg *reg, void __user *uaddr) -{ -	u64 val; - -	if (WARN_ON(!vcpu_has_sve(vcpu))) -		return -ENOENT; - -	val = guest_id_aa64zfr0_el1(vcpu); -	return reg_to_user(uaddr, &val, reg->id); -} - -static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, -		const struct sys_reg_desc *rd, -		const struct 
kvm_one_reg *reg, void __user *uaddr) +static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, +			       const struct sys_reg_desc *rd, +			       const struct kvm_one_reg *reg, void __user *uaddr)  {  	const u64 id = sys_reg_to_index(rd);  	int err;  	u64 val; - -	if (WARN_ON(!vcpu_has_sve(vcpu))) -		return -ENOENT; +	u8 csv2;  	err = reg_from_user(&val, uaddr, id);  	if (err)  		return err; -	/* This is what we mean by invariant: you can't change it. */ -	if (val != guest_id_aa64zfr0_el1(vcpu)) +	/* +	 * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as +	 * it doesn't promise more than what is actually provided (the +	 * guest could otherwise be covered in ectoplasmic residue). +	 */ +	csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT); +	if (csv2 > 1 || +	    (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED))  		return -EINVAL; +	/* We can only differ with CSV2, and anything else is an error */ +	val ^= read_id_reg(vcpu, rd, false); +	val &= ~(0xFUL << ID_AA64PFR0_CSV2_SHIFT); +	if (val) +		return -EINVAL; + +	vcpu->kvm->arch.pfr0_csv2 = csv2; +  	return 0;  } @@ -1299,13 +1277,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,  static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,  		      const struct kvm_one_reg *reg, void __user *uaddr)  { -	return __get_id_reg(vcpu, rd, uaddr, false); +	bool raz = sysreg_visible_as_raz(vcpu, rd); + +	return __get_id_reg(vcpu, rd, uaddr, raz);  }  static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,  		      const struct kvm_one_reg *reg, void __user *uaddr)  { -	return __set_id_reg(vcpu, rd, uaddr, false); +	bool raz = sysreg_visible_as_raz(vcpu, rd); + +	return __set_id_reg(vcpu, rd, uaddr, raz);  }  static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, @@ -1384,19 +1366,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,  	return true;  } -static bool access_mte_regs(struct 
kvm_vcpu *vcpu, struct sys_reg_params *p, -			    const struct sys_reg_desc *r) -{ -	kvm_inject_undefined(vcpu); -	return false; -} -  /* sys_reg_desc initialiser for known cpufeature ID registers */  #define ID_SANITISED(name) {			\  	SYS_DESC(SYS_##name),			\  	.access	= access_id_reg,		\  	.get_user = get_id_reg,			\  	.set_user = set_id_reg,			\ +	.visibility = id_visibility,		\  }  /* @@ -1514,11 +1490,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {  	/* AArch64 ID registers */  	/* CRm=4 */ -	ID_SANITISED(ID_AA64PFR0_EL1), +	{ SYS_DESC(SYS_ID_AA64PFR0_EL1), .access = access_id_reg, +	  .get_user = get_id_reg, .set_user = set_id_aa64pfr0_el1, },  	ID_SANITISED(ID_AA64PFR1_EL1),  	ID_UNALLOCATED(4,2),  	ID_UNALLOCATED(4,3), -	{ SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility }, +	ID_SANITISED(ID_AA64ZFR0_EL1),  	ID_UNALLOCATED(4,5),  	ID_UNALLOCATED(4,6),  	ID_UNALLOCATED(4,7), @@ -1557,8 +1534,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {  	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },  	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, -	{ SYS_DESC(SYS_RGSR_EL1), access_mte_regs }, -	{ SYS_DESC(SYS_GCR_EL1), access_mte_regs }, +	{ SYS_DESC(SYS_RGSR_EL1), undef_access }, +	{ SYS_DESC(SYS_GCR_EL1), undef_access },  	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },  	{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, @@ -1584,8 +1561,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {  	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },  	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, -	{ SYS_DESC(SYS_TFSR_EL1), access_mte_regs }, -	{ SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs }, +	{ SYS_DESC(SYS_TFSR_EL1), undef_access }, +	{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },  	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },  	{ 
SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, @@ -1621,6 +1598,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {  	{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },  	{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 }, +	{ SYS_DESC(SYS_SCXTNUM_EL1), undef_access }, +  	{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},  	{ SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, @@ -1649,14 +1628,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {  	{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },  	{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, -	{ SYS_DESC(SYS_AMCR_EL0), access_amu }, -	{ SYS_DESC(SYS_AMCFGR_EL0), access_amu }, -	{ SYS_DESC(SYS_AMCGCR_EL0), access_amu }, -	{ SYS_DESC(SYS_AMUSERENR_EL0), access_amu }, -	{ SYS_DESC(SYS_AMCNTENCLR0_EL0), access_amu }, -	{ SYS_DESC(SYS_AMCNTENSET0_EL0), access_amu }, -	{ SYS_DESC(SYS_AMCNTENCLR1_EL0), access_amu }, -	{ SYS_DESC(SYS_AMCNTENSET1_EL0), access_amu }, +	{ SYS_DESC(SYS_SCXTNUM_EL0), undef_access }, + +	{ SYS_DESC(SYS_AMCR_EL0), undef_access }, +	{ SYS_DESC(SYS_AMCFGR_EL0), undef_access }, +	{ SYS_DESC(SYS_AMCGCR_EL0), undef_access }, +	{ SYS_DESC(SYS_AMUSERENR_EL0), undef_access }, +	{ SYS_DESC(SYS_AMCNTENCLR0_EL0), undef_access }, +	{ SYS_DESC(SYS_AMCNTENSET0_EL0), undef_access }, +	{ SYS_DESC(SYS_AMCNTENCLR1_EL0), undef_access }, +	{ SYS_DESC(SYS_AMCNTENSET1_EL0), undef_access },  	AMU_AMEVCNTR0_EL0(0),  	AMU_AMEVCNTR0_EL0(1),  	AMU_AMEVCNTR0_EL0(2), @@ -1897,9 +1878,9 @@ static const struct sys_reg_desc cp14_regs[] = {  	{ Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },  	DBG_BCR_BVR_WCR_WVR(1),  	/* DBGDCCINT */ -	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, +	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT },  	/* DBGDSCRext */ -	{ Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, +	{ Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext },  	
DBG_BCR_BVR_WCR_WVR(2),  	/* DBGDTR[RT]Xint */  	{ Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1914,7 +1895,7 @@ static const struct sys_reg_desc cp14_regs[] = {  	{ Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },  	DBG_BCR_BVR_WCR_WVR(6),  	/* DBGVCR */ -	{ Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, +	{ Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR },  	DBG_BCR_BVR_WCR_WVR(7),  	DBG_BCR_BVR_WCR_WVR(8),  	DBG_BCR_BVR_WCR_WVR(9), @@ -2185,7 +2166,7 @@ static void perform_access(struct kvm_vcpu *vcpu,  	trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);  	/* Check for regs disabled by runtime config */ -	if (sysreg_hidden_from_guest(vcpu, r)) { +	if (sysreg_hidden(vcpu, r)) {  		kvm_inject_undefined(vcpu);  		return;  	} @@ -2684,7 +2665,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg  		return get_invariant_sys_reg(reg->id, uaddr);  	/* Check for regs disabled by runtime config */ -	if (sysreg_hidden_from_user(vcpu, r)) +	if (sysreg_hidden(vcpu, r))  		return -ENOENT;  	if (r->get_user) @@ -2709,7 +2690,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg  		return set_invariant_sys_reg(reg->id, uaddr);  	/* Check for regs disabled by runtime config */ -	if (sysreg_hidden_from_user(vcpu, r)) +	if (sysreg_hidden(vcpu, r))  		return -ENOENT;  	if (r->set_user) @@ -2780,7 +2761,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,  	if (!(rd->reg || rd->get_user))  		return 0; -	if (sysreg_hidden_from_user(vcpu, rd)) +	if (sysreg_hidden(vcpu, rd))  		return 0;  	if (!copy_reg_to_user(rd, uind)) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 5a6fc30f5989..0f95964339b1 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -59,8 +59,8 @@ struct sys_reg_desc {  				   const struct sys_reg_desc *rd);  }; -#define REG_HIDDEN_USER		(1 << 0) /* hidden from userspace ioctls */ -#define REG_HIDDEN_GUEST	(1 << 1) 
/* hidden from guest */ +#define REG_HIDDEN		(1 << 0) /* hidden from userspace and guest */ +#define REG_RAZ			(1 << 1) /* RAZ from userspace and guest */  static __printf(2, 3)  inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -111,22 +111,22 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r  	__vcpu_sys_reg(vcpu, r->reg) = r->val;  } -static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu, -					    const struct sys_reg_desc *r) +static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, +				 const struct sys_reg_desc *r)  {  	if (likely(!r->visibility))  		return false; -	return r->visibility(vcpu, r) & REG_HIDDEN_GUEST; +	return r->visibility(vcpu, r) & REG_HIDDEN;  } -static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu, -					   const struct sys_reg_desc *r) +static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, +					 const struct sys_reg_desc *r)  {  	if (likely(!r->visibility))  		return false; -	return r->visibility(vcpu, r) & REG_HIDDEN_USER; +	return r->visibility(vcpu, r) & REG_RAZ;  }  static inline int cmp_sys_reg(const struct sys_reg_desc *i1, diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 52d6f24f65dc..15a6c98ee92f 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -273,6 +273,23 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,  	return extract_bytes(value, addr & 7, len);  } +static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu, +						 gpa_t addr, unsigned int len) +{ +	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); +	int target_vcpu_id = vcpu->vcpu_id; +	u64 value; + +	value = (u64)(mpidr & GENMASK(23, 0)) << 32; +	value |= ((target_vcpu_id & 0xffff) << 8); + +	if (vgic_has_its(vcpu->kvm)) +		value |= GICR_TYPER_PLPIS; + +	/* reporting of the Last bit is not supported for userspace */ +	return extract_bytes(value, 
addr & 7, len); +} +  static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,  					     gpa_t addr, unsigned int len)  { @@ -593,8 +610,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {  	REGISTER_DESC_WITH_LENGTH(GICR_IIDR,  		vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,  		VGIC_ACCESS_32bit), -	REGISTER_DESC_WITH_LENGTH(GICR_TYPER, -		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, +	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER, +		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, +		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,  		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),  	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,  		vgic_mmio_read_raz, vgic_mmio_write_wi, 4, diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index e0bf83d556f2..dc8d2a216a6e 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -56,9 +56,8 @@  	stp \reg1, \reg2, [\ptr], \val  	.endm -	.weak memcpy  SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_PI(memcpy) +SYM_FUNC_START_WEAK_PI(memcpy)  #include "copy_template.S"  	ret  SYM_FUNC_END_PI(memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 02cda2e33bde..1035dce4bdaf 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -45,9 +45,8 @@ C_h	.req	x12  D_l	.req	x13  D_h	.req	x14 -	.weak memmove  SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_PI(memmove) +SYM_FUNC_START_WEAK_PI(memmove)  	cmp	dstin, src  	b.lo	__memcpy  	add	tmp1, src, count diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 77c3c7ba0084..a9c1c9a01ea9 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,9 +42,8 @@ dst		.req	x8  tmp3w		.req	w9  tmp3		.req	x9 -	.weak memset  SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_PI(memset) +SYM_FUNC_START_WEAK_PI(memset)  	mov	dst, dstin	/* Preserve return value.  
*/  	and	A_lw, val, #255  	orr	A_lw, A_lw, A_lw, lsl #8 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 94c99c1c19e3..1ee94002801f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -262,7 +262,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,  	local_irq_save(flags);  	asm volatile("at s1e1r, %0" :: "r" (addr));  	isb(); -	par = read_sysreg(par_el1); +	par = read_sysreg_par();  	local_irq_restore(flags);  	/* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1c0f3e02f731..ca692a815731 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1444,11 +1444,28 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)  	free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);  } +static bool inside_linear_region(u64 start, u64 size) +{ +	/* +	 * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] +	 * accommodating both its ends but excluding PAGE_END. Max physical +	 * range which can be mapped inside this linear mapping range, must +	 * also be derived from its end points. 
+	 */ +	return start >= __pa(_PAGE_OFFSET(vabits_actual)) && +	       (start + size - 1) <= __pa(PAGE_END - 1); +} +  int arch_add_memory(int nid, u64 start, u64 size,  		    struct mhp_params *params)  {  	int ret, flags = 0; +	if (!inside_linear_region(start, size)) { +		pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size); +		return -EINVAL; +	} +  	if (rodata_full || debug_pagealloc_enabled())  		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c index eb32838b8210..09b7f88a2d6a 100644 --- a/arch/csky/kernel/perf_regs.c +++ b/arch/csky/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task)  }  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  {  	regs_user->regs = task_pt_regs(current);  	regs_user->abi = perf_reg_abi(current); diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index f730869e21ee..69af6bc87e64 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -102,6 +102,6 @@ void arch_cpu_idle(void)  #ifdef CONFIG_CPU_PM_STOP  	asm volatile("stop\n");  #endif -	local_irq_enable(); +	raw_local_irq_enable();  }  #endif diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index aea0a40b77a9..bc1364db58fe 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void);   */  void arch_cpu_idle(void)  { -	local_irq_enable(); +	raw_local_irq_enable();  	__asm__("sleep");  } diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 5a0a95d93ddb..67767c5ed98c 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -44,7 +44,7 @@ void arch_cpu_idle(void)  {  	__vmwait();  	/*  interrupts wake us up, but irqs are still disabled */ -	local_irq_enable(); +	raw_local_irq_enable();  }  
/* diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h index 336d0570e1fa..dd8c166ffd7b 100644 --- a/arch/ia64/include/asm/sparsemem.h +++ b/arch/ia64/include/asm/sparsemem.h @@ -18,4 +18,10 @@  #endif  #endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_MEMORY_HOTPLUG +int memory_add_physaddr_to_nid(u64 addr); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif +  #endif /* _ASM_IA64_SPARSEMEM_H */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 6b61a703bcf5..c9ff8796b509 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -239,7 +239,7 @@ void arch_cpu_idle(void)  	if (mark_idle)  		(*mark_idle)(1); -	safe_halt(); +	raw_safe_halt();  	if (mark_idle)  		(*mark_idle)(0); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index a9e46e525cd0..f99860771ff4 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)  void arch_cpu_idle(void)  { -       local_irq_enable(); +       raw_local_irq_enable();  } diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index a95a894aceaf..f0c830337104 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name,  {  	struct clk_init_data id;  	struct clk_hw *h; +	struct clk *clk;  	h = kzalloc(sizeof(*h), GFP_KERNEL);  	if (!h) @@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name,  	id.ops = &alchemy_clkops_cpu;  	h->init = &id; -	return clk_register(NULL, h); +	clk = clk_register(NULL, h); +	if (IS_ERR(clk)) { +		pr_err("failed to register clock\n"); +		kfree(h); +	} + +	return clk;  }  /* AUXPLLs ************************************************************/ diff --git a/arch/mips/include/asm/pgtable-32.h 
b/arch/mips/include/asm/pgtable-32.h index a950fc1ddb4d..6c0532d7b211 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -154,6 +154,7 @@ static inline void pmd_clear(pmd_t *pmdp)  #if defined(CONFIG_XPA) +#define MAX_POSSIBLE_PHYSMEM_BITS 40  #define pte_pfn(x)		(((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))  static inline pte_t  pfn_pte(unsigned long pfn, pgprot_t prot) @@ -169,6 +170,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot)  #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +#define MAX_POSSIBLE_PHYSMEM_BITS 36  #define pte_pfn(x)		((unsigned long)((x).pte_high >> 6))  static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) @@ -183,6 +185,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)  #else +#define MAX_POSSIBLE_PHYSMEM_BITS 32  #ifdef CONFIG_CPU_VR41XX  #define pte_pfn(x)		((unsigned long)((x).pte >> (PAGE_SHIFT + 2)))  #define pfn_pte(pfn, prot)	__pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot)) diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 5bc3b04693c7..18e69ebf5691 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)  {  	unsigned long cfg = read_c0_conf();  	write_c0_conf(cfg | R30XX_CONF_HALT); -	local_irq_enable(); +	raw_local_irq_enable();  }  static void __cpuidle r39xx_wait(void)  {  	if (!need_resched())  		write_c0_conf(read_c0_conf() | TX39_CONF_HALT); -	local_irq_enable(); +	raw_local_irq_enable();  }  void __cpuidle r4k_wait(void)  { -	local_irq_enable(); +	raw_local_irq_enable();  	__r4k_wait();  } @@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)  		"	.set	arch=r4000	\n"  		"	wait			\n"  		"	.set	pop		\n"); -	local_irq_enable(); +	raw_local_irq_enable();  }  /* @@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void)  		"	wait						\n"  		"	mtc0	$1, $12		# stalls until W stage	\n"  		"	.set	
pop					\n"); -	local_irq_enable(); +	raw_local_irq_enable();  }  /* @@ -257,7 +257,7 @@ void arch_cpu_idle(void)  	if (cpu_wait)  		cpu_wait();  	else -		local_irq_enable(); +		raw_local_irq_enable();  }  #ifdef CONFIG_CPU_IDLE diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 0d4253208bde..ca579deef939 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -262,8 +262,8 @@ static void __init bootmem_init(void)  static void __init bootmem_init(void)  {  	phys_addr_t ramstart, ramend; -	phys_addr_t start, end; -	u64 i; +	unsigned long start, end; +	int i;  	ramstart = memblock_start_of_DRAM();  	ramend = memblock_end_of_DRAM(); @@ -300,7 +300,7 @@ static void __init bootmem_init(void)  	min_low_pfn = ARCH_PFN_OFFSET;  	max_pfn = PFN_DOWN(ramend); -	for_each_mem_range(i, &start, &end) { +	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {  		/*  		 * Skip highmem here so we get an accurate max_low_pfn if low  		 * memory stops short of high memory. 
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 38e2894d5fa3..1b939abbe4ca 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -438,6 +438,7 @@ int has_transparent_hugepage(void)  	}  	return mask == PM_HUGE_MASK;  } +EXPORT_SYMBOL(has_transparent_hugepage);  #endif /* CONFIG_TRANSPARENT_HUGEPAGE  */ diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 4ffe857e6ada..50b4eb19a6cc 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);  void arch_cpu_idle(void)  { -	local_irq_enable(); +	raw_local_irq_enable();  }  /* diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 0ff391f00334..3c98728cce24 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -79,7 +79,7 @@ void machine_power_off(void)   */  void arch_cpu_idle(void)  { -	local_irq_enable(); +	raw_local_irq_enable();  	if (mfspr(SPR_UPR) & SPR_UPR_PMP)  		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);  } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index f196d96e2f9f..a92a23d6acd9 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)  void __cpuidle arch_cpu_idle(void)  { -	local_irq_enable(); +	raw_local_irq_enable();  	/* nop on real hardware, qemu will idle sleep. 
*/  	asm volatile("or %%r10,%%r10,%%r10\n":::); diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a4d56f0a41d9..16b8336f91dd 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -248,7 +248,6 @@ KBUILD_CFLAGS		+= $(call cc-option,-mno-string)  cpu-as-$(CONFIG_40x)		+= -Wa,-m405  cpu-as-$(CONFIG_44x)		+= -Wa,-m440  cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec) -cpu-as-$(CONFIG_E200)		+= -Wa,-me200  cpu-as-$(CONFIG_E500)		+= -Wa,-me500  # When using '-many -mpower4' gas will first try and find a matching power4 diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 36443cda8dcf..1376be95e975 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -36,8 +36,10 @@ static inline bool pte_user(pte_t pte)   */  #ifdef CONFIG_PTE_64BIT  #define PTE_RPN_MASK	(~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36  #else  #define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32  #endif  /* diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 3ee1ec60be84..a39e2d193fdc 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -27,6 +27,7 @@  #endif  .endm +#ifdef CONFIG_PPC_KUAP  .macro kuap_check_amr gpr1, gpr2  #ifdef CONFIG_PPC_KUAP_DEBUG  	BEGIN_MMU_FTR_SECTION_NESTED(67) @@ -38,6 +39,7 @@  	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)  #endif  .endm +#endif  .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr  #ifdef CONFIG_PPC_KUAP @@ -61,6 +63,10 @@  #else /* !__ASSEMBLY__ */ +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); +  #ifdef CONFIG_PPC_KUAP  #include <asm/mmu.h> @@ -103,8 +109,16 @@ static inline void kuap_check_amr(void)  static inline unsigned long get_kuap(void)  { +	/* +	 * We return 
AMR_KUAP_BLOCKED when we don't support KUAP because +	 * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to +	 * cause restore_user_access to do a flush. +	 * +	 * This has no effect in terms of actually blocking things on hash, +	 * so it doesn't break anything. +	 */  	if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) -		return 0; +		return AMR_KUAP_BLOCKED;  	return mfspr(SPRN_AMR);  } @@ -123,6 +137,29 @@ static inline void set_kuap(unsigned long value)  	isync();  } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ +	return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && +		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), +		    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_check_amr(void) +{ +	return 0UL; +} + +static inline unsigned long get_kuap(void) +{ +	return AMR_KUAP_BLOCKED; +} + +static inline void set_kuap(unsigned long value) { } +#endif /* !CONFIG_PPC_KUAP */ +  static __always_inline void allow_user_access(void __user *to, const void __user *from,  					      unsigned long size, unsigned long dir)  { @@ -142,6 +179,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,  				       unsigned long size, unsigned long dir)  {  	set_kuap(AMR_KUAP_BLOCKED); +	if (static_branch_unlikely(&uaccess_flush_key)) +		do_uaccess_flush();  }  static inline unsigned long prevent_user_access_return(void) @@ -149,6 +188,8 @@ static inline unsigned long prevent_user_access_return(void)  	unsigned long flags = get_kuap();  	set_kuap(AMR_KUAP_BLOCKED); +	if (static_branch_unlikely(&uaccess_flush_key)) +		do_uaccess_flush();  	return flags;  } @@ -156,30 +197,9 @@ static inline unsigned long prevent_user_access_return(void)  static inline void restore_user_access(unsigned long flags)  { 
 	set_kuap(flags); +	if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) +		do_uaccess_flush();  } - -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ -	return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && -		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), -		    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); -} -#else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline void kuap_check_amr(void) -{ -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ -	return 0; -} -#endif /* CONFIG_PPC_KUAP */ -  #endif /* __ASSEMBLY__ */  #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index ebe95aa04d53..1d32b174ab6a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -57,11 +57,18 @@  	nop;								\  	nop +#define ENTRY_FLUSH_SLOT						\ +	ENTRY_FLUSH_FIXUP_SECTION;					\ +	nop;								\ +	nop;								\ +	nop; +  /*   * r10 must be free to use, r13 must be paca   */  #define INTERRUPT_TO_KERNEL						\ -	STF_ENTRY_BARRIER_SLOT +	STF_ENTRY_BARRIER_SLOT;						\ +	ENTRY_FLUSH_SLOT  /*   * Macros for annotating the expected destination of (h)rfid @@ -137,6 +144,9 @@  	RFSCV;								\  	b	rfscv_flush_fallback +#else /* __ASSEMBLY__ */ +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void);  #endif /* __ASSEMBLY__ */  #endif	/* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add751..fbd406cd6916 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,22 @@ label##3:					       	\  	FTR_ENTRY_OFFSET 955b-956b;			\  	.popsection; +#define 
UACCESS_FLUSH_FIXUP_SECTION			\ +959:							\ +	.pushsection __uaccess_flush_fixup,"a";		\ +	.align 2;					\ +960:							\ +	FTR_ENTRY_OFFSET 959b-960b;			\ +	.popsection; + +#define ENTRY_FLUSH_FIXUP_SECTION			\ +957:							\ +	.pushsection __entry_flush_fixup,"a";		\ +	.align 2;					\ +958:							\ +	FTR_ENTRY_OFFSET 957b-958b;			\ +	.popsection; +  #define RFI_FLUSH_FIXUP_SECTION				\  951:							\  	.pushsection __rfi_flush_fixup,"a";		\ @@ -237,8 +253,11 @@ label##3:					       	\  #include <linux/types.h>  extern long stf_barrier_fallback; +extern long entry_flush_fallback;  extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;  extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;  extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;  extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;  extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 1d0f7d838b2e..0d93331d0fab 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -14,7 +14,7 @@  #define KUAP_CURRENT_WRITE	8  #define KUAP_CURRENT		(KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64  #include <asm/book3s/64/kup-radix.h>  #endif  #ifdef CONFIG_PPC_8xx @@ -35,6 +35,9 @@  .macro kuap_check	current, gpr  .endm +.macro kuap_check_amr	gpr1, gpr2 +.endm +  #endif  #else /* !__ASSEMBLY__ */ @@ -53,17 +56,28 @@ static inline void setup_kuep(bool disabled) { }  void setup_kuap(bool disabled);  #else  static inline void setup_kuap(bool disabled) { } + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ +	return false; +} + +static inline void kuap_check_amr(void) { } + +/* + * 
book3s/64/kup-radix.h defines these functions for the !KUAP case to flush + * the L1D cache after user accesses. Only include the empty stubs for other + * platforms. + */ +#ifndef CONFIG_PPC_BOOK3S_64  static inline void allow_user_access(void __user *to, const void __user *from,  				     unsigned long size, unsigned long dir) { }  static inline void prevent_user_access(void __user *to, const void __user *from,  				       unsigned long size, unsigned long dir) { }  static inline unsigned long prevent_user_access_return(void) { return 0UL; }  static inline void restore_user_access(unsigned long flags) { } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ -	return false; -} +#endif /* CONFIG_PPC_BOOK3S_64 */  #endif /* CONFIG_PPC_KUAP */  static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 91c69ff53a8a..6cda76b57c5d 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -46,5 +46,10 @@ u64 memory_hotplug_max(void);  #define __HAVE_ARCH_RESERVED_KERNEL_PAGES  #endif +#ifdef CONFIG_MEMORY_HOTPLUG +extern int create_section_mapping(unsigned long start, unsigned long end, +				  int nid, pgprot_t prot); +#endif +  #endif /* __KERNEL__ */  #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 85ed2390fb99..567cdc557402 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,7 +63,7 @@ static inline void restore_user_access(unsigned long flags)  static inline bool  bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)  { -	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), +	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000),  		    "Bug: fault blocked by AP register !");  } diff --git 
a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 1d9ac0f9c794..0bd1b144eb76 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -33,19 +33,18 @@   * respectively NA for All or X for Supervisor and no access for User.   * Then we use the APG to say whether accesses are according to Page rules or   * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MI_APG_INIT	0x40000000 - -/* - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MI_APG_KUEP	0x60000000 + * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say + * "all User" rules, that will lead to NA for all. + * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED + * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) + * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) + * 2 => User => 11 (all accesses performed according as user iaw page definition) + * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT + *                    => 10 (all accesses performed according to swaped page definition) for KUEP + * 4-15 => Not Used + */ +#define MI_APG_INIT	0xdc000000 +#define MI_APG_KUEP	0xde000000  /* The effective page number register.  When read, contains the information   * about the last instruction TLB miss.  
When MI_RPN is written, bits in @@ -106,25 +105,9 @@  #define MD_Ks		0x80000000	/* Should not be set */  #define MD_Kp		0x40000000	/* Should always be set */ -/* - * All pages' PP data bits are set to either 000 or 011 or 001, which means - * respectively RW for Supervisor and no access for User, or RO for - * Supervisor and no access for user and NA for ALL. - * Then we use the APG to say whether accesses are according to Page rules or - * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MD_APG_INIT	0x40000000 - -/* - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MD_APG_KUAP	0x60000000 +/* See explanation above at the definition of MI_APG_INIT */ +#define MD_APG_INIT	0xdc000000 +#define MD_APG_KUAP	0xde000000  /* The effective page number register.  When read, contains the information   * about the last instruction TLB miss.  
When MD_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ee2243ba96cf..96522f7f0618 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -153,8 +153,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);   */  #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)  #define PTE_RPN_MASK	(~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36  #else  #define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32  #endif  /* diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 66f403a7da44..1581204467e1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -39,9 +39,9 @@   * into the TLB.   */  #define _PAGE_GUARDED	0x0010	/* Copied to L1 G entry in DTLB */ -#define _PAGE_SPECIAL	0x0020	/* SW entry */ +#define _PAGE_ACCESSED	0x0020	/* Copied to L1 APG 1 entry in I/DTLB */  #define _PAGE_EXEC	0x0040	/* Copied to PP (bit 21) in ITLB */ -#define _PAGE_ACCESSED	0x0080	/* software: page referenced */ +#define _PAGE_SPECIAL	0x0080	/* SW entry */  #define _PAGE_NA	0x0200	/* Supervisor NA, User no access */  #define _PAGE_RO	0x0600	/* Supervisor RO, User no access */ @@ -59,11 +59,12 @@  #define _PMD_PRESENT	0x0001  #define _PMD_PRESENT_MASK	_PMD_PRESENT -#define _PMD_BAD	0x0fd0 +#define _PMD_BAD	0x0f90  #define _PMD_PAGE_MASK	0x000c  #define _PMD_PAGE_8M	0x000c  #define _PMD_PAGE_512K	0x0004 -#define _PMD_USER	0x0020	/* APG 1 */ +#define _PMD_ACCESSED	0x0020	/* APG 1 */ +#define _PMD_USER	0x0040	/* APG 2 */  #define _PTE_NONE_MASK	0 diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fbb8fa32150f..b774a4477d5f 100644 --- a/arch/powerpc/include/asm/security_features.h +++ 
b/arch/powerpc/include/asm/security_features.h @@ -86,12 +86,19 @@ static inline bool security_ftr_enabled(u64 feature)  // Software required to flush link stack on context switch  #define SEC_FTR_FLUSH_LINK_STACK	0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY		0x0000000000004000ull + +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS	0x0000000000008000ull  // Features enabled by default  #define SEC_FTR_DEFAULT \  	(SEC_FTR_L1D_FLUSH_HV | \  	 SEC_FTR_L1D_FLUSH_PR | \  	 SEC_FTR_BNDS_CHK_SPEC_BAR | \ +	 SEC_FTR_L1D_FLUSH_ENTRY | \ +	 SEC_FTR_L1D_FLUSH_UACCESS | \  	 SEC_FTR_FAVOUR_SECURITY)  #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 9efbddee2bca..a466749703f1 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,16 @@ enum l1d_flush_type {  };  void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable);  void do_rfi_flush_fixups(enum l1d_flush_type types);  #ifdef CONFIG_PPC_BARRIER_NOSPEC  void setup_barrier_nospec(void);  #else  static inline void setup_barrier_nospec(void) { };  #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); +void do_entry_flush_fixups(enum l1d_flush_type types);  void do_barrier_nospec_fixups(bool enable);  extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 1e6fa371cc38..d072866842e4 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,9 +13,9 @@  #endif /* CONFIG_SPARSEMEM */  #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, -				  int nid, pgprot_t prot);  extern int remove_section_mapping(unsigned long start, 
unsigned long end); +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid  #ifdef CONFIG_NUMA  extern int hot_add_scn_to_nid(unsigned long scn_addr); @@ -26,6 +26,5 @@ static inline int hot_add_scn_to_nid(unsigned long scn_addr)  }  #endif /* CONFIG_NUMA */  #endif /* CONFIG_MEMORY_HOTPLUG */ -  #endif /* __KERNEL__ */  #endif /* _ASM_POWERPC_SPARSEMEM_H */ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 8728590f514a..3beeb030cd78 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -6,6 +6,7 @@  struct device;  struct device_node; +struct drmem_lmb;  #ifdef CONFIG_NUMA @@ -61,6 +62,9 @@ static inline int early_cpu_to_node(int cpu)  	 */  	return (nid < 0) ? 0 : nid;  } + +int of_drconf_to_nid_single(struct drmem_lmb *lmb); +  #else  static inline int early_cpu_to_node(int cpu) { return 0; } @@ -84,10 +88,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)  	return 0;  } -#endif /* CONFIG_NUMA */ +static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) +{ +	return first_online_node; +} -struct drmem_lmb; -int of_drconf_to_nid_single(struct drmem_lmb *lmb); +#endif /* CONFIG_NUMA */  #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)  extern int find_and_online_cpu_nid(int cpu); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ef5bbb705c08..501c9a79038c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -178,7 +178,7 @@ do {								\   * are no aliasing issues.   
*/  #define __put_user_asm_goto(x, addr, label, op)			\ -	asm volatile goto(					\ +	asm_volatile_goto(					\  		"1:	" op "%U1%X1 %0,%1	# put_user\n"	\  		EX_TABLE(1b, %l2)				\  		:						\ @@ -191,7 +191,7 @@ do {								\  	__put_user_asm_goto(x, ptr, label, "std")  #else /* __powerpc64__ */  #define __put_user_asm2_goto(x, addr, label)			\ -	asm volatile goto(					\ +	asm_volatile_goto(					\  		"1:	stw%X1 %0, %1\n"			\  		"2:	stw%X1 %L0, %L1\n"			\  		EX_TABLE(1b, %l2)				\ diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 6b50bf15d8c1..bf3270426d82 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -264,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)  {  	struct pci_io_addr_range *piar;  	struct rb_node *n; +	unsigned long flags; -	spin_lock(&pci_io_addr_cache_root.piar_lock); +	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);  	for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {  		piar = rb_entry(n, struct pci_io_addr_range, rb_node); @@ -273,7 +274,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)  		       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem",  		       &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));  	} -	spin_unlock(&pci_io_addr_cache_root.piar_lock); +	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);  	return 0;  } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7d748b88705..4d01f09ecf80 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1000,8 +1000,6 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)   * Vectors for the FWNMI option.  Share common code.   */  TRAMP_REAL_BEGIN(system_reset_fwnmi) -	/* XXX: fwnmi guest could run a nested/PR guest, so why no test?  
*/ -	__IKVM_REAL(system_reset)=0  	GEN_INT_ENTRY system_reset, virt=0  #endif /* CONFIG_PPC_PSERIES */ @@ -1412,6 +1410,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)   *   If none is found, do a Linux page fault. Linux page faults can happen in   *   kernel mode due to user copy operations of course.   * + *   KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest + *   MMU context, which may cause a DSI in the host, which must go to the + *   KVM handler. MSR[IR] is not enabled, so the real-mode handler will + *   always be used regardless of AIL setting. + *   * - Radix MMU   *   The hardware loads from the Linux page table directly, so a fault goes   *   immediately to Linux page fault. @@ -1422,10 +1425,8 @@ INT_DEFINE_BEGIN(data_access)  	IVEC=0x300  	IDAR=1  	IDSISR=1 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE  	IKVM_SKIP=1  	IKVM_REAL=1 -#endif  INT_DEFINE_END(data_access)  EXC_REAL_BEGIN(data_access, 0x300, 0x80) @@ -1464,6 +1465,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)   *   ppc64_bolted_size (first segment). The kernel handler must avoid stomping   *   on user-handler data structures.   * + *   KVM: Same as 0x300, DSLB must test for KVM guest. + *   * A dedicated save area EXSLB is used (XXX: but it actually need not be   * these days, we could use EXGEN).   
*/ @@ -1472,10 +1475,8 @@ INT_DEFINE_BEGIN(data_access_slb)  	IAREA=PACA_EXSLB  	IRECONCILE=0  	IDAR=1 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE  	IKVM_SKIP=1  	IKVM_REAL=1 -#endif  INT_DEFINE_END(data_access_slb)  EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) @@ -2951,15 +2952,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)  	.endr  	blr -TRAMP_REAL_BEGIN(rfi_flush_fallback) -	SET_SCRATCH0(r13); -	GET_PACA(r13); -	std	r1,PACA_EXRFI+EX_R12(r13) -	ld	r1,PACAKSAVE(r13) -	std	r9,PACA_EXRFI+EX_R9(r13) -	std	r10,PACA_EXRFI+EX_R10(r13) -	std	r11,PACA_EXRFI+EX_R11(r13) -	mfctr	r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH  	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)  	ld	r11,PACA_L1D_FLUSH_SIZE(r13)  	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -2970,7 +2964,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)  	sync  	/* -	 * The load adresses are at staggered offsets within cachelines, +	 * The load addresses are at staggered offsets within cachelines,  	 * which suits some pipelines better (on others it should not  	 * hurt).  	 
*/ @@ -2985,7 +2979,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)  	ld	r11,(0x80 + 8)*7(r10)  	addi	r10,r10,0x80*8  	bdnz	1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) +	std	r9,PACA_EXRFI+EX_R9(r13) +	std	r10,PACA_EXRFI+EX_R10(r13) +	std	r11,PACA_EXRFI+EX_R11(r13) +	mfctr	r9 +	L1D_DISPLACEMENT_FLUSH +	mtctr	r9 +	ld	r9,PACA_EXRFI+EX_R9(r13) +	ld	r10,PACA_EXRFI+EX_R10(r13) +	ld	r11,PACA_EXRFI+EX_R11(r13) +	blr + +TRAMP_REAL_BEGIN(rfi_flush_fallback) +	SET_SCRATCH0(r13); +	GET_PACA(r13); +	std	r1,PACA_EXRFI+EX_R12(r13) +	ld	r1,PACAKSAVE(r13) +	std	r9,PACA_EXRFI+EX_R9(r13) +	std	r10,PACA_EXRFI+EX_R10(r13) +	std	r11,PACA_EXRFI+EX_R11(r13) +	mfctr	r9 +	L1D_DISPLACEMENT_FLUSH  	mtctr	r9  	ld	r9,PACA_EXRFI+EX_R9(r13)  	ld	r10,PACA_EXRFI+EX_R10(r13) @@ -3003,32 +3020,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)  	std	r10,PACA_EXRFI+EX_R10(r13)  	std	r11,PACA_EXRFI+EX_R11(r13)  	mfctr	r9 -	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) -	ld	r11,PACA_L1D_FLUSH_SIZE(r13) -	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ -	mtctr	r11 -	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - -	/* order ld/st prior to dcbt stop all streams with flushing */ -	sync - -	/* -	 * The load adresses are at staggered offsets within cachelines, -	 * which suits some pipelines better (on others it should not -	 * hurt). 
-	 */ -1: -	ld	r11,(0x80 + 8)*0(r10) -	ld	r11,(0x80 + 8)*1(r10) -	ld	r11,(0x80 + 8)*2(r10) -	ld	r11,(0x80 + 8)*3(r10) -	ld	r11,(0x80 + 8)*4(r10) -	ld	r11,(0x80 + 8)*5(r10) -	ld	r11,(0x80 + 8)*6(r10) -	ld	r11,(0x80 + 8)*7(r10) -	addi	r10,r10,0x80*8 -	bdnz	1b - +	L1D_DISPLACEMENT_FLUSH  	mtctr	r9  	ld	r9,PACA_EXRFI+EX_R9(r13)  	ld	r10,PACA_EXRFI+EX_R10(r13) @@ -3079,8 +3071,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)  	RFSCV  USE_TEXT_SECTION() -	MASKED_INTERRUPT -	MASKED_INTERRUPT hsrr=1 + +_GLOBAL(do_uaccess_flush) +	UACCESS_FLUSH_FIXUP_SECTION +	nop +	nop +	nop +	blr +	L1D_DISPLACEMENT_FLUSH +	blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + + +MASKED_INTERRUPT +MASKED_INTERRUPT hsrr=1  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER  kvmppc_skip_interrupt: diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 44c9018aed1b..a1ae00689e0f 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -284,11 +284,7 @@ _ENTRY(saved_ksp_limit)  	rlwimi	r11, r10, 22, 20, 29	/* Compute PTE address */  	lwz	r11, 0(r11)		/* Get Linux PTE */ -#ifdef CONFIG_SWAP  	li	r9, _PAGE_PRESENT | _PAGE_ACCESSED -#else -	li	r9, _PAGE_PRESENT -#endif  	andc.	r9, r9, r11		/* Check permission */  	bne	5f @@ -369,11 +365,7 @@ _ENTRY(saved_ksp_limit)  	rlwimi	r11, r10, 22, 20, 29	/* Compute PTE address */  	lwz	r11, 0(r11)		/* Get Linux PTE */ -#ifdef CONFIG_SWAP  	li	r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else -	li	r9, _PAGE_PRESENT | _PAGE_EXEC -#endif  	andc.	
r9, r9, r11		/* Check permission */  	bne	5f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9f359d3fba74..ee0bfebc375f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -202,9 +202,7 @@ SystemCall:  InstructionTLBMiss:  	mtspr	SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS)  	mtspr	SPRN_SPRG_SCRATCH1, r11 -#endif  	/* If we are faulting a kernel address, we have to use the  	 * kernel page tables. @@ -224,25 +222,13 @@ InstructionTLBMiss:  3:  	mtcr	r11  #endif -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT)  	lwz	r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10)	/* Get level 1 entry */  	mtspr	SPRN_MD_TWC, r11 -#else -	lwz	r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10)	/* Get level 1 entry */ -	mtspr	SPRN_MI_TWC, r10	/* Set segment attributes */ -	mtspr	SPRN_MD_TWC, r10 -#endif  	mfspr	r10, SPRN_MD_TWC  	lwz	r10, 0(r10)	/* Get the pte */ -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) +	rlwimi	r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED  	rlwimi	r11, r10, 32 - 9, _PMD_PAGE_512K  	mtspr	SPRN_MI_TWC, r11 -#endif -#ifdef CONFIG_SWAP -	rlwinm	r11, r10, 32-5, _PAGE_PRESENT -	and	r11, r11, r10 -	rlwimi	r10, r11, 0, _PAGE_PRESENT -#endif  	/* The Linux PTE won't go exactly into the MMU TLB.  	 * Software indicator bits 20 and 23 must be clear.  	 
* Software indicator bits 22, 24, 25, 26, and 27 must be @@ -256,9 +242,7 @@ InstructionTLBMiss:  	/* Restore registers */  0:	mfspr	r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS)  	mfspr	r11, SPRN_SPRG_SCRATCH1 -#endif  	rfi  	patch_site	0b, patch__itlbmiss_exit_1 @@ -268,9 +252,7 @@ InstructionTLBMiss:  	addi	r10, r10, 1  	stw	r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)  	mfspr	r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)  	mfspr	r11, SPRN_SPRG_SCRATCH1 -#endif  	rfi  #endif @@ -297,30 +279,16 @@ DataStoreTLBMiss:  	mfspr	r10, SPRN_MD_TWC  	lwz	r10, 0(r10)	/* Get the pte */ -	/* Insert the Guarded flag into the TWC from the Linux PTE. +	/* Insert Guarded and Accessed flags into the TWC from the Linux PTE.  	 * It is bit 27 of both the Linux PTE and the TWC (at least  	 * I got that right :-).  It will be better when we can put  	 * this into the Linux pgd/pmd and load it in the operation  	 * above.  	 */ -	rlwimi	r11, r10, 0, _PAGE_GUARDED +	rlwimi	r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED  	rlwimi	r11, r10, 32 - 9, _PMD_PAGE_512K  	mtspr	SPRN_MD_TWC, r11 -	/* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. -	 * We also need to know if the insn is a load/store, so: -	 * Clear _PAGE_PRESENT and load that which will -	 * trap into DTLB Error with store bit set accordinly. -	 */ -	/* PRESENT=0x1, ACCESSED=0x20 -	 * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); -	 * r10 = (r10 & ~PRESENT) | r11; -	 */ -#ifdef CONFIG_SWAP -	rlwinm	r11, r10, 32-5, _PAGE_PRESENT -	and	r11, r11, r10 -	rlwimi	r10, r11, 0, _PAGE_PRESENT -#endif  	/* The Linux PTE won't go exactly into the MMU TLB.  	 * Software indicator bits 24, 25, 26, and 27 must be  	 * set.  
All other Linux PTE bits control the behavior @@ -711,7 +679,7 @@ initial_mmu:  	li	r9, 4				/* up to 4 pages of 8M */  	mtctr	r9  	lis	r9, KERNELBASE@h		/* Create vaddr for TLB */ -	li	r10, MI_PS8MEG | MI_SVALID	/* Set 8M byte page */ +	li	r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID  	li	r11, MI_BOOTINIT		/* Create RPN for address 0 */  1:  	mtspr	SPRN_MI_CTR, r8	/* Set instruction MMU control */ @@ -775,7 +743,7 @@ _GLOBAL(mmu_pin_tlb)  #ifdef CONFIG_PIN_TLB_TEXT  	LOAD_REG_IMMEDIATE(r5, 28 << 8)  	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) -	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) +	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)  	LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)  	LOAD_REG_ADDR(r9, _sinittext)  	li	r0, 4 @@ -797,7 +765,7 @@ _GLOBAL(mmu_pin_tlb)  	LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)  #ifdef CONFIG_PIN_TLB_DATA  	LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) -	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) +	LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)  #ifdef CONFIG_PIN_TLB_IMMR  	li	r0, 3  #else @@ -834,7 +802,7 @@ _GLOBAL(mmu_pin_tlb)  #endif  #ifdef CONFIG_PIN_TLB_IMMR  	LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) -	LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) +	LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED)  	mfspr   r8, SPRN_IMMR  	rlwinm	r8, r8, 0, 0xfff80000  	ori	r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 5eb9eedac920..a0dda2a1f2df 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -156,6 +156,7 @@ __after_mmu_off:  	bl	initial_bats  	bl	load_segment_registers  BEGIN_MMU_FTR_SECTION +	bl	reloc_offset  	bl	early_hash_table  END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)  #if defined(CONFIG_BOOTX_TEXT) @@ -457,11 +458,7 @@ InstructionTLBMiss:  	cmplw	0,r1,r3  #endif  	mfspr	r2, 
SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP  	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else -	li	r1,_PAGE_PRESENT | _PAGE_EXEC -#endif  #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)  	bgt-	112f  	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */ @@ -523,11 +520,7 @@ DataLoadTLBMiss:  	lis	r1, TASK_SIZE@h		/* check if kernel address */  	cmplw	0,r1,r3  	mfspr	r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP  	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else -	li	r1, _PAGE_PRESENT -#endif  	bgt-	112f  	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */  	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */ @@ -603,11 +596,7 @@ DataStoreTLBMiss:  	lis	r1, TASK_SIZE@h		/* check if kernel address */  	cmplw	0,r1,r3  	mfspr	r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP  	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else -	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif  	bgt-	112f  	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */  	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */ @@ -932,7 +921,7 @@ early_hash_table:  	ori	r6, r6, 3	/* 256kB table */  	mtspr	SPRN_SDR1, r6  	lis	r6, early_hash@h -	lis	r3, Hash@ha +	addis	r3, r3, Hash@ha  	stw	r6, Hash@l(r3)  	blr diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index ae0e2632393d..1f835539fda4 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -52,9 +52,9 @@ void arch_cpu_idle(void)  		 * interrupts enabled, some don't.  		 */  		if (irqs_disabled()) -			local_irq_enable(); +			raw_local_irq_enable();  	} else { -		local_irq_enable(); +		raw_local_irq_enable();  		/*  		 * Go into low thread priority and possibly  		 * low power mode. 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index bb9cab3641d7..74fd47f46fa5 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -945,7 +945,13 @@ early_initcall(disable_hardlockup_detector);  static enum l1d_flush_type enabled_flush_types;  static void *l1d_flush_fallback_area;  static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush;  bool rfi_flush; +bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key);  static int __init handle_no_rfi_flush(char *p)  { @@ -955,6 +961,22 @@ static int __init handle_no_rfi_flush(char *p)  }  early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ +	pr_info("entry-flush: disabled on command line."); +	no_entry_flush = true; +	return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ +	pr_info("uaccess-flush: disabled on command line."); +	no_uaccess_flush = true; +	return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); +  /*   * The RFI flush is not KPTI, but because users will see doco that says to use   * nopti we hijack that option here to also disable the RFI flush. 
@@ -986,6 +1008,32 @@ void rfi_flush_enable(bool enable)  	rfi_flush = enable;  } +void entry_flush_enable(bool enable) +{ +	if (enable) { +		do_entry_flush_fixups(enabled_flush_types); +		on_each_cpu(do_nothing, NULL, 1); +	} else { +		do_entry_flush_fixups(L1D_FLUSH_NONE); +	} + +	entry_flush = enable; +} + +void uaccess_flush_enable(bool enable) +{ +	if (enable) { +		do_uaccess_flush_fixups(enabled_flush_types); +		static_branch_enable(&uaccess_flush_key); +		on_each_cpu(do_nothing, NULL, 1); +	} else { +		static_branch_disable(&uaccess_flush_key); +		do_uaccess_flush_fixups(L1D_FLUSH_NONE); +	} + +	uaccess_flush = enable; +} +  static void __ref init_fallback_flush(void)  {  	u64 l1d_size, limit; @@ -1044,10 +1092,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)  	enabled_flush_types = types; -	if (!no_rfi_flush && !cpu_mitigations_off()) +	if (!cpu_mitigations_off() && !no_rfi_flush)  		rfi_flush_enable(enable);  } +void setup_entry_flush(bool enable) +{ +	if (cpu_mitigations_off()) +		return; + +	if (!no_entry_flush) +		entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ +	if (cpu_mitigations_off()) +		return; + +	if (!no_uaccess_flush) +		uaccess_flush_enable(enable); +} +  #ifdef CONFIG_DEBUG_FS  static int rfi_flush_set(void *data, u64 val)  { @@ -1075,9 +1141,63 @@ static int rfi_flush_get(void *data, u64 *val)  DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ +	bool enable; + +	if (val == 1) +		enable = true; +	else if (val == 0) +		enable = false; +	else +		return -EINVAL; + +	/* Only do anything if we're changing state */ +	if (enable != entry_flush) +		entry_flush_enable(enable); + +	return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ +	*val = entry_flush ? 
1 : 0; +	return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ +	bool enable; + +	if (val == 1) +		enable = true; +	else if (val == 0) +		enable = false; +	else +		return -EINVAL; + +	/* Only do anything if we're changing state */ +	if (enable != uaccess_flush) +		uaccess_flush_enable(enable); + +	return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ +	*val = uaccess_flush ? 1 : 0; +	return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); +  static __init int rfi_flush_debugfs_init(void)  {  	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); +	debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); +	debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);  	return 0;  }  device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3c6b9822f978..8c2857cbd960 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1393,13 +1393,14 @@ static void add_cpu_to_masks(int cpu)  /* Activate a secondary processor. 
*/  void start_secondary(void *unused)  { -	unsigned int cpu = smp_processor_id(); +	unsigned int cpu = raw_smp_processor_id();  	mmgrab(&init_mm);  	current->active_mm = &init_mm;  	smp_store_cpu_info(cpu);  	set_dec(tb_ticks_per_jiffy); +	rcu_cpu_starting(cpu);  	preempt_disable();  	cpu_callin_map[cpu] = 1; diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 8e50818aa50b..310bcd768cd5 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -2,7 +2,7 @@  #include <linux/err.h>  #include <asm/asm-prototypes.h> -#include <asm/book3s/64/kup-radix.h> +#include <asm/kup.h>  #include <asm/cputime.h>  #include <asm/hw_irq.h>  #include <asm/kprobes.h> diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e0548b4950de..6db90cdf11da 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,20 @@ SECTIONS  	}  	. = ALIGN(8); +	__uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { +		__start___uaccess_flush_fixup = .; +		*(__uaccess_flush_fixup) +		__stop___uaccess_flush_fixup = .; +	} + +	. = ALIGN(8); +	__entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { +		__start___entry_flush_fixup = .; +		*(__entry_flush_fixup) +		__stop___entry_flush_fixup = .; +	} + +	. 
= ALIGN(8);  	__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {  		__start___stf_exit_barrier_fixup = .;  		*(__stf_exit_barrier_fixup) diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index d0c2db0e07fa..a59a94f02733 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -251,6 +251,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)  	}  	state = &sb->irq_state[src]; + +	/* Some sanity checking */ +	if (!state->valid) { +		pr_devel("%s: source %lx invalid !\n", __func__, irq); +		return VM_FAULT_SIGBUS; +	} +  	kvmppc_xive_select_irq(state, &hw_num, &xd);  	arch_spin_lock(&sb->lock); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4c0a7ee9fa00..321c12a9ef6b 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -234,6 +234,110 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)  	do_stf_exit_barrier_fixups(types);  } +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ +	unsigned int instrs[4], *dest; +	long *start, *end; +	int i; + +	start = PTRRELOC(&__start___uaccess_flush_fixup); +	end = PTRRELOC(&__stop___uaccess_flush_fixup); + +	instrs[0] = 0x60000000; /* nop */ +	instrs[1] = 0x60000000; /* nop */ +	instrs[2] = 0x60000000; /* nop */ +	instrs[3] = 0x4e800020; /* blr */ + +	i = 0; +	if (types == L1D_FLUSH_FALLBACK) { +		instrs[3] = 0x60000000; /* nop */ +		/* fallthrough to fallback flush */ +	} + +	if (types & L1D_FLUSH_ORI) { +		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ +		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ +	} + +	if (types & L1D_FLUSH_MTTRIG) +		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + +	for (i = 0; start < end; start++, i++) { +		dest = (void *)start + *start; + +		pr_devel("patching dest %lx\n", (unsigned long)dest); + +		patch_instruction((struct ppc_inst *)dest, 
ppc_inst(instrs[0])); + +		patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); +		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); +		patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); +	} + +	printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, +		(types == L1D_FLUSH_NONE)       ? "no" : +		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" : +		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG) +							? "ori+mttrig type" +							: "ori type" : +		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type" +						: "unknown"); +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ +	unsigned int instrs[3], *dest; +	long *start, *end; +	int i; + +	start = PTRRELOC(&__start___entry_flush_fixup); +	end = PTRRELOC(&__stop___entry_flush_fixup); + +	instrs[0] = 0x60000000; /* nop */ +	instrs[1] = 0x60000000; /* nop */ +	instrs[2] = 0x60000000; /* nop */ + +	i = 0; +	if (types == L1D_FLUSH_FALLBACK) { +		instrs[i++] = 0x7d4802a6; /* mflr r10		*/ +		instrs[i++] = 0x60000000; /* branch patched below */ +		instrs[i++] = 0x7d4803a6; /* mtlr r10		*/ +	} + +	if (types & L1D_FLUSH_ORI) { +		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ +		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ +	} + +	if (types & L1D_FLUSH_MTTRIG) +		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + +	for (i = 0; start < end; start++, i++) { +		dest = (void *)start + *start; + +		pr_devel("patching dest %lx\n", (unsigned long)dest); + +		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + +		if (types == L1D_FLUSH_FALLBACK) +			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback, +				     BRANCH_SET_LINK); +		else +			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + +		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); +	} + +	printk(KERN_DEBUG "entry-flush: patched %d locations (%s 
flush)\n", i, +		(types == L1D_FLUSH_NONE)       ? "no" : +		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" : +		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG) +							? "ori+mttrig type" +							: "ori type" : +		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type" +						: "unknown"); +} +  void do_rfi_flush_fixups(enum l1d_flush_type types)  {  	unsigned int instrs[3], *dest; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 01ec2a252f09..3fc325bebe4d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -50,6 +50,7 @@  #include <asm/rtas.h>  #include <asm/kasan.h>  #include <asm/svm.h> +#include <asm/mmzone.h>  #include <mm/mmu_decl.h> diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ed4fcccf8a9..7b25548ec42b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event)  			/* If this is a valid record, create the sample */  			struct perf_output_handle handle; -			if (perf_output_begin(&handle, event, header.size)) +			if (perf_output_begin(&handle, &data, event, header.size))  				return;  			perf_output_sample(&handle, &header, &data, event); diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 8e53f2fc3fe0..6f681b105eec 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -144,8 +144,7 @@ u64 perf_reg_abi(struct task_struct *task)  }  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  {  	regs_user->regs = task_pt_regs(current);  	regs_user->abi = (regs_user->regs) ? 
perf_reg_abi(current) : diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 9acaa0f131b9..46115231a3b2 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np)  		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);  } -static void pnv_setup_rfi_flush(void) +static void pnv_setup_security_mitigations(void)  {  	struct device_node *np, *fw_features;  	enum l1d_flush_type type; @@ -122,12 +122,31 @@ static void pnv_setup_rfi_flush(void)  			type = L1D_FLUSH_ORI;  	} +	/* +	 * If we are non-Power9 bare metal, we don't need to flush on kernel +	 * entry or after user access: they fix a P9 specific vulnerability. +	 */ +	if (!pvr_version_is(PVR_POWER9)) { +		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); +		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); +	} +  	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \  		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \  		  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));  	setup_rfi_flush(type, enable);  	setup_count_cache_flush(); + +	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && +		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); +	setup_entry_flush(enable); + +	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && +		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); +	setup_uaccess_flush(enable); + +	setup_stf_barrier();  }  static void __init pnv_check_guarded_cores(void) @@ -156,8 +175,7 @@ static void __init pnv_setup_arch(void)  {  	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); -	pnv_setup_rfi_flush(); -	setup_stf_barrier(); +	pnv_setup_security_mitigations();  	/* Initialize SMP */  	pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index d6f4162478a5..2f73cb5bf12d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -349,8 
+349,8 @@ void post_mobility_fixup(void)  	cpus_read_unlock(); -	/* Possibly switch to a new RFI flush type */ -	pseries_setup_rfi_flush(); +	/* Possibly switch to a new L1 flush type */ +	pseries_setup_security_mitigations();  	/* Reinitialise system information for hv-24x7 */  	read_24x7_sys_info(); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 13fa370a87e4..593840847cd3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void)  int dlpar_workqueue_init(void); -void pseries_setup_rfi_flush(void); +void pseries_setup_security_mitigations(void);  void pseries_lpar_read_hblkrm_characteristics(void);  #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 633c45ec406d..090c13f6c881 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)  		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);  } -void pseries_setup_rfi_flush(void) +void pseries_setup_security_mitigations(void)  {  	struct h_cpu_char_result result;  	enum l1d_flush_type types; @@ -579,6 +579,16 @@ void pseries_setup_rfi_flush(void)  	setup_rfi_flush(types, enable);  	setup_count_cache_flush(); + +	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && +		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); +	setup_entry_flush(enable); + +	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && +		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); +	setup_uaccess_flush(enable); + +	setup_stf_barrier();  }  #ifdef CONFIG_PCI_IOV @@ -768,8 +778,7 @@ static void __init pSeries_setup_arch(void)  	fwnmi_init(); -	pseries_setup_rfi_flush(); -	setup_stf_barrier(); +	pseries_setup_security_mitigations();  	
pseries_lpar_read_hblkrm_characteristics();  	/* By default, only probe PCI (can be overridden by rtas_pci) */ diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index b0ab66e5fdb1..5b2e79e5bfa5 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -14,4 +14,6 @@  #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)  #define PGDIR_MASK      (~(PGDIR_SIZE - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 34 +  #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index c47e6b35c551..824b2c9da75b 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -476,7 +476,7 @@ do {									\  do {									\  	long __kr_err;							\  									\ -	__put_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err);	\ +	__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err);	\  	if (unlikely(__kr_err))						\  		goto err_label;						\  } while (0) diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 82a5693b1861..134388cbaaa1 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -4,6 +4,8 @@  #ifndef __ASSEMBLY__ +#include <asm/barrier.h> +  static inline void cpu_relax(void)  {  #ifdef __riscv_muldiv diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 99e12faa5498..765b62434f30 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0  /*   * Copyright (C) 2013 Linaro Limited   * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 11e2a4fe66e0..7e849797c9c3 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -35,6 +35,10 @@ ENTRY(_start)  	.word 0  #endif  	.balign 8 +#ifdef CONFIG_RISCV_M_MODE +	/* Image 
load offset (0MB) from start of RAM for M-mode */ +	.dword 0 +#else  #if __riscv_xlen == 64  	/* Image load offset(2MB) from start of RAM */  	.dword 0x200000 @@ -42,6 +46,7 @@ ENTRY(_start)  	/* Image load offset(4MB) from start of RAM */  	.dword 0x400000  #endif +#endif  	/* Effective size of kernel image */  	.dword _end - _start  	.dword __HEAD_FLAGS diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c index 04a38fbeb9c7..fd304a248de6 100644 --- a/arch/riscv/kernel/perf_regs.c +++ b/arch/riscv/kernel/perf_regs.c @@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task)  }  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  {  	regs_user->regs = task_pt_regs(current);  	regs_user->abi = perf_reg_abi(current); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 19225ec65db6..dd5f985b1f40 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void);  void arch_cpu_idle(void)  {  	wait_for_interrupt(); -	local_irq_enable(); +	raw_local_irq_enable();  }  void show_regs(struct pt_regs *regs) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c424cc6dd833..117f3212a8e4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -75,6 +75,7 @@ void __init setup_arch(char **cmdline_p)  	*cmdline_p = boot_command_line;  	early_ioremap_setup(); +	jump_label_init();  	parse_early_param();  	efi_init(); diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore index 11ebee9e4c1d..3a19def868ec 100644 --- a/arch/riscv/kernel/vdso/.gitignore +++ b/arch/riscv/kernel/vdso/.gitignore @@ -1,3 +1,4 @@  # SPDX-License-Identifier: GPL-2.0-only  vdso.lds  *.tmp +vdso-syms.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 7d6a94d45ec9..0cfd6da784f8 100644 --- 
a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -43,19 +43,14 @@ $(obj)/vdso.o: $(obj)/vdso.so  SYSCFLAGS_vdso.so.dbg = $(c_flags)  $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE  	$(call if_changed,vdsold) +SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ +	-Wl,--build-id=sha1 -Wl,--hash-style=both  # We also create a special relocatable object that should mirror the symbol  # table and layout of the linked DSO. With ld --just-symbols we can then  # refer to these symbols in the kernel code rather than hand-coded addresses. - -SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ -	-Wl,--build-id=sha1 -Wl,--hash-style=both -$(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE -	$(call if_changed,vdsold) - -LDFLAGS_vdso-syms.o := -r --just-symbols -$(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE -	$(call if_changed,ld) +$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE +	$(call if_changed,so2s)  # strip rule for the .so file  $(obj)/%.so: OBJCOPYFLAGS := -S @@ -73,6 +68,11 @@ quiet_cmd_vdsold = VDSOLD  $@                             $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \                     rm $@.tmp +# Extracts symbol offsets from the VDSO, converting them into an assembly file +# that contains the same symbols at the same offsets. +quiet_cmd_so2s = SO2S    $@ +      cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ +  # install commands for the unstripped file  quiet_cmd_vdso_install = INSTALL $@        cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh new file mode 100755 index 000000000000..e64cb6d9440e --- /dev/null +++ b/arch/riscv/kernel/vdso/so2s.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# Copyright 2020 Palmer Dabbelt <palmerdabbelt@google.com> + +sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \ +| grep '^\.' 
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 1359e21c0c62..3c8b9e433c67 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -86,6 +86,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a  	pmd_t *pmd, *pmd_k;  	pte_t *pte_k;  	int index; +	unsigned long pfn;  	/* User mode accesses just cause a SIGSEGV */  	if (user_mode(regs)) @@ -100,7 +101,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a  	 * of a task switch.  	 */  	index = pgd_index(addr); -	pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; +	pfn = csr_read(CSR_SATP) & SATP_PPN; +	pgd = (pgd_t *)pfn_to_virt(pfn) + index;  	pgd_k = init_mm.pgd + index;  	if (!pgd_present(*pgd_k)) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ea933b789a88..8e577f14f120 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -154,9 +154,8 @@ disable:  void __init setup_bootmem(void)  { -	phys_addr_t mem_size = 0; -	phys_addr_t total_mem = 0; -	phys_addr_t mem_start, start, end = 0; +	phys_addr_t mem_start = 0; +	phys_addr_t start, end = 0;  	phys_addr_t vmlinux_end = __pa_symbol(&_end);  	phys_addr_t vmlinux_start = __pa_symbol(&_start);  	u64 i; @@ -164,21 +163,18 @@ void __init setup_bootmem(void)  	/* Find the memory region containing the kernel */  	for_each_mem_range(i, &start, &end) {  		phys_addr_t size = end - start; -		if (!total_mem) +		if (!mem_start)  			mem_start = start;  		if (start <= vmlinux_start && vmlinux_end <= end)  			BUG_ON(size == 0); -		total_mem = total_mem + size;  	}  	/* -	 * Remove memblock from the end of usable area to the -	 * end of region +	 * The maximal physical memory size is -PAGE_OFFSET. +	 * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed +	 * as it is unusable by kernel.  	 
*/ -	mem_size = min(total_mem, (phys_addr_t)-PAGE_OFFSET); -	if (mem_start + mem_size < end) -		memblock_remove(mem_start + mem_size, -				end - mem_start - mem_size); +	memblock_enforce_memory_limit(mem_start - PAGE_OFFSET);  	/* Reserve from the start of the kernel to the end of the kernel */  	memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); @@ -297,6 +293,7 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;  #define NUM_EARLY_PMDS		(1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)  #endif  pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); +pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);  static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)  { @@ -494,6 +491,18 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)  				   load_pa + (va - PAGE_OFFSET),  				   map_size, PAGE_KERNEL_EXEC); +#ifndef __PAGETABLE_PMD_FOLDED +	/* Setup early PMD for DTB */ +	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, +			   (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE); +	/* Create two consecutive PMD mappings for FDT early scan */ +	pa = dtb_pa & ~(PMD_SIZE - 1); +	create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, +			   pa, PMD_SIZE, PAGE_KERNEL); +	create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, +			   pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); +	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); +#else  	/* Create two consecutive PGD mappings for FDT early scan */  	pa = dtb_pa & ~(PGDIR_SIZE - 1);  	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, @@ -501,6 +510,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)  	create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,  			   pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);  	dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); +#endif  	dtb_early_pa = dtb_pa;  	/* diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0784bf3caf43..fe6f529ac82c 100644 --- 
a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -1,3 +1,4 @@ +CONFIG_UAPI_HEADER_TEST=y  CONFIG_SYSVIPC=y  CONFIG_POSIX_MQUEUE=y  CONFIG_WATCH_QUEUE=y @@ -93,9 +94,10 @@ CONFIG_CLEANCACHE=y  CONFIG_FRONTSWAP=y  CONFIG_CMA_DEBUG=y  CONFIG_CMA_DEBUGFS=y +CONFIG_CMA_AREAS=7  CONFIG_MEM_SOFT_DIRTY=y  CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y  CONFIG_ZSMALLOC_STAT=y  CONFIG_DEFERRED_STRUCT_PAGE_INIT=y  CONFIG_IDLE_PAGE_TRACKING=y @@ -378,7 +380,6 @@ CONFIG_NETLINK_DIAG=m  CONFIG_CGROUP_NET_PRIO=y  CONFIG_BPF_JIT=y  CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set  CONFIG_PCI=y  # CONFIG_PCIEASPM is not set  CONFIG_PCI_DEBUG=y @@ -386,7 +387,7 @@ CONFIG_HOTPLUG_PCI=y  CONFIG_HOTPLUG_PCI_S390=y  CONFIG_DEVTMPFS=y  CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y  CONFIG_BLK_DEV_LOOP=m  CONFIG_BLK_DEV_CRYPTOLOOP=m  CONFIG_BLK_DEV_DRBD=m @@ -689,6 +690,7 @@ CONFIG_CRYPTO_TEST=m  CONFIG_CRYPTO_DH=m  CONFIG_CRYPTO_ECDH=m  CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m  CONFIG_CRYPTO_CURVE25519=m  CONFIG_CRYPTO_GCM=y  CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -709,7 +711,6 @@ CONFIG_CRYPTO_RMD160=m  CONFIG_CRYPTO_RMD256=m  CONFIG_CRYPTO_RMD320=m  CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m  CONFIG_CRYPTO_TGR192=m  CONFIG_CRYPTO_WP512=m  CONFIG_CRYPTO_AES_TI=m @@ -753,6 +754,7 @@ CONFIG_CRYPTO_DES_S390=m  CONFIG_CRYPTO_AES_S390=m  CONFIG_CRYPTO_GHASH_S390=m  CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m  CONFIG_CORDIC=m  CONFIG_CRC32_SELFTEST=y  CONFIG_CRC4=m @@ -829,6 +831,7 @@ CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m  CONFIG_FAULT_INJECTION=y  CONFIG_FAILSLAB=y  CONFIG_FAIL_PAGE_ALLOC=y +CONFIG_FAULT_INJECTION_USERCOPY=y  CONFIG_FAIL_MAKE_REQUEST=y  CONFIG_FAIL_IO_TIMEOUT=y  CONFIG_FAIL_FUTEX=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 905bc8c4cfaf..17d5df2c1eff 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -87,9 +87,10 @@ CONFIG_KSM=y  
CONFIG_TRANSPARENT_HUGEPAGE=y  CONFIG_CLEANCACHE=y  CONFIG_FRONTSWAP=y +CONFIG_CMA_AREAS=7  CONFIG_MEM_SOFT_DIRTY=y  CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y  CONFIG_ZSMALLOC_STAT=y  CONFIG_DEFERRED_STRUCT_PAGE_INIT=y  CONFIG_IDLE_PAGE_TRACKING=y @@ -371,7 +372,6 @@ CONFIG_NETLINK_DIAG=m  CONFIG_CGROUP_NET_PRIO=y  CONFIG_BPF_JIT=y  CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set  CONFIG_PCI=y  # CONFIG_PCIEASPM is not set  CONFIG_HOTPLUG_PCI=y @@ -379,7 +379,7 @@ CONFIG_HOTPLUG_PCI_S390=y  CONFIG_UEVENT_HELPER=y  CONFIG_DEVTMPFS=y  CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y  CONFIG_BLK_DEV_LOOP=m  CONFIG_BLK_DEV_CRYPTOLOOP=m  CONFIG_BLK_DEV_DRBD=m @@ -680,6 +680,7 @@ CONFIG_CRYPTO_TEST=m  CONFIG_CRYPTO_DH=m  CONFIG_CRYPTO_ECDH=m  CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m  CONFIG_CRYPTO_CURVE25519=m  CONFIG_CRYPTO_GCM=y  CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -701,7 +702,6 @@ CONFIG_CRYPTO_RMD160=m  CONFIG_CRYPTO_RMD256=m  CONFIG_CRYPTO_RMD320=m  CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m  CONFIG_CRYPTO_TGR192=m  CONFIG_CRYPTO_WP512=m  CONFIG_CRYPTO_AES_TI=m @@ -745,6 +745,7 @@ CONFIG_CRYPTO_DES_S390=m  CONFIG_CRYPTO_AES_S390=m  CONFIG_CRYPTO_GHASH_S390=m  CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m  CONFIG_CORDIC=m  CONFIG_PRIME_NUMBERS=m  CONFIG_CRC4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 8f67c55625f9..a302630341ef 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -17,11 +17,11 @@ CONFIG_HZ_100=y  # CONFIG_CHSC_SCH is not set  # CONFIG_SCM_BUS is not set  CONFIG_CRASH_DUMP=y -# CONFIG_SECCOMP is not set  # CONFIG_PFAULT is not set  # CONFIG_S390_HYPFS_FS is not set  # CONFIG_VIRTUALIZATION is not set  # CONFIG_S390_GUEST is not set +# CONFIG_SECCOMP is not set  CONFIG_PARTITION_ADVANCED=y  CONFIG_IBM_PARTITION=y  # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set diff --git a/arch/s390/include/asm/pgtable.h 
b/arch/s390/include/asm/pgtable.h index 6b8d8c69b1a1..b5dbae78969b 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -692,16 +692,6 @@ static inline int pud_large(pud_t pud)  	return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);  } -static inline unsigned long pud_pfn(pud_t pud) -{ -	unsigned long origin_mask; - -	origin_mask = _REGION_ENTRY_ORIGIN; -	if (pud_large(pud)) -		origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; -	return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; -} -  #define pmd_leaf	pmd_large  static inline int pmd_large(pmd_t pmd)  { @@ -747,16 +737,6 @@ static inline int pmd_none(pmd_t pmd)  	return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;  } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ -	unsigned long origin_mask; - -	origin_mask = _SEGMENT_ENTRY_ORIGIN; -	if (pmd_large(pmd)) -		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; -	return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; -} -  #define pmd_write pmd_write  static inline int pmd_write(pmd_t pmd)  { @@ -1238,11 +1218,39 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)  #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))  #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) -#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)  #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)  #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) +static inline unsigned long pmd_deref(pmd_t pmd) +{ +	unsigned long origin_mask; + +	origin_mask = _SEGMENT_ENTRY_ORIGIN; +	if (pmd_large(pmd)) +		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; +	return pmd_val(pmd) & origin_mask; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ +	return pmd_deref(pmd) >> PAGE_SHIFT; +} + +static inline unsigned long pud_deref(pud_t pud) +{ +	unsigned long origin_mask; + +	origin_mask = _REGION_ENTRY_ORIGIN; +	if (pud_large(pud)) +		origin_mask = 
_REGION3_ENTRY_ORIGIN_LARGE; +	return pud_val(pud) & origin_mask; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ +	return pud_deref(pud) >> PAGE_SHIFT; +} +  /*   * The pgd_offset function *always* adds the index for the top-level   * region/segment table. This is done to get a sequence like the diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index a996d3990a02..0c2151451ba5 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -26,14 +26,14 @@ static inline int arch_is_kernel_initmem_freed(unsigned long addr)   * final .boot.data section, which should be identical in the decompressor and   * the decompressed kernel (that is checked during the build).   */ -#define __bootdata(var) __section(".boot.data.var") var +#define __bootdata(var) __section(".boot.data." #var) var  /*   * .boot.preserved.data is similar to .boot.data, but it is not part of the   * .init section and thus will be preserved for later use in the decompressed   * kernel.   */ -#define __bootdata_preserved(var) __section(".boot.preserved.data.var") var +#define __bootdata_preserved(var) __section(".boot.preserved.data." 
#var) var  extern unsigned long __sdma, __edma;  extern unsigned long __stext_dma, __etext_dma; diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/arch/s390/include/asm/vdso/vdso.h +++ /dev/null diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ece58f2217cb..483051e10db3 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -53,22 +53,14 @@ int main(void)  	/* stack_frame offsets */  	OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);  	OFFSET(__SF_GPRS, stack_frame, gprs); -	OFFSET(__SF_EMPTY, stack_frame, empty1); -	OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]); -	OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]); -	OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); -	OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]); +	OFFSET(__SF_EMPTY, stack_frame, empty1[0]); +	OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]); +	OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]); +	OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]); +	OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]);  	BLANK();  	OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);  	BLANK(); -	/* constants used by the vdso */ -	DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); -	DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); -	DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); -	DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); -	DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); -	DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); -	BLANK();  	/* idle data offsets */  	OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);  	OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 86235919c2d1..26bb0603c5a1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -422,6 +422,7 @@ ENTRY(system_call)  #endif  	LOCKDEP_SYS_EXIT  .Lsysc_tif: +	DISABLE_INTS  	
TSTMSK	__PT_FLAGS(%r11),_PIF_WORK  	jnz	.Lsysc_work  	TSTMSK	__TI_flags(%r12),_TIF_WORK @@ -444,6 +445,7 @@ ENTRY(system_call)  # One of the work bits is on. Find out which one.  #  .Lsysc_work: +	ENABLE_INTS  	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED  	jo	.Lsysc_reschedule  	TSTMSK	__PT_FLAGS(%r11),_PIF_SYSCALL_RESTART @@ -1066,6 +1068,7 @@ EXPORT_SYMBOL(save_fpu_regs)   *	%r4   */  load_fpu_regs: +	stnsm	__SF_EMPTY(%r15),0xfc  	lg	%r4,__LC_CURRENT  	aghi	%r4,__TASK_thread  	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU @@ -1097,6 +1100,7 @@ load_fpu_regs:  .Lload_fpu_regs_done:  	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU  .Lload_fpu_regs_exit: +	ssm	__SF_EMPTY(%r15)  	BR_EX	%r14  .Lload_fpu_regs_end:  ENDPROC(load_fpu_regs) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index f7f1e64e0d98..2b85096964f8 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -33,10 +33,10 @@ void enabled_wait(void)  		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;  	clear_cpu_flag(CIF_NOHZ_DELAY); -	local_irq_save(flags); +	raw_local_irq_save(flags);  	/* Call the assembler magic in entry.S */  	psw_idle(idle, psw_mask); -	local_irq_restore(flags); +	raw_local_irq_restore(flags);  	/* Account time spent with enabled wait psw loaded as idle time. 
*/  	raw_write_seqcount_begin(&idle->seqcount); @@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)  void arch_cpu_idle(void)  {  	enabled_wait(); -	local_irq_enable(); +	raw_local_irq_enable();  }  void arch_cpu_idle_exit(void) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..19cd7b961c45 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,  	rcu_read_lock();  	perf_prepare_sample(&header, data, event, regs); -	if (perf_output_begin(&handle, event, header.size)) +	if (perf_output_begin(&handle, data, event, header.size))  		goto out;  	/* Update the process ID (see also kernel/events/core.c) */ @@ -2228,4 +2228,4 @@ out:  }  arch_initcall(init_cpum_sampling_pmu); -core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 4352a504f235..6e9e5d5e927e 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -53,8 +53,7 @@ u64 perf_reg_abi(struct task_struct *task)  }  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  {  	/*  	 * Use the regs from the first interruption and let diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ebfe86d097f0..390d97daa2b3 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -855,13 +855,14 @@ void __init smp_detect_cpus(void)  static void smp_init_secondary(void)  { -	int cpu = smp_processor_id(); +	int cpu = raw_smp_processor_id();  	S390_lowcore.last_update_clock = get_tod_clock();  	restore_access_regs(S390_lowcore.access_regs_save_area);  	set_cpu_flag(CIF_ASCE_PRIMARY);  	set_cpu_flag(CIF_ASCE_SECONDARY);  	cpu_init(); +	rcu_cpu_starting(cpu);  	preempt_disable();  	init_cpu_timer();  	
vtime_init(); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 14bd9d58edc9..883bfed9f5c2 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -129,8 +129,15 @@ int uv_destroy_page(unsigned long paddr)  		.paddr = paddr  	}; -	if (uv_call(0, (u64)&uvcb)) +	if (uv_call(0, (u64)&uvcb)) { +		/* +		 * Older firmware uses 107/d as an indication of a non secure +		 * page. Let us emulate the newer variant (no-op). +		 */ +		if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd) +			return 0;  		return -EINVAL; +	}  	return 0;  } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b74b92c1a58..425d3d75320b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2312,7 +2312,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)  		struct kvm_s390_pv_unp unp = {};  		r = -EINVAL; -		if (!kvm_s390_pv_is_protected(kvm)) +		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))  			break;  		r = -EFAULT; @@ -3564,7 +3564,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)  		vcpu->arch.sie_block->pp = 0;  		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;  		vcpu->arch.sie_block->todpr = 0; -		vcpu->arch.sie_block->cpnc = 0;  	}  } @@ -3582,7 +3581,6 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)  	regs->etoken = 0;  	regs->etoken_extension = 0; -	regs->diag318 = 0;  }  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index eb99e2f95ebe..f5847f9dec7c 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -208,7 +208,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)  		return -EIO;  	}  	kvm->arch.gmap->guest_handle = uvcb.guest_handle; -	atomic_set(&kvm->mm->context.is_protected, 1);  	return 0;  } @@ -228,6 +227,8 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,  	*rrc = uvcb.header.rrc;  	KVM_UV_EVENT(kvm, 3, 
"PROTVIRT VM SET PARMS: rc %x rrc %x",  		     *rc, *rrc); +	if (!cc) +		atomic_set(&kvm->mm->context.is_protected, 1);  	return cc ? -EINVAL : 0;  } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cfb0017f33a7..64795d034926 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2690,6 +2690,8 @@ static const struct mm_walk_ops reset_acc_walk_ops = {  #include <linux/sched/mm.h>  void s390_reset_acc(struct mm_struct *mm)  { +	if (!mm_is_protected(mm)) +		return;  	/*  	 * we might be called during  	 * reset:                             we walk the pages and clear diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index d33f21545dfd..9a6bae503fe6 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -101,6 +101,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)  		if (ret)  			break; +		/* the PCI function will be scanned once function 0 appears */ +		if (!zdev->zbus->bus) +			break; +  		pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);  		if (!pdev)  			break; diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 0dc0f52f9bb8..f59814983bd5 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -22,7 +22,7 @@ static void (*sh_idle)(void);  void default_idle(void)  {  	set_bl_bit(); -	local_irq_enable(); +	raw_local_irq_enable();  	/* Isn't this racy ? 
*/  	cpu_sleep();  	clear_bl_bit(); diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c index 065e2d4b7290..396f46bca52e 100644 --- a/arch/sparc/kernel/leon_pmc.c +++ b/arch/sparc/kernel/leon_pmc.c @@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)  	register unsigned int address = (unsigned int)leon3_irqctrl_regs;  	/* Interrupts need to be enabled to not hang the CPU */ -	local_irq_enable(); +	raw_local_irq_enable();  	__asm__ __volatile__ (  		"wr	%%g0, %%asr19\n" @@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)  static void pmc_leon_idle(void)  {  	/* Interrupts need to be enabled to not hang the CPU */ -	local_irq_enable(); +	raw_local_irq_enable();  	/* For systems without power-down, this will be no-op */  	__asm__ __volatile__ ("wr	%g0, %asr19\n\t"); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index adfcaeab3ddc..a02363735915 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -74,7 +74,7 @@ void arch_cpu_idle(void)  {  	if (sparc_idle)  		(*sparc_idle)(); -	local_irq_enable(); +	raw_local_irq_enable();  }  /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index a75093b993f9..6f8c7822fc06 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -62,11 +62,11 @@ void arch_cpu_idle(void)  {  	if (tlb_type != hypervisor) {  		touch_nmi_watchdog(); -		local_irq_enable(); +		raw_local_irq_enable();  	} else {  		unsigned long pstate; -		local_irq_enable(); +		raw_local_irq_enable();                  /* The sun4v sleeping code requires that we have PSTATE.IE cleared over                   * the cpu sleep hypervisor call. 
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 5393e13e07e0..2bbf28cf3aa9 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -33,7 +33,13 @@ do {							\  } while (0)  #ifdef CONFIG_3_LEVEL_PGTABLES -#define __pmd_free_tlb(tlb,x, address)   tlb_remove_page((tlb),virt_to_page(x)) + +#define __pmd_free_tlb(tlb, pmd, address)		\ +do {							\ +	pgtable_pmd_page_dtor(virt_to_page(pmd));	\ +	tlb_remove_page((tlb),virt_to_page(pmd));	\ +} while (0)						\ +  #endif  #endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 3bed09538dd9..9505a7e87396 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -217,7 +217,7 @@ void arch_cpu_idle(void)  {  	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();  	um_idle_sleep(); -	local_irq_enable(); +	raw_local_irq_enable();  }  int __cant_sleep(void) { diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 95c355181dcd..bfb70c456b30 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -21,7 +21,7 @@   * on some systems.   */ -void __section(".__syscall_stub") +void __attribute__ ((__section__ (".__syscall_stub")))  stub_clone_handler(void)  {  	struct stub_data *data = (struct stub_data *) STUB_DATA; diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index a5e5db6ada3c..39b2eded7bc2 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -164,6 +164,7 @@ void initialize_identity_maps(void *rmode)  	add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);  	/* Load the new page-table. 
*/ +	sev_verify_cbit(top_level_pgt);  	write_cr3(top_level_pgt);  } diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index dd07e7b41b11..aa561795efd1 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit)  SYM_FUNC_END(get_sev_encryption_bit)  	.code64 + +#include "../../kernel/sev_verify_cbit.S" +  SYM_FUNC_START(set_sev_encryption_mask)  #ifdef CONFIG_AMD_MEM_ENCRYPT  	push	%rbp @@ -81,6 +84,19 @@ SYM_FUNC_START(set_sev_encryption_mask)  	bts	%rax, sme_me_mask(%rip)	/* Create the encryption mask */ +	/* +	 * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in +	 * get_sev_encryption_bit() because this function is 32-bit code and +	 * shared between 64-bit and 32-bit boot path. +	 */ +	movl	$MSR_AMD64_SEV, %ecx	/* Read the SEV MSR */ +	rdmsr + +	/* Store MSR value in sev_status */ +	shlq	$32, %rdx +	orq	%rdx, %rax +	movq	%rax, sev_status(%rip) +  .Lno_sev_mask:  	movq	%rbp, %rsp		/* Restore original stack pointer */ @@ -96,5 +112,7 @@ SYM_FUNC_END(set_sev_encryption_mask)  #ifdef CONFIG_AMD_MEM_ENCRYPT  	.balign	8 -SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sme_me_mask,		.quad 0) +SYM_DATA(sev_status,		.quad 0) +SYM_DATA(sev_check_data,	.quad 0)  #endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 6d31f1b4c4d1..d9a631c5973c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -159,4 +159,6 @@ void boot_page_fault(void);  void boot_stage1_vc(void);  void boot_stage2_vc(void); +unsigned long sev_verify_cbit(unsigned long cr3); +  #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index e508dbd91813..c44aba290fbb 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -158,6 +158,7 @@ static unsigned int crypto_poly1305_setdctxkey(struct 
poly1305_desc_ctx *dctx,  			dctx->s[1] = get_unaligned_le32(&inp[4]);  			dctx->s[2] = get_unaligned_le32(&inp[8]);  			dctx->s[3] = get_unaligned_le32(&inp[12]); +			acc += POLY1305_BLOCK_SIZE;  			dctx->sset = true;  		}  	} diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 1f47e24fb65c..379819244b91 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,10 +364,10 @@  440	common	process_madvise		sys_process_madvise  # -# x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. The __x32_compat_sys stubs are created -# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 -# is defined. +# Due to a historical design error, certain syscalls are numbered differently +# in x32 as compared to native x86_64.  These syscalls have numbers 512-547. +# Do not add new syscalls to this range.  Numbers 548 and above are available +# for non-x32 use.  #  512	x32	rt_sigaction		compat_sys_rt_sigaction  513	x32	rt_sigreturn		compat_sys_x32_rt_sigreturn @@ -405,3 +405,5 @@  545	x32	execveat		compat_sys_execveat  546	x32	preadv2			compat_sys_preadv64v2  547	x32	pwritev2		compat_sys_pwritev64v2 +# This is the end of the legacy x32 range.  Numbers 548 and above are +# not special and are not to be used for x32-specific syscalls. 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f1926e9f2143..af457f8cb29d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2630,7 +2630,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)  		u64 pebs_enabled = cpuc->pebs_enabled;  		handled++; -		x86_pmu.drain_pebs(regs); +		x86_pmu.drain_pebs(regs, &data);  		status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;  		/* @@ -4987,6 +4987,12 @@ __init int intel_pmu_init(void)  	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ +	if (version >= 5) { +		x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; +		if (x86_pmu.intel_cap.anythread_deprecated) +			pr_cont(" AnyThread deprecated, "); +	} +  	/*  	 * Install the hw-cache-events table:  	 */ @@ -5512,6 +5518,10 @@ __init int intel_pmu_init(void)  	x86_pmu.intel_ctrl |=  		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; +	/* AnyThread may be deprecated on arch perfmon v5 or later */ +	if (x86_pmu.intel_cap.anythread_deprecated) +		x86_pmu.format_attrs = intel_arch_formats_attr; +  	if (x86_pmu.event_constraints) {  		/*  		 * event on fixed counter2 (REF_CYCLES) only works on this diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 442e1ed4acd4..4eb7ee5fed72 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -107,14 +107,14 @@  MODULE_LICENSE("GPL");  #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\ -static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\ -				struct kobj_attribute *attr,	\ +static ssize_t __cstate_##_var##_show(struct device *dev,	\ +				struct device_attribute *attr,	\  				char *page)			\  {								\  	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\  	return sprintf(page, _format "\n");			\  }								\ -static struct kobj_attribute format_attr_##_var =		\ +static struct device_attribute format_attr_##_var =		\  	
__ATTR(_name, 0444, __cstate_##_var##_show, NULL)  static ssize_t cstate_get_attr_cpumask(struct device *dev, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 404315df1e16..b47cc4226934 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void)  	rcu_read_lock();  	perf_prepare_sample(&header, &data, event, &regs); -	if (perf_output_begin(&handle, event, header.size * -			      (top - base - skip))) +	if (perf_output_begin(&handle, &data, event, +			      header.size * (top - base - skip)))  		goto unlock;  	for (at = base; at < top; at++) { @@ -670,7 +670,9 @@ unlock:  static inline void intel_pmu_drain_pebs_buffer(void)  { -	x86_pmu.drain_pebs(NULL); +	struct perf_sample_data data; + +	x86_pmu.drain_pebs(NULL, &data);  }  /* @@ -1719,23 +1721,24 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)  	return 0;  } -static void __intel_pmu_pebs_event(struct perf_event *event, -				   struct pt_regs *iregs, -				   void *base, void *top, -				   int bit, int count, -				   void (*setup_sample)(struct perf_event *, -						struct pt_regs *, -						void *, -						struct perf_sample_data *, -						struct pt_regs *)) +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, +		       struct pt_regs *iregs, +		       struct perf_sample_data *data, +		       void *base, void *top, +		       int bit, int count, +		       void (*setup_sample)(struct perf_event *, +					    struct pt_regs *, +					    void *, +					    struct perf_sample_data *, +					    struct pt_regs *))  {  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);  	struct hw_perf_event *hwc = &event->hw; -	struct perf_sample_data data;  	struct x86_perf_regs perf_regs;  	struct pt_regs *regs = &perf_regs.regs;  	void *at = get_next_pebs_record_by_bit(base, top, bit); -	struct pt_regs dummy_iregs; +	static struct pt_regs dummy_iregs;  	if (hwc->flags & 
PERF_X86_EVENT_AUTO_RELOAD) {  		/* @@ -1752,14 +1755,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event,  		iregs = &dummy_iregs;  	while (count > 1) { -		setup_sample(event, iregs, at, &data, regs); -		perf_event_output(event, &data, regs); +		setup_sample(event, iregs, at, data, regs); +		perf_event_output(event, data, regs);  		at += cpuc->pebs_record_size;  		at = get_next_pebs_record_by_bit(at, top, bit);  		count--;  	} -	setup_sample(event, iregs, at, &data, regs); +	setup_sample(event, iregs, at, data, regs);  	if (iregs == &dummy_iregs) {  		/*  		 * The PEBS records may be drained in the non-overflow context, @@ -1767,18 +1770,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,  		 * last record the same as other PEBS records, and doesn't  		 * invoke the generic overflow handler.  		 */ -		perf_event_output(event, &data, regs); +		perf_event_output(event, data, regs);  	} else {  		/*  		 * All but the last records are processed.  		 * The last one is left to be able to call the overflow handler.  		 
*/ -		if (perf_event_overflow(event, &data, regs)) +		if (perf_event_overflow(event, data, regs))  			x86_pmu_stop(event, 0);  	}  } -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)  {  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);  	struct debug_store *ds = cpuc->ds; @@ -1812,7 +1815,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)  		return;  	} -	__intel_pmu_pebs_event(event, iregs, at, top, 0, n, +	__intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,  			       setup_pebs_fixed_sample_data);  } @@ -1835,7 +1838,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int  	}  } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data)  {  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);  	struct debug_store *ds = cpuc->ds; @@ -1942,14 +1945,14 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)  		}  		if (counts[bit]) { -			__intel_pmu_pebs_event(event, iregs, base, +			__intel_pmu_pebs_event(event, iregs, data, base,  					       top, bit, counts[bit],  					       setup_pebs_fixed_sample_data);  		}  	}  } -static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)  {  	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1997,7 +2000,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs)  		if (WARN_ON_ONCE(!event->attr.precise_ip))  			continue; -		__intel_pmu_pebs_event(event, iregs, base, +		__intel_pmu_pebs_event(event, iregs, data, base,  				       top, bit, counts[bit],  				       setup_pebs_adaptive_sample_data);  	} diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c 
index 86d012b3e0b4..80d52cbe2fde 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -94,8 +94,8 @@ end:  	return map;  } -ssize_t uncore_event_show(struct kobject *kobj, -			  struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, +			  struct device_attribute *attr, char *buf)  {  	struct uncore_event_desc *event =  		container_of(attr, struct uncore_event_desc, attr); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 83d2a7d490e0..9efea154349d 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -157,7 +157,7 @@ struct intel_uncore_box {  #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS	2  struct uncore_event_desc { -	struct kobj_attribute attr; +	struct device_attribute attr;  	const char *config;  }; @@ -179,8 +179,8 @@ struct pci2phy_map {  struct pci2phy_map *__find_pci2phy_map(int segment);  int uncore_pcibus_to_physid(struct pci_bus *bus); -ssize_t uncore_event_show(struct kobject *kobj, -			  struct kobj_attribute *attr, char *buf); +ssize_t uncore_event_show(struct device *dev, +			  struct device_attribute *attr, char *buf);  static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev)  { @@ -201,14 +201,14 @@ extern int __uncore_max_dies;  }  #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\ -static ssize_t __uncore_##_var##_show(struct kobject *kobj,		\ -				struct kobj_attribute *attr,		\ +static ssize_t __uncore_##_var##_show(struct device *dev,		\ +				struct device_attribute *attr,		\  				char *page)				\  {									\  	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\  	return sprintf(page, _format "\n");				\  }									\ -static struct kobj_attribute format_attr_##_var =			\ +static struct device_attribute format_attr_##_var =			\  	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)  static inline bool uncore_pmc_fixed(int idx) diff --git a/arch/x86/events/intel/uncore_snb.c 
b/arch/x86/events/intel/uncore_snb.c index 39e632ed6ca9..bbd1120ae161 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -475,7 +475,7 @@ enum perf_snb_uncore_imc_freerunning_types {  static struct freerunning_counters snb_uncore_imc_freerunning[] = {  	[SNB_PCI_UNCORE_IMC_DATA_READS]		= { SNB_UNCORE_PCI_IMC_DATA_READS_BASE,  							0x0, 0x0, 1, 32 }, -	[SNB_PCI_UNCORE_IMC_DATA_READS]		= { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, +	[SNB_PCI_UNCORE_IMC_DATA_WRITES]	= { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE,  							0x0, 0x0, 1, 32 },  	[SNB_PCI_UNCORE_IMC_GT_REQUESTS]	= { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE,  							0x0, 0x0, 1, 32 }, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ee2b9b9fc2a5..6a8edfe59b09 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -585,6 +585,7 @@ union perf_capabilities {  		u64     pebs_baseline:1;  		u64	perf_metrics:1;  		u64	pebs_output_pt_available:1; +		u64	anythread_deprecated:1;  	};  	u64	capabilities;  }; @@ -727,7 +728,7 @@ struct x86_pmu {  	int		pebs_record_size;  	int		pebs_buffer_size;  	int		max_pebs_events; -	void		(*drain_pebs)(struct pt_regs *regs); +	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);  	struct event_constraint *pebs_constraints;  	void		(*pebs_aliases)(struct perf_event *event);  	unsigned long	large_pebs_flags; diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 7c0120e2e957..7dbbeaacd995 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -93,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {   * any other bit is reserved   */  #define RAPL_EVENT_MASK	0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)		\ -static ssize_t __rapl_##_var##_show(struct kobject *kobj,	\ -				struct kobj_attribute *attr,	\ -				char *page)			\ -{								\ -	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\ -	return sprintf(page, 
_format "\n");			\ -}								\ -static struct kobj_attribute format_attr_##_var =		\ -	__ATTR(_name, 0444, __rapl_##_var##_show, NULL) -  #define RAPL_CNTR_WIDTH 32  #define RAPL_EVENT_ATTR_STR(_name, v, str)					\ @@ -441,7 +429,7 @@ static struct attribute_group rapl_pmu_events_group = {  	.attrs = attrs_empty,  }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7");  static struct attribute *rapl_formats_attr[] = {  	&format_attr_event.attr,  	NULL, diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 40e0e322161d..284e73661a18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -273,11 +273,15 @@ void __init hv_apic_init(void)  		pr_info("Hyper-V: Using enlightened APIC (%s mode)",  			x2apic_enabled() ? "x2apic" : "xapic");  		/* -		 * With x2apic, architectural x2apic MSRs are equivalent to the -		 * respective synthetic MSRs, so there's no need to override -		 * the apic accessors.  The only exception is -		 * hv_apic_eoi_write, because it benefits from lazy EOI when -		 * available, but it works for both xapic and x2apic modes. +		 * When in x2apic mode, don't use the Hyper-V specific APIC +		 * accessors since the field layout in the ICR register is +		 * different in x2apic mode. Furthermore, the architectural +		 * x2apic MSRs function just as well as the Hyper-V +		 * synthetic APIC MSRs, so there's no benefit in having +		 * separate Hyper-V accessors for x2apic mode. The only +		 * exception is hv_apic_eoi_write, because it benefits from +		 * lazy EOI when available, but the same accessor works for +		 * both xapic and x2apic because the field layout is the same.  		 
*/  		apic_set_eoi_write(hv_apic_eoi_write);  		if (!x2apic_enabled()) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d44858b69353..7e5f33a0d0e2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -639,6 +639,7 @@ struct kvm_vcpu_arch {  	int cpuid_nent;  	struct kvm_cpuid_entry2 *cpuid_entries; +	unsigned long cr3_lm_rsvd_bits;  	int maxphyaddr;  	int max_tdp_level; @@ -1655,6 +1656,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);  int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v);  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);  int kvm_cpu_get_interrupt(struct kvm_vcpu *v);  void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e039a933aca3..29dd27b5a339 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,  static inline void __sti_mwait(unsigned long eax, unsigned long ecx)  { -	trace_hardirqs_on(); -  	mds_idle_clear_cpu_buffers();  	/* "mwait %eax, %ecx;" */  	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6960cd6d1f23..b9a7fd0a27e2 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -137,7 +137,9 @@ union cpuid10_edx {  	struct {  		unsigned int num_counters_fixed:5;  		unsigned int bit_width_fixed:8; -		unsigned int reserved:19; +		unsigned int reserved1:2; +		unsigned int anythread_deprecated:1; +		unsigned int reserved2:16;  	} split;  	unsigned int full;  }; diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 6bfc878f6771..6a9ccc1b2be5 100644 --- 
a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -28,4 +28,14 @@  #endif  #endif /* CONFIG_SPARSEMEM */ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(phys_addr_t start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif +#endif /* __ASSEMBLY__ */ +  #endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 172d3e4a9e4b..648eb23fe7f0 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,14 +2,8 @@  #ifndef _ASM_X86_UV_UV_H  #define _ASM_X86_UV_UV_H -#include <asm/tlbflush.h> -  enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; -struct cpumask; -struct mm_struct; -struct flush_tlb_info; -  #ifdef CONFIG_X86_UV  #include <linux/efi.h> @@ -44,10 +38,6 @@ static inline int is_uv_system(void)	{ return 0; }  static inline int is_uv_hubbed(int uv)	{ return 0; }  static inline void uv_cpu_init(void)	{ }  static inline void uv_system_init(void)	{ } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, -		    const struct flush_tlb_info *info) -{ return cpumask; }  #endif	/* X86_UV */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 812e9b4c1114..950afebfba88 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -32,6 +32,7 @@  #define KVM_FEATURE_POLL_CONTROL	12  #define KVM_FEATURE_PV_SCHED_YIELD	13  #define KVM_FEATURE_ASYNC_PF_INT	14 +#define KVM_FEATURE_MSI_EXT_DEST_ID	15  #define KVM_HINTS_REALTIME      0 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4adbe65afe23..2400ad62f330 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -807,6 +807,15 @@ static inline temp_mm_state_t use_temporary_mm(struct 
mm_struct *mm)  	temp_mm_state_t temp_state;  	lockdep_assert_irqs_disabled(); + +	/* +	 * Make sure not to be in TLB lazy mode, as otherwise we'll end up +	 * with a stale address space WITHOUT being in lazy mode after +	 * restoring the previous mm. +	 */ +	if (this_cpu_read(cpu_tlbstate.is_lazy)) +		leave_mm(smp_processor_id()); +  	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);  	switch_mm_irqs_off(NULL, mm, current); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 714233cee0b5..1b98f8c12b96 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -33,7 +33,7 @@ static union uvh_apicid		uvh_apicid;  static int			uv_node_id;  /* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */ -static u8 uv_archtype[UV_AT_SIZE]; +static u8 uv_archtype[UV_AT_SIZE + 1];  static u8 oem_id[ACPI_OEM_ID_SIZE + 1];  static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; @@ -290,6 +290,9 @@ static void __init uv_stringify(int len, char *to, char *from)  {  	/* Relies on 'to' being NULL chars so result will be NULL terminated */  	strncpy(to, from, len-1); + +	/* Trim trailing spaces */ +	(void)strim(to);  }  /* Find UV arch type entry in UVsystab */ @@ -317,7 +320,7 @@ static int __init decode_arch_type(unsigned long ptr)  	if (n > 0 && n < sizeof(uv_ate->archtype)) {  		pr_info("UV: UVarchtype received from BIOS\n"); -		uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype); +		uv_stringify(sizeof(uv_archtype), uv_archtype, uv_ate->archtype);  		return 1;  	}  	return 0; @@ -366,7 +369,7 @@ static int __init early_get_arch_type(void)  	return ret;  } -static int __init uv_set_system_type(char *_oem_id) +static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id)  {  	/* Save OEM_ID passed from ACPI MADT */  	uv_stringify(sizeof(oem_id), oem_id, _oem_id); @@ -375,7 +378,7 @@ static int __init uv_set_system_type(char *_oem_id)  	if (!early_get_arch_type())  		/* If not use OEM ID for 
UVarchtype */ -		uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id); +		uv_stringify(sizeof(uv_archtype), uv_archtype, oem_id);  	/* Check if not hubbed */  	if (strncmp(uv_archtype, "SGI", 3) != 0) { @@ -386,13 +389,23 @@ static int __init uv_set_system_type(char *_oem_id)  			/* (Not hubless), not a UV */  			return 0; +		/* Is UV hubless system */ +		uv_hubless_system = 0x01; + +		/* UV5 Hubless */ +		if (strncmp(uv_archtype, "NSGI5", 5) == 0) +			uv_hubless_system |= 0x20; +  		/* UV4 Hubless: CH */ -		if (strncmp(uv_archtype, "NSGI4", 5) == 0) -			uv_hubless_system = 0x11; +		else if (strncmp(uv_archtype, "NSGI4", 5) == 0) +			uv_hubless_system |= 0x10;  		/* UV3 Hubless: UV300/MC990X w/o hub */  		else -			uv_hubless_system = 0x9; +			uv_hubless_system |= 0x8; + +		/* Copy APIC type */ +		uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);  		pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n",  			oem_id, oem_table_id, uv_system_type, uv_hubless_system); @@ -456,7 +469,7 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)  	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;  	/* If not UV, return. 
*/ -	if (likely(uv_set_system_type(_oem_id) == 0)) +	if (uv_set_system_type(_oem_id, _oem_table_id) == 0)  		return 0;  	/* Save and Decode OEM Table ID */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d3f0db463f96..d41b70fe4918 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)  	if (boot_cpu_has(X86_FEATURE_IBPB)) {  		setup_force_cpu_cap(X86_FEATURE_USE_IBPB); +		spectre_v2_user_ibpb = mode;  		switch (cmd) {  		case SPECTRE_V2_USER_CMD_FORCE:  		case SPECTRE_V2_USER_CMD_PRCTL_IBPB:  		case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:  			static_branch_enable(&switch_mm_always_ibpb); +			spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;  			break;  		case SPECTRE_V2_USER_CMD_PRCTL:  		case SPECTRE_V2_USER_CMD_AUTO: @@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)  		pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",  			static_key_enabled(&switch_mm_always_ibpb) ?  			"always-on" : "conditional"); - -		spectre_v2_user_ibpb = mode;  	}  	/* @@ -1254,6 +1254,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)  	return 0;  } +static bool is_spec_ib_user_controlled(void) +{ +	return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || +		spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || +		spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || +		spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} +  static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)  {  	switch (ctrl) { @@ -1261,16 +1269,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)  		if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&  		    spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)  			return 0; +  		/* -		 * Indirect branch speculation is always disabled in strict -		 * mode. 
It can neither be enabled if it was force-disabled -		 * by a  previous prctl call. +		 * With strict mode for both IBPB and STIBP, the instruction +		 * code paths avoid checking this task flag and instead, +		 * unconditionally run the instruction. However, STIBP and IBPB +		 * are independent and either can be set to conditionally +		 * enabled regardless of the mode of the other. +		 * +		 * If either is set to conditional, allow the task flag to be +		 * updated, unless it was force-disabled by a previous prctl +		 * call. Currently, this is possible on an AMD CPU which has the +		 * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the +		 * kernel is booted with 'spectre_v2_user=seccomp', then +		 * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and +		 * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED.  		 */ -		if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || -		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || -		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || +		if (!is_spec_ib_user_controlled() ||  		    task_spec_ib_force_disable(task))  			return -EPERM; +  		task_clear_spec_ib_disable(task);  		task_update_spec_tif(task);  		break; @@ -1283,10 +1301,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)  		if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&  		    spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)  			return -EPERM; -		if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || -		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || -		    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + +		if (!is_spec_ib_user_controlled())  			return 0; +  		task_set_spec_ib_disable(task);  		if (ctrl == PR_SPEC_FORCE_DISABLE)  			task_set_spec_ib_force_disable(task); @@ -1351,20 +1369,17 @@ static int ib_prctl_get(struct task_struct *task)  	if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&  	    spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)  		return PR_SPEC_ENABLE; -	else 
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || -	    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || -	    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) -		return PR_SPEC_DISABLE; -	else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || -	    spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || -	    spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || -	    spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { +	else if (is_spec_ib_user_controlled()) {  		if (task_spec_ib_force_disable(task))  			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;  		if (task_spec_ib_disable(task))  			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;  		return PR_SPEC_PRCTL | PR_SPEC_ENABLE; -	} else +	} else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || +	    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || +	    spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) +		return PR_SPEC_DISABLE; +	else  		return PR_SPEC_NOT_AFFECTED;  } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 4102b866e7c0..32b7099e3511 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1384,8 +1384,10 @@ noinstr void do_machine_check(struct pt_regs *regs)  	 * When there's any problem use only local no_way_out state.  	 
*/  	if (!lmce) { -		if (mce_end(order) < 0) -			no_way_out = worst >= MCE_PANIC_SEVERITY; +		if (mce_end(order) < 0) { +			if (!no_way_out) +				no_way_out = worst >= MCE_PANIC_SEVERITY; +		}  	} else {  		/*  		 * If there was a fatal machine check we should have diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6a99535d7f37..7e8e07bddd5f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev  	return find_matching_signature(mc, csig, cpf);  } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - * - * %true - if there's a match - * %false - otherwise - */ -static bool microcode_matches(struct microcode_header_intel *mc_header, -			      unsigned long sig) -{ -	unsigned long total_size = get_totalsize(mc_header); -	unsigned long data_size = get_datasize(mc_header); -	struct extended_sigtable *ext_header; -	unsigned int fam_ucode, model_ucode; -	struct extended_signature *ext_sig; -	unsigned int fam, model; -	int ext_sigcount, i; - -	fam   = x86_family(sig); -	model = x86_model(sig); - -	fam_ucode   = x86_family(mc_header->sig); -	model_ucode = x86_model(mc_header->sig); - -	if (fam == fam_ucode && model == model_ucode) -		return true; - -	/* Look for ext. 
headers: */ -	if (total_size <= data_size + MC_HEADER_SIZE) -		return false; - -	ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE; -	ext_sig      = (void *)ext_header + EXT_HEADER_SIZE; -	ext_sigcount = ext_header->count; - -	for (i = 0; i < ext_sigcount; i++) { -		fam_ucode   = x86_family(ext_sig->sig); -		model_ucode = x86_model(ext_sig->sig); - -		if (fam == fam_ucode && model == model_ucode) -			return true; - -		ext_sig++; -	} -	return false; -} -  static struct ucode_patch *memdup_patch(void *data, unsigned int size)  {  	struct ucode_patch *p; @@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size)  	return p;  } -static void save_microcode_patch(void *data, unsigned int size) +static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size)  {  	struct microcode_header_intel *mc_hdr, *mc_saved_hdr;  	struct ucode_patch *iter, *tmp, *p = NULL; @@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size)  	if (!p)  		return; +	if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) +		return; +  	/*  	 * Save for early loading. On 32-bit, that needs to be a physical  	 * address as the APs are running from physical addresses, before @@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)  		size -= mc_size; -		if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { +		if (!find_matching_signature(data, uci->cpu_sig.sig, +					     uci->cpu_sig.pf)) {  			data += mc_size;  			continue;  		}  		if (save) { -			save_microcode_patch(data, mc_size); +			save_microcode_patch(uci, data, mc_size);  			goto next;  		} @@ -483,14 +440,14 @@ static void show_saved_mc(void)   * Save this microcode patch. It will be loaded early when a CPU is   * hot-added or resumes.   
*/ -static void save_mc_for_early(u8 *mc, unsigned int size) +static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size)  {  	/* Synchronization during CPU hotplug. */  	static DEFINE_MUTEX(x86_cpu_microcode_mutex);  	mutex_lock(&x86_cpu_microcode_mutex); -	save_microcode_patch(mc, size); +	save_microcode_patch(uci, mc, size);  	show_saved_mc();  	mutex_unlock(&x86_cpu_microcode_mutex); @@ -935,7 +892,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)  	 * permanent memory. So it will be loaded early when a CPU is hot added  	 * or resumes.  	 */ -	save_mc_for_early(new_mc, new_mc_size); +	save_mc_for_early(uci, new_mc, new_mc_size);  	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",  		 cpu, new_rev, uci->cpu_sig.rev); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index af323e2e3100..6f4ca4bea625 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -507,6 +507,24 @@ unlock:  	return ret ?: nbytes;  } +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. 
+ * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ +	kernfs_put(rdtgrp->kn); +	kfree(rdtgrp); +} +  struct task_move_callback {  	struct callback_head	work;  	struct rdtgroup		*rdtgrp; @@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head)  	    (rdtgrp->flags & RDT_DELETED)) {  		current->closid = 0;  		current->rmid = 0; -		kfree(rdtgrp); +		rdtgroup_remove(rdtgrp);  	}  	if (unlikely(current->flags & PF_EXITING)) @@ -1769,7 +1787,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,  	if (IS_ERR(kn_subdir))  		return PTR_ERR(kn_subdir); -	kernfs_get(kn_subdir);  	ret = rdtgroup_kn_set_ugid(kn_subdir);  	if (ret)  		return ret; @@ -1792,7 +1809,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)  	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);  	if (IS_ERR(kn_info))  		return PTR_ERR(kn_info); -	kernfs_get(kn_info);  	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);  	if (ret) @@ -1813,12 +1829,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)  			goto out_destroy;  	} -	/* -	 * This extra ref will be put in kernfs_remove() and guarantees -	 * that @rdtgrp->kn is always accessible. -	 */ -	kernfs_get(kn_info); -  	ret = rdtgroup_kn_set_ugid(kn_info);  	if (ret)  		goto out_destroy; @@ -1847,12 +1857,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,  	if (dest_kn)  		*dest_kn = kn; -	/* -	 * This extra ref will be put in kernfs_remove() and guarantees -	 * that @rdtgrp->kn is always accessible. 
-	 */ -	kernfs_get(kn); -  	ret = rdtgroup_kn_set_ugid(kn);  	if (ret)  		goto out_destroy; @@ -2079,8 +2083,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)  		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)  			rdtgroup_pseudo_lock_remove(rdtgrp);  		kernfs_unbreak_active_protection(kn); -		kernfs_put(rdtgrp->kn); -		kfree(rdtgrp); +		rdtgroup_remove(rdtgrp);  	} else {  		kernfs_unbreak_active_protection(kn);  	} @@ -2139,13 +2142,11 @@ static int rdt_get_tree(struct fs_context *fc)  					  &kn_mongrp);  		if (ret < 0)  			goto out_info; -		kernfs_get(kn_mongrp);  		ret = mkdir_mondata_all(rdtgroup_default.kn,  					&rdtgroup_default, &kn_mondata);  		if (ret < 0)  			goto out_mongrp; -		kernfs_get(kn_mondata);  		rdtgroup_default.mon.mon_data_kn = kn_mondata;  	} @@ -2357,7 +2358,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)  		if (atomic_read(&sentry->waitcount) != 0)  			sentry->flags = RDT_DELETED;  		else -			kfree(sentry); +			rdtgroup_remove(sentry);  	}  } @@ -2399,7 +2400,7 @@ static void rmdir_all_sub(void)  		if (atomic_read(&rdtgrp->waitcount) != 0)  			rdtgrp->flags = RDT_DELETED;  		else -			kfree(rdtgrp); +			rdtgroup_remove(rdtgrp);  	}  	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */  	update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2499,11 +2500,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,  	if (IS_ERR(kn))  		return PTR_ERR(kn); -	/* -	 * This extra ref will be put in kernfs_remove() and guarantees -	 * that kn is always accessible. -	 */ -	kernfs_get(kn);  	ret = rdtgroup_kn_set_ugid(kn);  	if (ret)  		goto out_destroy; @@ -2838,8 +2834,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,  	/*  	 * kernfs_remove() will drop the reference count on "kn" which  	 * will free it. But we still need it to stick around for the -	 * rdtgroup_kn_unlock(kn} call below. Take one extra reference -	 * here, which will be dropped inside rdtgroup_kn_unlock(). 
+	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here, +	 * which will be dropped by kernfs_put() in rdtgroup_remove().  	 */  	kernfs_get(kn); @@ -2880,6 +2876,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,  out_idfree:  	free_rmid(rdtgrp->mon.rmid);  out_destroy: +	kernfs_put(rdtgrp->kn);  	kernfs_remove(rdtgrp->kn);  out_free_rgrp:  	kfree(rdtgrp); @@ -2892,7 +2889,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)  {  	kernfs_remove(rgrp->kn);  	free_rmid(rgrp->mon.rmid); -	kfree(rgrp); +	rdtgroup_remove(rgrp);  }  /* @@ -3049,11 +3046,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,  	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));  	list_del(&rdtgrp->mon.crdtgrp_list); -	/* -	 * one extra hold on this, will drop when we kfree(rdtgrp) -	 * in rdtgroup_kn_unlock() -	 */ -	kernfs_get(kn);  	kernfs_remove(rdtgrp->kn);  	return 0; @@ -3065,11 +3057,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn,  	rdtgrp->flags = RDT_DELETED;  	list_del(&rdtgrp->rdtgroup_list); -	/* -	 * one extra hold on this, will drop when we kfree(rdtgrp) -	 * in rdtgroup_kn_unlock() -	 */ -	kernfs_get(kn);  	kernfs_remove(rdtgrp->kn);  	return 0;  } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 25c06b67e7e0..97aa900386cb 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -78,6 +78,9 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,  	if (!user_mode(regs))  		return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); +	/* The user space code from other tasks cannot be accessed. */ +	if (regs != task_pt_regs(current)) +		return -EPERM;  	/*  	 * Make sure userspace isn't trying to trick us into dumping kernel  	 * memory by pointing the userspace instruction pointer at it. 
@@ -85,6 +88,12 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,  	if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX))  		return -EINVAL; +	/* +	 * Even if named copy_from_user_nmi() this can be invoked from +	 * other contexts and will not try to resolve a pagefault, which is +	 * the correct thing to do here as this code can be called from any +	 * context. +	 */  	return copy_from_user_nmi(buf, (void __user *)src, nbytes);  } @@ -115,13 +124,19 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)  	u8 opcodes[OPCODE_BUFSIZE];  	unsigned long prologue = regs->ip - PROLOGUE_SIZE; -	if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { -		printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", -		       loglvl, prologue); -	} else { +	switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { +	case 0:  		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"  		       __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,  		       opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1); +		break; +	case -EPERM: +		/* No access to the user space stack of other tasks. Ignore. */ +		break; +	default: +		printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", +		       loglvl, prologue); +		break;  	}  } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7eb2a1c87969..3c417734790f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -161,6 +161,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)  	/* Setup early boot stage 4-/5-level pagetables. */  	addq	phys_base(%rip), %rax + +	/* +	 * For SEV guests: Verify that the C-bit is correct. A malicious +	 * hypervisor could lie about the C-bit position to perform a ROP +	 * attack on the guest by writing to the unencrypted stack and wait for +	 * the next RET instruction. +	 * %rsi carries pointer to realmode data and is callee-clobbered. Save +	 * and restore it. 
+	 */ +	pushq	%rsi +	movq	%rax, %rdi +	call	sev_verify_cbit +	popq	%rsi + +	/* Switch to new page-table */  	movq	%rax, %cr3  	/* Ensure I am executing from virtual addresses */ @@ -279,6 +294,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)  SYM_CODE_END(secondary_startup_64)  #include "verify_cpu.S" +#include "sev_verify_cbit.S"  #ifdef CONFIG_HOTPLUG_CPU  /* diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 57c2ecf43134..ce831f9448e7 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -200,8 +200,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,  	params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;  	/* Copying screen_info will do? */ -	memcpy(&params->screen_info, &boot_params.screen_info, -				sizeof(struct screen_info)); +	memcpy(&params->screen_info, &screen_info, sizeof(struct screen_info));  	/* Fill in memsize later */  	params->screen_info.ext_mem_k = 0; diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index bb7e1132290b..f9e5352b3bef 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -101,8 +101,7 @@ u64 perf_reg_abi(struct task_struct *task)  }  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  {  	regs_user->regs = task_pt_regs(current);  	regs_user->abi = perf_reg_abi(current); @@ -129,12 +128,20 @@ u64 perf_reg_abi(struct task_struct *task)  		return PERF_SAMPLE_REGS_ABI_64;  } +static DEFINE_PER_CPU(struct pt_regs, nmi_user_regs); +  void perf_get_regs_user(struct perf_regs *regs_user, -			struct pt_regs *regs, -			struct pt_regs *regs_user_copy) +			struct pt_regs *regs)  { +	struct pt_regs *regs_user_copy = this_cpu_ptr(&nmi_user_regs);  	struct pt_regs *user_regs = task_pt_regs(current); +	if (!in_nmi()) { +		regs_user->regs = user_regs; +		regs_user->abi = perf_reg_abi(current); +		
return; +	} +  	/*  	 * If we're in an NMI that interrupted task_pt_regs setup, then  	 * we can't sample user regs at all.  This check isn't really diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba4593a913fa..145a7ac0c19a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -685,7 +685,7 @@ void arch_cpu_idle(void)   */  void __cpuidle default_idle(void)  { -	safe_halt(); +	raw_safe_halt();  }  #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)  EXPORT_SYMBOL(default_idle); @@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)  /*   * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power   * states (local apic timer and TSC stop). + * + * XXX this function is completely buggered vs RCU and tracing.   */  static void amd_e400_idle(void)  { @@ -757,9 +759,9 @@ static void amd_e400_idle(void)  	 * The switch back from broadcast mode needs to be called with  	 * interrupts disabled.  	 */ -	local_irq_disable(); +	raw_local_irq_disable();  	tick_broadcast_exit(); -	local_irq_enable(); +	raw_local_irq_enable();  }  /* @@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)  		if (!need_resched())  			__sti_mwait(0, 0);  		else -			local_irq_enable(); +			raw_local_irq_enable();  	} else { -		local_irq_enable(); +		raw_local_irq_enable();  	}  	__current_clr_polling();  } diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 5f83ccaab877..7d04b356d44d 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)  		goto fail;  	regs->dx = val >> 32; +	/* +	 * This is a VC handler and the #VC is only raised when SEV-ES is +	 * active, which means SEV must be active too. Do sanity checks on the +	 * CPUID results to make sure the hypervisor does not trick the kernel +	 * into the no-sev path. 
This could map sensitive data unencrypted and +	 * make it accessible to the hypervisor. +	 * +	 * In particular, check for: +	 *	- Hypervisor CPUID bit +	 *	- Availability of CPUID leaf 0x8000001f +	 *	- SEV CPUID bit. +	 * +	 * The hypervisor might still report the wrong C-bit position, but this +	 * can't be checked here. +	 */ + +	if ((fn == 1 && !(regs->cx & BIT(31)))) +		/* Hypervisor bit */ +		goto fail; +	else if (fn == 0x80000000 && (regs->ax < 0x8000001f)) +		/* SEV leaf check */ +		goto fail; +	else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) +		/* SEV bit */ +		goto fail; +  	/* Skip over the CPUID two-byte opcode */  	regs->ip += 2; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 4a96726fbaf8..0bd1a0fc587e 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -374,8 +374,8 @@ fault:  	return ES_EXCEPTION;  } -static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, -				 unsigned long vaddr, phys_addr_t *paddr) +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, +					   unsigned long vaddr, phys_addr_t *paddr)  {  	unsigned long va = (unsigned long)vaddr;  	unsigned int level; @@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,  		if (user_mode(ctxt->regs))  			ctxt->fi.error_code |= X86_PF_USER; -		return false; +		return ES_EXCEPTION;  	} +	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) +		/* Emulated MMIO to/from encrypted memory not supported */ +		return ES_UNSUPPORTED; +  	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;  	pa |= va & ~page_level_mask(level);  	*paddr = pa; -	return true; +	return ES_OK;  }  /* Include code shared with pre-decompression boot stage */ @@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,  {  	u64 exit_code, exit_info_1, exit_info_2;  	unsigned long ghcb_pa = __pa(ghcb); +	enum es_result res;  	phys_addr_t paddr;  	void __user 
*ref; @@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,  	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; -	if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) { -		if (!read) +	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); +	if (res != ES_OK) { +		if (res == ES_EXCEPTION && !read)  			ctxt->fi.error_code |= X86_PF_WRITE; -		return ES_EXCEPTION; +		return res;  	}  	exit_info_1 = paddr; diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S new file mode 100644 index 000000000000..ee04941a6546 --- /dev/null +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + *	sev_verify_cbit.S - Code for verification of the C-bit position reported + *			    by the Hypervisor when running with SEV enabled. + * + *	Copyright (c) 2020  Joerg Roedel (jroedel@suse.de) + * + * sev_verify_cbit() is called before switching to a new long-mode page-table + * at boot. + * + * Verify that the C-bit position is correct by writing a random value to + * an encrypted memory location while on the current page-table. Then it + * switches to the new page-table to verify the memory content is still the + * same. After that it switches back to the current page-table and when the + * check succeeded it returns. If the check failed the code invalidates the + * stack pointer and goes into a hlt loop. The stack-pointer is invalidated to + * make sure no interrupt or exception can get the CPU out of the hlt loop. 
+ * + * New page-table pointer is expected in %rdi (first parameter) + * + */ +SYM_FUNC_START(sev_verify_cbit) +#ifdef CONFIG_AMD_MEM_ENCRYPT +	/* First check if a C-bit was detected */ +	movq	sme_me_mask(%rip), %rsi +	testq	%rsi, %rsi +	jz	3f + +	/* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */ +	movq	sev_status(%rip), %rsi +	testq	%rsi, %rsi +	jz	3f + +	/* Save CR4 in %rsi */ +	movq	%cr4, %rsi + +	/* Disable Global Pages */ +	movq	%rsi, %rdx +	andq	$(~X86_CR4_PGE), %rdx +	movq	%rdx, %cr4 + +	/* +	 * Verified that running under SEV - now get a random value using +	 * RDRAND. This instruction is mandatory when running as an SEV guest. +	 * +	 * Don't bail out of the loop if RDRAND returns errors. It is better to +	 * prevent forward progress than to work with a non-random value here. +	 */ +1:	rdrand	%rdx +	jnc	1b + +	/* Store value to memory and keep it in %rdx */ +	movq	%rdx, sev_check_data(%rip) + +	/* Backup current %cr3 value to restore it later */ +	movq	%cr3, %rcx + +	/* Switch to new %cr3 - This might unmap the stack */ +	movq	%rdi, %cr3 + +	/* +	 * Compare value in %rdx with memory location. If C-bit is incorrect +	 * this would read the encrypted data and make the check fail. +	 */ +	cmpq	%rdx, sev_check_data(%rip) + +	/* Restore old %cr3 */ +	movq	%rcx, %cr3 + +	/* Restore previous CR4 */ +	movq	%rsi, %cr4 + +	/* Check CMPQ result */ +	je	3f + +	/* +	 * The check failed, prevent any forward progress to prevent ROP +	 * attacks, invalidate the stack and go into a hlt loop. 
+	 */ +	xorq	%rsp, %rsp +	subq	$0x1000, %rsp +2:	hlt +	jmp 2b +3: +#endif +	/* Return page-table pointer */ +	movq	%rdi, %rax +	ret +SYM_FUNC_END(sev_verify_cbit) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 992fb1415c0f..ae64f98ec2ab 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,16 +514,10 @@ int tboot_force_iommu(void)  	if (!tboot_enabled())  		return 0; -	if (intel_iommu_tboot_noforce) -		return 1; - -	if (no_iommu || swiotlb || dmar_disabled) +	if (no_iommu || dmar_disabled)  		pr_warn("Forcing Intel-IOMMU to enabled\n");  	dmar_disabled = 0; -#ifdef CONFIG_SWIOTLB -	swiotlb = 0; -#endif  	no_iommu = 0;  	return 1; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3c70fb34028b..e19df6cde35d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -793,19 +793,6 @@ static __always_inline unsigned long debug_read_clear_dr6(void)  	set_debugreg(DR6_RESERVED, 6);  	dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ -	/* -	 * Clear the virtual DR6 value, ptrace routines will set bits here for -	 * things we want signals for. -	 */ -	current->thread.virtual_dr6 = 0; - -	/* -	 * The SDM says "The processor clears the BTF flag when it -	 * generates a debug exception."  Clear TIF_BLOCKSTEP to keep -	 * TIF_BLOCKSTEP in sync with the hardware BTF flag. -	 */ -	clear_thread_flag(TIF_BLOCKSTEP); -  	return dr6;  } @@ -873,6 +860,20 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,  	 */  	WARN_ON_ONCE(user_mode(regs)); +	if (test_thread_flag(TIF_BLOCKSTEP)) { +		/* +		 * The SDM says "The processor clears the BTF flag when it +		 * generates a debug exception." but PTRACE_BLOCKSTEP requested +		 * it for userspace, but we just took a kernel #DB, so re-set +		 * BTF. 
+		 */ +		unsigned long debugctl; + +		rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +		debugctl |= DEBUGCTLMSR_BTF; +		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +	} +  	/*  	 * Catch SYSENTER with TF set and clear DR_STEP. If this hit a  	 * watchpoint at the same time then that will still be handled. @@ -936,6 +937,22 @@ static __always_inline void exc_debug_user(struct pt_regs *regs,  	instrumentation_begin();  	/* +	 * Start the virtual/ptrace DR6 value with just the DR_STEP mask +	 * of the real DR6. ptrace_triggered() will set the DR_TRAPn bits. +	 * +	 * Userspace expects DR_STEP to be visible in ptrace_get_debugreg(6) +	 * even if it is not the result of PTRACE_SINGLESTEP. +	 */ +	current->thread.virtual_dr6 = (dr6 & DR_STEP); + +	/* +	 * The SDM says "The processor clears the BTF flag when it +	 * generates a debug exception."  Clear TIF_BLOCKSTEP to keep +	 * TIF_BLOCKSTEP in sync with the hardware BTF flag. +	 */ +	clear_thread_flag(TIF_BLOCKSTEP); + +	/*  	 * If dr6 has no reason to give us about the origin of this trap,  	 * then it's very likely the result of an icebp/int01 trap.  	 * User wants a sigtrap for that. 
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 6a339ce328e0..73f800100066 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -321,19 +321,12 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address);  unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)  { -	struct task_struct *task = state->task; -  	if (unwind_done(state))  		return NULL;  	if (state->regs)  		return &state->regs->ip; -	if (task != current && state->sp == task->thread.sp) { -		struct inactive_task_frame *frame = (void *)task->thread.sp; -		return &frame->ret_addr; -	} -  	if (state->sp)  		return (unsigned long *)state->sp - 1; @@ -663,7 +656,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,  	} else {  		struct inactive_task_frame *frame = (void *)task->thread.sp; -		state->sp = task->thread.sp; +		state->sp = task->thread.sp + sizeof(*frame);  		state->bp = READ_ONCE_NOCHECK(frame->bp);  		state->ip = READ_ONCE_NOCHECK(frame->ret_addr);  		state->signal = (void *)state->ip == ret_from_fork; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 06a278b3701d..83637a2ff605 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)  	return 0;  } +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +{ +	struct kvm_cpuid_entry2 *best; + +	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + +	/* +	 * save the feature bitmap to avoid cpuid lookup for every PV +	 * operation +	 */ +	if (best) +		vcpu->arch.pv_cpuid.features = best->eax; +} +  void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)  {  	struct kvm_cpuid_entry2 *best; @@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)  		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))  		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); -	/* -	 * save the feature bitmap to avoid cpuid lookup for every PV -	 * operation -	 */ -	if (best) -		
vcpu->arch.pv_cpuid.features = best->eax; -  	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {  		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);  		if (best) @@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)  		vcpu->arch.guest_supported_xcr0 =  			(best->eax | ((u64)best->edx << 32)) & supported_xcr0; +	kvm_update_pv_runtime(vcpu); +  	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);  	kvm_mmu_reset_context(vcpu); @@ -169,6 +178,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)  	vcpu->arch.cr4_guest_rsvd_bits =  	    __cr4_reserved_bits(guest_cpuid_has, vcpu); +	vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63); +  	/* Invoke the vendor callback only after the above state is updated. */  	kvm_x86_ops.vcpu_after_set_cpuid(vcpu);  } @@ -672,7 +683,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)  		edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);  		edx.split.bit_width_fixed = cap.bit_width_fixed; -		edx.split.reserved = 0; +		edx.split.anythread_deprecated = 1; +		edx.split.reserved1 = 0; +		edx.split.reserved2 = 0;  		entry->eax = eax.full;  		entry->ebx = cap.events_mask; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index bf8577947ed2..f7a6e8f83783 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;  void kvm_set_cpu_caps(void);  void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);  struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,  					      u32 function, u32 index);  int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0d917eb70319..56cae1ff9e3f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4046,6 +4046,12 @@ static int em_clflush(struct x86_emulate_ctxt 
*ctxt)  	return X86EMUL_CONTINUE;  } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ +	/* emulating clflushopt regardless of cpuid */ +	return X86EMUL_CONTINUE; +} +  static int em_movsxd(struct x86_emulate_ctxt *ctxt)  {  	ctxt->dst.val = (s32) ctxt->src.val; @@ -4585,7 +4591,7 @@ static const struct opcode group11[] = {  };  static const struct gprefix pfx_0f_ae_7 = { -	I(SrcMem | ByteOp, em_clflush), N, N, N, +	I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,  };  static const struct group_dual group15 = { { diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 99d118ffc67d..814698e5b152 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -40,29 +40,10 @@ static int pending_userspace_extint(struct kvm_vcpu *v)   * check if there is pending interrupt from   * non-APIC source without intack.   */ -static int kvm_cpu_has_extint(struct kvm_vcpu *v) -{ -	u8 accept = kvm_apic_accept_pic_intr(v); - -	if (accept) { -		if (irqchip_split(v->kvm)) -			return pending_userspace_extint(v); -		else -			return v->kvm->arch.vpic->output; -	} else -		return 0; -} - -/* - * check if there is injectable interrupt: - * when virtual interrupt delivery enabled, - * interrupt from apic will handled by hardware, - * we don't need to check it here. - */ -int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +int kvm_cpu_has_extint(struct kvm_vcpu *v)  {  	/* -	 * FIXME: interrupt.injected represents an interrupt that it's +	 * FIXME: interrupt.injected represents an interrupt whose  	 * side-effects have already been applied (e.g. bit from IRR  	 * already moved to ISR). 
Therefore, it is incorrect to rely  	 * on interrupt.injected to know if there is a pending @@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)  	if (!lapic_in_kernel(v))  		return v->arch.interrupt.injected; +	if (!kvm_apic_accept_pic_intr(v)) +		return 0; + +	if (irqchip_split(v->kvm)) +		return pending_userspace_extint(v); +	else +		return v->kvm->arch.vpic->output; +} + +/* + * check if there is injectable interrupt: + * when virtual interrupt delivery enabled, + * interrupt from apic will handled by hardware, + * we don't need to check it here. + */ +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +{  	if (kvm_cpu_has_extint(v))  		return 1; @@ -91,20 +89,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr);   */  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)  { -	/* -	 * FIXME: interrupt.injected represents an interrupt that it's -	 * side-effects have already been applied (e.g. bit from IRR -	 * already moved to ISR). Therefore, it is incorrect to rely -	 * on interrupt.injected to know if there is a pending -	 * interrupt in the user-mode LAPIC. -	 * This leads to nVMX/nSVM not be able to distinguish -	 * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on -	 * pending interrupt or should re-inject an injected -	 * interrupt. 
-	 */ -	if (!lapic_in_kernel(v)) -		return v->arch.interrupt.injected; -  	if (kvm_cpu_has_extint(v))  		return 1; @@ -118,16 +102,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);   */  static int kvm_cpu_get_extint(struct kvm_vcpu *v)  { -	if (kvm_cpu_has_extint(v)) { -		if (irqchip_split(v->kvm)) { -			int vector = v->arch.pending_external_vector; - -			v->arch.pending_external_vector = -1; -			return vector; -		} else -			return kvm_pic_read_irq(v->kvm); /* PIC */ -	} else +	if (!kvm_cpu_has_extint(v)) { +		WARN_ON(!lapic_in_kernel(v));  		return -1; +	} + +	if (!lapic_in_kernel(v)) +		return v->arch.interrupt.nr; + +	if (irqchip_split(v->kvm)) { +		int vector = v->arch.pending_external_vector; + +		v->arch.pending_external_vector = -1; +		return vector; +	} else +		return kvm_pic_read_irq(v->kvm); /* PIC */  }  /* @@ -135,13 +124,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)   */  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)  { -	int vector; - -	if (!lapic_in_kernel(v)) -		return v->arch.interrupt.nr; - -	vector = kvm_cpu_get_extint(v); - +	int vector = kvm_cpu_get_extint(v);  	if (vector != -1)  		return vector;			/* PIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 105e7859d1f2..86c33d53c90a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2465,7 +2465,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)  	struct kvm_lapic *apic = vcpu->arch.apic;  	u32 ppr; -	if (!kvm_apic_hw_enabled(apic)) +	if (!kvm_apic_present(vcpu))  		return -1;  	__apic_update_ppr(apic, &ppr); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 17587f496ec7..7a6ae9e90bd7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -225,7 +225,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)  {  	u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; -	gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) +	gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)  	       & shadow_nonpresent_or_rsvd_mask;  	return gpa >> 
PAGE_SHIFT; @@ -591,15 +591,15 @@ static u64 mmu_spte_get_lockless(u64 *sptep)  static u64 restore_acc_track_spte(u64 spte)  {  	u64 new_spte = spte; -	u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) -			 & shadow_acc_track_saved_bits_mask; +	u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) +			 & SHADOW_ACC_TRACK_SAVED_BITS_MASK;  	WARN_ON_ONCE(spte_ad_enabled(spte));  	WARN_ON_ONCE(!is_access_track_spte(spte));  	new_spte &= ~shadow_acc_track_mask; -	new_spte &= ~(shadow_acc_track_saved_bits_mask << -		      shadow_acc_track_saved_bits_shift); +	new_spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK << +		      SHADOW_ACC_TRACK_SAVED_BITS_SHIFT);  	new_spte |= saved_bits;  	return new_spte; @@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,  	} else {  		rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);  		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); -		while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { -			desc = desc->more; +		while (desc->sptes[PTE_LIST_EXT-1]) {  			count += PTE_LIST_EXT; -		} -		if (desc->sptes[PTE_LIST_EXT-1]) { -			desc->more = mmu_alloc_pte_list_desc(vcpu); + +			if (!desc->more) { +				desc->more = mmu_alloc_pte_list_desc(vcpu); +				desc = desc->more; +				break; +			}  			desc = desc->more;  		}  		for (i = 0; desc->sptes[i]; ++i) @@ -3515,7 +3517,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)  {  	u64 sptes[PT64_ROOT_MAX_LEVEL];  	struct rsvd_bits_validate *rsvd_check; -	int root = vcpu->arch.mmu->root_level; +	int root = vcpu->arch.mmu->shadow_root_level;  	int leaf;  	int level;  	bool reserved = false; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index d9c5665a55e9..fcac2cac78fe 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -55,7 +55,7 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)  	mask |= shadow_mmio_value | access;  	mask |= gpa | 
shadow_nonpresent_or_rsvd_mask;  	mask |= (gpa & shadow_nonpresent_or_rsvd_mask) -		<< shadow_nonpresent_or_rsvd_mask_len; +		<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;  	return mask;  } @@ -231,12 +231,12 @@ u64 mark_spte_for_access_track(u64 spte)  		  !spte_can_locklessly_be_made_writable(spte),  		  "kvm: Writable SPTE is not locklessly dirty-trackable\n"); -	WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << -			  shadow_acc_track_saved_bits_shift), +	WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK << +			  SHADOW_ACC_TRACK_SAVED_BITS_SHIFT),  		  "kvm: Access Tracking saved bit locations are not zero\n"); -	spte |= (spte & shadow_acc_track_saved_bits_mask) << -		shadow_acc_track_saved_bits_shift; +	spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) << +		SHADOW_ACC_TRACK_SAVED_BITS_SHIFT;  	spte &= ~shadow_acc_track_mask;  	return spte; @@ -245,7 +245,7 @@ u64 mark_spte_for_access_track(u64 spte)  void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask)  {  	BUG_ON((u64)(unsigned)access_mask != access_mask); -	WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); +	WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN));  	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);  	shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;  	shadow_mmio_access_mask = access_mask; @@ -306,9 +306,9 @@ void kvm_mmu_reset_all_pte_masks(void)  	low_phys_bits = boot_cpu_data.x86_phys_bits;  	if (boot_cpu_has_bug(X86_BUG_L1TF) &&  	    !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= -			  52 - shadow_nonpresent_or_rsvd_mask_len)) { +			  52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) {  		low_phys_bits = boot_cpu_data.x86_cache_bits -			- shadow_nonpresent_or_rsvd_mask_len; +			- SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;  		shadow_nonpresent_or_rsvd_mask =  			rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);  	} diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 
4ecf40e0b8fe..5c75a451c000 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -105,19 +105,19 @@ extern u64 __read_mostly shadow_acc_track_mask;  extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;  /* + * The number of high-order 1 bits to use in the mask above. + */ +#define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5 + +/*   * The mask/shift to use for saving the original R/X bits when marking the PTE   * as not-present for access tracking purposes. We do not save the W bit as the   * PTEs being access tracked also need to be dirty tracked, so the W bit will be   * restored only when a write is attempted to the page.   */ -static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | -						    PT64_EPT_EXECUTABLE_MASK; -static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; - -/* - * The number of high-order 1 bits to use in the mask above. - */ -static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +#define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \ +					  PT64_EPT_EXECUTABLE_MASK) +#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT  /*   * In some cases, we need to preserve the GFN of a non-present or reserved diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 27e381c9da6c..ff28a5c6abd6 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -49,7 +49,14 @@ bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)  {  	struct kvm_mmu_page *sp; +	if (!kvm->arch.tdp_mmu_enabled) +		return false; +	if (WARN_ON(!VALID_PAGE(hpa))) +		return false; +  	sp = to_shadow_page(hpa); +	if (WARN_ON(!sp)) +		return false;  	return sp->tdp_mmu_page && sp->root_count;  } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b14106258a..566f4d18185b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -642,8 +642,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,  	 * Its safe to read 
more than we are asked, caller should ensure that  	 * destination has enough space.  	 */ -	src_paddr = round_down(src_paddr, 16);  	offset = src_paddr & 15; +	src_paddr = round_down(src_paddr, 16);  	sz = round_up(sz + offset, 16);  	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2f32fd09e259..79b3a564f1c9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1309,8 +1309,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)  		svm->avic_is_running = true;  	svm->msrpm = svm_vcpu_alloc_msrpm(); -	if (!svm->msrpm) +	if (!svm->msrpm) { +		err = -ENOMEM;  		goto error_free_vmcb_page; +	}  	svm_vcpu_init_msrpm(vcpu, svm->msrpm); @@ -3741,6 +3743,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)  static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)  {  	struct vcpu_svm *svm = to_svm(vcpu); +	struct kvm_cpuid_entry2 *best;  	vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&  				    boot_cpu_has(X86_FEATURE_XSAVE) && @@ -3753,6 +3756,13 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)  	/* Check again if INVPCID interception if required */  	svm_check_invpcid(svm); +	/* For sev guests, the memory encryption bit is not reserved in CR3.  
*/ +	if (sev_guest(vcpu->kvm)) { +		best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0); +		if (best) +			vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f)); +	} +  	if (!kvm_vcpu_apicv_active(vcpu))  		return; diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index e5325bd0f304..f3199bb02f22 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -297,14 +297,13 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {  };  const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)  {  	vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;  	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;  	vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;  	vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; -  }  #endif diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index e5f7a7ebf27d..bd41d9462355 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -185,7 +185,7 @@ static inline void evmcs_load(u64 phys_addr)  	vp_ap->enlighten_vmentry = 1;  } -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);  #else /* !IS_ENABLED(CONFIG_HYPERV) */  static inline void evmcs_write64(unsigned long field, u64 value) {}  static inline void evmcs_write32(unsigned long field, u32 value) {} @@ -194,7 +194,6 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; }  static inline u32 evmcs_read32(unsigned long field) { return 0; }  static inline u16 evmcs_read16(unsigned long field) { return 0; }  static inline void evmcs_load(u64 phys_addr) {} -static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}  static inline void evmcs_touch_msr_bitmap(void) {}  #endif /* IS_ENABLED(CONFIG_HYPERV) */ diff --git 
a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d14c94d0aff1..47b8357b9751 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2560,8 +2560,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,  	vmcs_conf->vmexit_ctrl         = _vmexit_control;  	vmcs_conf->vmentry_ctrl        = _vmentry_control; -	if (static_branch_unlikely(&enable_evmcs)) +#if IS_ENABLED(CONFIG_HYPERV) +	if (enlightened_vmcs)  		evmcs_sanitize_exec_ctrls(vmcs_conf); +#endif  	return 0;  } @@ -6834,7 +6836,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)  static int vmx_create_vcpu(struct kvm_vcpu *vcpu)  {  	struct vcpu_vmx *vmx; -	unsigned long *msr_bitmap;  	int i, cpu, err;  	BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); @@ -6894,7 +6895,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)  	bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);  	bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); -	msr_bitmap = vmx->vmcs01.msr_bitmap;  	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);  	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);  	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 397f599b20e5..e545a8a613b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -255,24 +255,23 @@ static struct kmem_cache *x86_emulator_cache;  /*   * When called, it means the previous get/set msr reached an invalid msr. - * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want - * to fail the caller. + * Return true if we want to ignore/silent this failed msr access.   */ -static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, -				 u64 data, bool write) +static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, +				  u64 data, bool write)  {  	const char *op = write ? 
"wrmsr" : "rdmsr";  	if (ignore_msrs) {  		if (report_ignored_msrs) -			vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n", -				    op, msr, data); +			kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", +				      op, msr, data);  		/* Mask the error */ -		return 0; +		return true;  	} else { -		vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n", -				       op, msr, data); -		return -ENOENT; +		kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", +				      op, msr, data); +		return false;  	}  } @@ -1042,7 +1041,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)  	}  	if (is_long_mode(vcpu) && -	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) +	    (cr3 & vcpu->arch.cr3_lm_rsvd_bits))  		return 1;  	else if (is_pae_paging(vcpu) &&  		 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) @@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)  	if (r == KVM_MSR_RET_INVALID) {  		/* Unconditionally clear the output for simplicity */  		*data = 0; -		r = kvm_msr_ignored_check(vcpu, index, 0, false); +		if (kvm_msr_ignored_check(vcpu, index, 0, false)) +			r = 0;  	}  	if (r) @@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,  	struct msr_data msr;  	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) -		return -EPERM; +		return KVM_MSR_RET_FILTERED;  	switch (index) {  	case MSR_FS_BASE: @@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,  	int ret = __kvm_set_msr(vcpu, index, data, host_initiated);  	if (ret == KVM_MSR_RET_INVALID) -		ret = kvm_msr_ignored_check(vcpu, index, data, true); +		if (kvm_msr_ignored_check(vcpu, index, data, true)) +			ret = 0;  	return ret;  } @@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,  	int ret;  	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) -		return -EPERM; +		return KVM_MSR_RET_FILTERED;  	msr.index = 
index;  	msr.host_initiated = host_initiated; @@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,  	if (ret == KVM_MSR_RET_INVALID) {  		/* Unconditionally clear *data for simplicity */  		*data = 0; -		ret = kvm_msr_ignored_check(vcpu, index, 0, false); +		if (kvm_msr_ignored_check(vcpu, index, 0, false)) +			ret = 0;  	}  	return ret; @@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)  static u64 kvm_msr_reason(int r)  {  	switch (r) { -	case -ENOENT: +	case KVM_MSR_RET_INVALID:  		return KVM_MSR_EXIT_REASON_UNKNOWN; -	case -EPERM: +	case KVM_MSR_RET_FILTERED:  		return KVM_MSR_EXIT_REASON_FILTER;  	default:  		return KVM_MSR_EXIT_REASON_INVAL; @@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,  	struct kvm_arch *ka = &vcpu->kvm->arch;  	if (vcpu->vcpu_id == 0 && !host_initiated) { -		if (ka->boot_vcpu_runs_old_kvmclock && old_msr) +		if (ka->boot_vcpu_runs_old_kvmclock != old_msr)  			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);  		ka->boot_vcpu_runs_old_kvmclock = old_msr; @@ -3063,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  			/* Values other than LBR and BTF are vendor-specific,  			   thus reserved and should throw a #GP */  			return 1; -		} -		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", -			    __func__, data); +		} else if (report_ignored_msrs) +			vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", +				    __func__, data);  		break;  	case 0x200 ... 
0x2ff:  		return kvm_mtrr_set_msr(vcpu, msr, data); @@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)  		msr_info->data = vcpu->arch.efer;  		break;  	case MSR_KVM_WALL_CLOCK: +		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) +			return 1; + +		msr_info->data = vcpu->kvm->arch.wall_clock; +		break;  	case MSR_KVM_WALL_CLOCK_NEW: +		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) +			return 1; +  		msr_info->data = vcpu->kvm->arch.wall_clock;  		break;  	case MSR_KVM_SYSTEM_TIME: +		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) +			return 1; + +		msr_info->data = vcpu->arch.time; +		break;  	case MSR_KVM_SYSTEM_TIME_NEW: +		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) +			return 1; +  		msr_info->data = vcpu->arch.time;  		break;  	case MSR_KVM_ASYNC_PF_EN: +		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) +			return 1; +  		msr_info->data = vcpu->arch.apf.msr_en_val;  		break;  	case MSR_KVM_ASYNC_PF_INT: +		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) +			return 1; +  		msr_info->data = vcpu->arch.apf.msr_int_val;  		break;  	case MSR_KVM_ASYNC_PF_ACK: +		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) +			return 1; +  		msr_info->data = 0;  		break;  	case MSR_KVM_STEAL_TIME: +		if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME)) +			return 1; +  		msr_info->data = vcpu->arch.st.msr_val;  		break;  	case MSR_KVM_PV_EOI_EN: +		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI)) +			return 1; +  		msr_info->data = vcpu->arch.pv_eoi.msr_val;  		break;  	case MSR_KVM_POLL_CONTROL: +		if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) +			return 1; +  		msr_info->data = vcpu->arch.msr_kvm_poll_control;  		break;  	case MSR_IA32_P5_MC_ADDR: @@ -4015,21 +4051,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,  static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)  { +	/* +	 * We can accept userspace's request for interrupt injection +	 * as long as we have a place to store the interrupt number. 
+	 * The actual injection will happen when the CPU is able to +	 * deliver the interrupt. +	 */ +	if (kvm_cpu_has_extint(vcpu)) +		return false; + +	/* Acknowledging ExtINT does not happen if LINT0 is masked.  */  	return (!lapic_in_kernel(vcpu) ||  		kvm_apic_accept_pic_intr(vcpu));  } -/* - * if userspace requested an interrupt window, check that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */  static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)  {  	return kvm_arch_interrupt_allowed(vcpu) && -		!kvm_cpu_has_interrupt(vcpu) && -		!kvm_event_needs_reinjection(vcpu) &&  		kvm_cpu_accept_dm_intr(vcpu);  } @@ -4575,6 +4613,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,  	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:  		vcpu->arch.pv_cpuid.enforce = cap->args[0]; +		if (vcpu->arch.pv_cpuid.enforce) +			kvm_update_pv_runtime(vcpu);  		return 0; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3900ab0c6004..e7ca622a468f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,  int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);  bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); -#define  KVM_MSR_RET_INVALID  2 +/* + * Internal error codes that are used to indicate that MSR emulation encountered + * an error that should result in #GP in the guest, unless userspace + * handles it. 
+ */ +#define  KVM_MSR_RET_INVALID	2	/* in-kernel MSR emulation #GP condition */ +#define  KVM_MSR_RET_FILTERED	3	/* #GP due to userspace MSR filter */  #define __cr4_reserved_bits(__cpu_has, __c)             \  ({                                                      \ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 037faac46b0c..1e299ac73c86 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -16,8 +16,6 @@   * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.   */ -.weak memcpy -  /*   * memcpy - Copy a memory block.   * @@ -30,7 +28,7 @@   * rax original destination   */  SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) +SYM_FUNC_START_WEAK(memcpy)  	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \  		      "jmp memcpy_erms", X86_FEATURE_ERMS diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 7ff00ea64e4f..41902fe8b859 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,9 +24,7 @@   * Output:   * rax: dest   */ -.weak memmove - -SYM_FUNC_START_ALIAS(memmove) +SYM_FUNC_START_WEAK(memmove)  SYM_FUNC_START(__memmove)  	mov %rdi, %rax diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 9ff15ee404a4..0bfd26e4ca9e 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -6,8 +6,6 @@  #include <asm/alternative-asm.h>  #include <asm/export.h> -.weak memset -  /*   * ISO C memset - set a memory block to a byte value. This function uses fast   * string to get better performance than the original function. The code is @@ -19,7 +17,7 @@   *   * rax   original destination   */ -SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START_WEAK(memset)  SYM_FUNC_START(__memset)  	/*  	 * Some CPUs support enhanced REP MOVSB/STOSB feature. 
It is recommended diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index efbb3de472df..bc0833713be9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -39,6 +39,7 @@   */  u64 sme_me_mask __section(".data") = 0;  u64 sev_status __section(".data") = 0; +u64 sev_check_data __section(".data") = 0;  EXPORT_SYMBOL(sme_me_mask);  DEFINE_STATIC_KEY_FALSE(sev_enable_key);  EXPORT_SYMBOL_GPL(sev_enable_key); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 44148691d78b..5eb4dc2b97da 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -938,6 +938,7 @@ int phys_to_target_node(phys_addr_t start)  	return meminfo_to_nid(&numa_reserved_meminfo, start);  } +EXPORT_SYMBOL_GPL(phys_to_target_node);  int memory_add_physaddr_to_nid(u64 start)  { @@ -947,4 +948,5 @@ int memory_add_physaddr_to_nid(u64 start)  		nid = numa_meminfo.blk[0].nid;  	return nid;  } +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);  #endif diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 8f5759df7776..e1e8d4e3a213 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -78,28 +78,30 @@ int __init efi_alloc_page_tables(void)  	gfp_mask = GFP_KERNEL | __GFP_ZERO;  	efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);  	if (!efi_pgd) -		return -ENOMEM; +		goto fail;  	pgd = efi_pgd + pgd_index(EFI_VA_END);  	p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); -	if (!p4d) { -		free_page((unsigned long)efi_pgd); -		return -ENOMEM; -	} +	if (!p4d) +		goto free_pgd;  	pud = pud_alloc(&init_mm, p4d, EFI_VA_END); -	if (!pud) { -		if (pgtable_l5_enabled()) -			free_page((unsigned long) pgd_page_vaddr(*pgd)); -		free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); -		return -ENOMEM; -	} +	if (!pud) +		goto free_p4d;  	efi_mm.pgd = efi_pgd;  	mm_init_cpumask(&efi_mm);  	init_new_context(NULL, &efi_mm);  	return 0; + +free_p4d: +	if (pgtable_l5_enabled()) +		free_page((unsigned 
long)pgd_page_vaddr(*pgd)); +free_pgd: +	free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: +	return -ENOMEM;  }  /* diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index fdcd58af707a..27361cbb7ca9 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -8,7 +8,7 @@  #include <sysdep/mcontext.h>  #include <sys/ucontext.h> -void __section(".__syscall_stub") +void __attribute__ ((__section__ (".__syscall_stub")))  stub_segv_handler(int sig, siginfo_t *info, void *p)  {  	ucontext_t *uc = p; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 799f4eba0a62..043c73dfd2c9 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -93,10 +93,20 @@ void xen_init_lock_cpu(int cpu)  void xen_uninit_lock_cpu(int cpu)  { +	int irq; +  	if (!xen_pvspin)  		return; -	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); +	/* +	 * When booting the kernel with 'mitigations=auto,nosmt', the secondary +	 * CPUs are not activated, and lock_kicker_irq is not initialized. 
+	 */ +	irq = per_cpu(lock_kicker_irq, cpu); +	if (irq == -1) +		return; + +	unbind_from_irqhandler(irq, NULL);  	per_cpu(lock_kicker_irq, cpu) = -1;  	kfree(per_cpu(irq_name, cpu));  	per_cpu(irq_name, cpu) = NULL; diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index fa054a1772e1..4dc04e6c01d7 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -69,7 +69,7 @@   */  #define VMALLOC_START		(XCHAL_KSEG_CACHED_VADDR - 0x10000000)  #define VMALLOC_END		(VMALLOC_START + 0x07FEFFFF) -#define TLBTEMP_BASE_1		(VMALLOC_END + 1) +#define TLBTEMP_BASE_1		(VMALLOC_START + 0x08000000)  #define TLBTEMP_BASE_2		(TLBTEMP_BASE_1 + DCACHE_WAY_SIZE)  #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE  #define TLBTEMP_SIZE		(2 * DCACHE_WAY_SIZE) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index b9758119feca..5c9fb8005aa8 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -302,7 +302,7 @@ strncpy_from_user(char *dst, const char __user *src, long count)  	return -EFAULT;  }  #else -long strncpy_from_user(char *dst, const char *src, long count); +long strncpy_from_user(char *dst, const char __user *src, long count);  #endif  /* diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 5835406b3cec..085b8c77b9d9 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -70,8 +70,10 @@ static inline void kmap_invalidate_coherent(struct page *page,  			kvaddr = TLBTEMP_BASE_1 +  				(page_to_phys(page) & DCACHE_ALIAS_MASK); +			preempt_disable();  			__invalidate_dcache_page_alias(kvaddr,  						       page_to_phys(page)); +			preempt_enable();  		}  	}  } @@ -156,6 +158,7 @@ void flush_dcache_page(struct page *page)  		if (!alias && !mapping)  			return; +		preempt_disable();  		virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);  		__flush_invalidate_dcache_page_alias(virt, phys); @@ -166,6 +169,7 @@ void flush_dcache_page(struct 
page *page)  		if (mapping)  			__invalidate_icache_page_alias(virt, phys); +		preempt_enable();  	}  	/* There shouldn't be an entry in the cache for this page anymore. */ @@ -199,8 +203,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address,  	unsigned long phys = page_to_phys(pfn_to_page(pfn));  	unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); +	preempt_disable();  	__flush_invalidate_dcache_page_alias(virt, phys);  	__invalidate_icache_page_alias(virt, phys); +	preempt_enable();  }  EXPORT_SYMBOL(local_flush_cache_page); @@ -227,11 +233,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)  		unsigned long phys = page_to_phys(page);  		unsigned long tmp; +		preempt_disable();  		tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);  		__flush_invalidate_dcache_page_alias(tmp, phys);  		tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);  		__flush_invalidate_dcache_page_alias(tmp, phys);  		__invalidate_icache_page_alias(tmp, phys); +		preempt_enable();  		clear_bit(PG_arch_1, &page->flags);  	} @@ -265,7 +273,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,  	if (alias) {  		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); +		preempt_disable();  		__flush_invalidate_dcache_page_alias(t, phys); +		preempt_enable();  	}  	/* Copy data */ @@ -280,9 +290,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,  	if (alias) {  		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); +		preempt_disable();  		__flush_invalidate_dcache_range((unsigned long) dst, len);  		if ((vma->vm_flags & VM_EXEC) != 0)  			__invalidate_icache_page_alias(t, phys); +		preempt_enable();  	} else if ((vma->vm_flags & VM_EXEC) != 0) {  		__flush_dcache_range((unsigned long)dst,len); @@ -304,7 +316,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,  	if (alias) {  		unsigned long t = TLBTEMP_BASE_1 + (vaddr & 
DCACHE_ALIAS_MASK); +		preempt_disable();  		__flush_invalidate_dcache_page_alias(t, phys); +		preempt_enable();  	}  	memcpy(dst, src, len); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index c6fc83efee0c..8731b7ad9308 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -89,8 +89,8 @@ static void __init free_highpages(void)  	/* set highmem page free */  	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,  				&range_start, &range_end, NULL) { -		unsigned long start = PHYS_PFN(range_start); -		unsigned long end = PHYS_PFN(range_end); +		unsigned long start = PFN_UP(range_start); +		unsigned long end = PFN_DOWN(range_end);  		/* Ignore complete lowmem entries */  		if (end <= max_low) | 
