diff options
217 files changed, 2531 insertions, 1302 deletions
diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index 6d6d211883aee..daee0c0fc9153 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -113,11 +113,8 @@ allOf: maxItems: 1 - if: - properties: - compatible: - contains: - enum: - - fsl,imx95-usb-phy + required: + - orientation-switch then: $ref: /schemas/usb/usb-switch.yaml# diff --git a/Documentation/devicetree/bindings/regulator/qcom,qca6390-pmu.yaml b/Documentation/devicetree/bindings/regulator/qcom,qca6390-pmu.yaml index ca401a209cca7..47c425c9fff16 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,qca6390-pmu.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom,qca6390-pmu.yaml @@ -18,6 +18,7 @@ properties: compatible: enum: - qcom,qca6390-pmu + - qcom,wcn6750-pmu - qcom,wcn6855-pmu - qcom,wcn7850-pmu @@ -27,6 +28,9 @@ properties: vddaon-supply: description: VDD_AON supply regulator handle + vddasd-supply: + description: VDD_ASD supply regulator handle + vdddig-supply: description: VDD_DIG supply regulator handle @@ -42,6 +46,9 @@ properties: vddio1p2-supply: description: VDD_IO_1P2 supply regulator handle + vddrfa0p8-supply: + description: VDD_RFA_0P8 supply regulator handle + vddrfa0p95-supply: description: VDD_RFA_0P95 supply regulator handle @@ -51,12 +58,18 @@ properties: vddrfa1p3-supply: description: VDD_RFA_1P3 supply regulator handle + vddrfa1p7-supply: + description: VDD_RFA_1P7 supply regulator handle + vddrfa1p8-supply: description: VDD_RFA_1P8 supply regulator handle vddrfa1p9-supply: description: VDD_RFA_1P9 supply regulator handle + vddrfa2p2-supply: + description: VDD_RFA_2P2 supply regulator handle + vddpcie1p3-supply: description: VDD_PCIE_1P3 supply regulator handle @@ -123,6 +136,20 @@ allOf: properties: compatible: contains: + const: qcom,wcn6750-pmu + then: + required: + - vddaon-supply + - vddasd-supply + - vddpmu-supply + - vddrfa0p8-supply + - vddrfa1p2-supply + - vddrfa1p7-supply + - vddrfa2p2-supply + - if: + properties: + compatible: + contains: const: qcom,wcn6855-pmu then: required: diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 53d1996460abf..12f429359a823 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -347,7 +347,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: `int pm_runtime_resume_and_get(struct device *dev);` - run pm_runtime_resume(dev) and if successful, increment the device's - usage counter; return the result of pm_runtime_resume + usage counter; returns 0 on success (whether or not the device's + runtime PM status was already 'active') or the error code from + pm_runtime_resume() on failure. `int pm_request_idle(struct device *dev);` - submit a request to execute the subsystem-level idle callback for the diff --git a/MAINTAINERS b/MAINTAINERS index e6e71b05710ba..baf0eeb9a3554 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3893,7 +3893,7 @@ W: http://www.baycom.org/~tom/ham/ham.html F: drivers/net/hamradio/baycom* BCACHE (BLOCK LAYER CACHE) -M: Coly Li <colyli@suse.de> +M: Coly Li <colyli@kernel.org> M: Kent Overstreet <kent.overstreet@linux.dev> L: linux-bcache@vger.kernel.org S: Maintained diff --git a/Makefile b/Makefile index 64c594bd7ad03..e5b8a8832c0ca 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5b24881420414..ea5a1dcb133b4 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -297,7 +297,6 @@ config ARC_PAGE_SIZE_16K config ARC_PAGE_SIZE_4K bool "4KB" select HAVE_PAGE_SIZE_4KB - depends on ARC_MMU_V3 || ARC_MMU_V4 endchoice @@ -474,7 +473,8 @@ config HIGHMEM config ARC_HAS_PAE40 bool "Support for the 40-bit Physical Address Extension" - depends on ISA_ARCV2 + depends on ARC_MMU_V4 + depends on !ARC_PAGE_SIZE_4K select HIGHMEM select PHYS_ADDR_T_64BIT help diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 2390dd042e363..fb98478ed1ab0 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,7 +6,7 @@ KBUILD_DEFCONFIG := haps_hs_smp_defconfig ifeq ($(CROSS_COMPILE),) -CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux- arc-linux-gnu-) endif cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 2a151607b0805..88bcc7ab6f5af 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -54,7 +54,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index c0a812674ce9e..9a2dc39a5cff4 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -62,7 +62,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 67556f4b70574..f31382cb8be4b 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -69,7 +69,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index b644353853049..3add2fe257f8b 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -250,7 +250,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; }; @@ -258,7 +258,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <1>; }; @@ -266,7 +266,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <2>; }; }; @@ -281,7 +281,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <30>; + ngpios = <30>; reg = <0>; }; @@ -289,7 +289,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <10>; + ngpios = <10>; reg = <1>; }; @@ -297,7 +297,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <2>; }; }; diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 41b980df862b1..98bb850722a4b 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -308,7 +308,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <24>; + ngpios = <24>; reg = <0>; }; }; diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 4b13f60fe7cae..005d9e4d187a0 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -146,7 +146,7 @@ #ifndef __ASSEMBLY__ -#include <soc/arc/aux.h> +#include <soc/arc/arc_aux.h> /* Helpers */ #define TO_KB(bytes) ((bytes) >> 10) diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 58045c8983404..76f43db0890fc 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -48,7 +48,7 @@ \ switch(sizeof((_p_))) { \ case 1: \ - _prev_ = (__typeof__(*(ptr)))cmpxchg_emu_u8((volatile u8 *)_p_, (uintptr_t)_o_, (uintptr_t)_n_); \ + _prev_ = (__typeof__(*(ptr)))cmpxchg_emu_u8((volatile u8 *__force)_p_, (uintptr_t)_o_, (uintptr_t)_n_); \ break; \ case 4: \ _prev_ = __cmpxchg(_p_, _o_, _n_); \ diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h index d85dc07219071..41412642f2796 100644 --- a/arch/arc/include/asm/mmu-arcv2.h +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -9,7 +9,7 @@ #ifndef _ASM_ARC_MMU_ARCV2_H #define _ASM_ARC_MMU_ARCV2_H -#include <soc/arc/aux.h> +#include <soc/arc/arc_aux.h> /* * TLB Management regs diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c index 4458e409ca0a8..6d989b6d88c69 100644 --- a/arch/arc/net/bpf_jit_arcv2.c +++ b/arch/arc/net/bpf_jit_arcv2.c @@ -2916,7 +2916,7 @@ bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond) addendum = (cond == ARC_CC_AL) ? 0 : INSN_len_normal; disp = get_displacement(curr_off + addendum, targ_off); - if (ARC_CC_AL) + if (cond == ARC_CC_AL) return is_valid_far_disp(disp); else return is_valid_near_disp(disp); diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 19973ab4ea6b5..9e10d7a6b5a2c 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -233,7 +233,7 @@ #interrupt-cells = <0x1>; compatible = "pci-host-ecam-generic"; device_type = "pci"; - bus-range = <0x0 0x1>; + bus-range = <0x0 0xff>; reg = <0x0 0x40000000 0x0 0x10000000>; ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>; interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 85ef966c08cd2..4ef52d7245bbb 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -87,7 +87,7 @@ 1 << PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter .Lskip_spe_el2_\@: - mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + mov x0, #MDCR_EL2_E2PB_MASK orr x2, x2, x0 // If we don't have VHE, then // use EL1&0 translation. @@ -100,7 +100,7 @@ and x0, x0, TRBIDR_EL1_P cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 - mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + mov x0, #MDCR_EL2_E2TB_MASK orr x2, x2, x0 // allow the EL1&0 translation // to own it. diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 65f76064c86b2..ae990da1eae5a 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -114,8 +114,8 @@ SYM_CODE_START_LOCAL(__finalise_el2) // Use EL2 translations for SPE & TRBE and disable access from EL1 mrs x0, mdcr_el2 - bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) + bic x0, x0, #MDCR_EL2_E2PB_MASK + bic x0, x0, #MDCR_EL2_E2TB_MASK msr mdcr_el2, x0 // Transfer the MM state from EL1 to EL2 diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 14ac6fdb872b9..37e24f1bd2279 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -1462,10 +1462,33 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; + int err; + + if (ksig->ka.sa.sa_flags & SA_RESTORER) + sigtramp = ksig->ka.sa.sa_restorer; + else + sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + + err = gcs_signal_entry(sigtramp, ksig); + if (err) + return err; + + /* + * We must not fail from this point onwards. We are going to update + * registers, including SP, in order to invoke the signal handler. If + * we failed and attempted to deliver a nested SIGSEGV to a handler + * after that point, the subsequent sigreturn would end up restoring + * the (partial) state for the original signal handler. + */ regs->regs[0] = usig; + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + regs->regs[1] = (unsigned long)&user->sigframe->info; + regs->regs[2] = (unsigned long)&user->sigframe->uc; + } regs->sp = (unsigned long)user->sigframe; regs->regs[29] = (unsigned long)&user->next_frame->fp; + regs->regs[30] = (unsigned long)sigtramp; regs->pc = (unsigned long)ksig->ka.sa.sa_handler; /* @@ -1506,14 +1529,7 @@ static int setup_return(struct pt_regs *regs, struct ksignal *ksig, sme_smstop(); } - if (ksig->ka.sa.sa_flags & SA_RESTORER) - sigtramp = ksig->ka.sa.sa_restorer; - else - sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); - - regs->regs[30] = (unsigned long)sigtramp; - - return gcs_signal_entry(sigtramp, ksig); + return 0; } static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, @@ -1537,14 +1553,16 @@ static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, err |= __save_altstack(&frame->uc.uc_stack, regs->sp); err |= setup_sigframe(&user, regs, set, &ua_state); - if (err == 0) { + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + if (err == 0) err = setup_return(regs, ksig, &user, usig); - if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - regs->regs[1] = (unsigned long)&frame->info; - regs->regs[2] = (unsigned long)&frame->uc; - } - } + + /* + * We must not fail if setup_return() succeeded - see comment at the + * beginning of setup_return(). + */ if (err == 0) set_handler_user_access_state(); diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index caef85462acb6..1d9d51d7627fd 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -26,7 +26,6 @@ enum kunwind_source { KUNWIND_SOURCE_CALLER, KUNWIND_SOURCE_TASK, KUNWIND_SOURCE_REGS_PC, - KUNWIND_SOURCE_REGS_LR, }; union unwind_flags { @@ -138,8 +137,10 @@ kunwind_recover_return_address(struct kunwind_state *state) orig_pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, state->common.pc, (void *)state->common.fp); - if (WARN_ON_ONCE(state->common.pc == orig_pc)) + if (state->common.pc == orig_pc) { + WARN_ON_ONCE(state->task == current); return -EINVAL; + } state->common.pc = orig_pc; state->flags.fgraph = 1; } @@ -178,23 +179,8 @@ int kunwind_next_regs_pc(struct kunwind_state *state) state->regs = regs; state->common.pc = regs->pc; state->common.fp = regs->regs[29]; - state->source = KUNWIND_SOURCE_REGS_PC; - return 0; -} - -static __always_inline int -kunwind_next_regs_lr(struct kunwind_state *state) -{ - /* - * The stack for the regs was consumed by kunwind_next_regs_pc(), so we - * cannot consume that again here, but we know the regs are safe to - * access. - */ - state->common.pc = state->regs->regs[30]; - state->common.fp = state->regs->regs[29]; state->regs = NULL; - state->source = KUNWIND_SOURCE_REGS_LR; - + state->source = KUNWIND_SOURCE_REGS_PC; return 0; } @@ -215,12 +201,12 @@ kunwind_next_frame_record_meta(struct kunwind_state *state) case FRAME_META_TYPE_FINAL: if (meta == &task_pt_regs(tsk)->stackframe) return -ENOENT; - WARN_ON_ONCE(1); + WARN_ON_ONCE(tsk == current); return -EINVAL; case FRAME_META_TYPE_PT_REGS: return kunwind_next_regs_pc(state); default: - WARN_ON_ONCE(1); + WARN_ON_ONCE(tsk == current); return -EINVAL; } } @@ -274,11 +260,8 @@ kunwind_next(struct kunwind_state *state) case KUNWIND_SOURCE_FRAME: case KUNWIND_SOURCE_CALLER: case KUNWIND_SOURCE_TASK: - case KUNWIND_SOURCE_REGS_LR: - err = kunwind_next_frame_record(state); - break; case KUNWIND_SOURCE_REGS_PC: - err = kunwind_next_regs_lr(state); + err = kunwind_next_frame_record(state); break; default: err = -EINVAL; @@ -436,7 +419,6 @@ static const char *state_source_string(const struct kunwind_state *state) case KUNWIND_SOURCE_CALLER: return "C"; case KUNWIND_SOURCE_TASK: return "T"; case KUNWIND_SOURCE_REGS_PC: return "P"; - case KUNWIND_SOURCE_REGS_LR: return "L"; default: return "U"; } } diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 8c5d7990e5b31..3d7eb395e33dd 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -739,8 +739,15 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, final_attr = s1_parattr; break; default: - /* MemAttr[2]=0, Device from S2 */ - final_attr = s2_memattr & GENMASK(1,0) << 2; + /* + * MemAttr[2]=0, Device from S2. + * + * FWB does not influence the way that stage 1 + * memory types and attributes are combined + * with stage 2 Device type and attributes. + */ + final_attr = min(s2_memattr_to_attr(s2_memattr), + s1_parattr); } } else { /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */ diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 01616c39a8107..071993c16de81 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -126,7 +126,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) /* Trap SPE */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) { mdcr_set |= MDCR_EL2_TPMS; - mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + mdcr_clear |= MDCR_EL2_E2PB_MASK; } /* Trap Trace Filter */ @@ -143,7 +143,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) /* Trap External Trace */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids)) - mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + mdcr_clear |= MDCR_EL2_E2TB_MASK; vcpu->arch.mdcr_el2 |= mdcr_set; vcpu->arch.mdcr_el2 &= ~mdcr_clear; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 83c6b4a07ef56..e2a5c2918d9e5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2618,7 +2618,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_WRITABLE(ID_AA64MMFR0_EL1, ~(ID_AA64MMFR0_EL1_RES0 | ID_AA64MMFR0_EL1_TGRAN4_2 | ID_AA64MMFR0_EL1_TGRAN64_2 | - ID_AA64MMFR0_EL1_TGRAN16_2)), + ID_AA64MMFR0_EL1_TGRAN16_2 | + ID_AA64MMFR0_EL1_ASIDBITS)), ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | ID_AA64MMFR1_EL1_HCX | ID_AA64MMFR1_EL1_TWED | diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index f4c4494645c34..fb96802799c6f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -608,12 +608,22 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, lockdep_assert_held(&its->its_lock); vgic_get_irq_kref(irq); + old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); + + /* + * Put the reference taken on @irq if the store fails. Intentionally do + * not return the error as the translation cache is best effort. + */ + if (xa_is_err(old)) { + vgic_put_irq(kvm, irq); + return; + } + /* * We could have raced with another CPU caching the same * translation behind our back, ensure we don't leak a * reference if that is the case. */ - old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT); if (old) vgic_put_irq(kvm, old); } diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index 7388edd88986f..d08bf7fb3aee6 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -22,7 +22,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) else set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT)); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + preempt_disable(); + local_flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + preempt_enable(); return true; } diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c index 6eee6f736f687..654ed159c830b 100644 --- a/arch/riscv/kernel/jump_label.c +++ b/arch/riscv/kernel/jump_label.c @@ -36,9 +36,15 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, insn = RISCV_INSN_NOP; } - mutex_lock(&text_mutex); - patch_insn_write(addr, &insn, sizeof(insn)); - mutex_unlock(&text_mutex); + if (early_boot_irqs_disabled) { + riscv_patch_in_stop_machine = 1; + patch_insn_write(addr, &insn, sizeof(insn)); + riscv_patch_in_stop_machine = 0; + } else { + mutex_lock(&text_mutex); + patch_insn_write(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); + } return true; } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 016b48fcd6f27..45010e71df86c 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -227,7 +227,7 @@ static void __init init_resources(void) static void __init parse_dtb(void) { /* Early scan of device tree from init memory */ - if (early_init_dt_scan(dtb_early_va, __pa(dtb_early_va))) { + if (early_init_dt_scan(dtb_early_va, dtb_early_pa)) { const char *name = of_flat_dt_get_machine_name(); if (name) { diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index dcced4db7fe8c..19afd1f235377 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -590,7 +590,7 @@ void kvm_riscv_aia_enable(void) csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) - csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); + csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0e8c20adcd98d..fc53ce748c804 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1566,7 +1566,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) pmd_clear(pmd); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemmap) { struct page *page = pud_page(*pud); struct ptdesc *ptdesc = page_ptdesc(page); @@ -1579,7 +1579,8 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) return; } - pagetable_pmd_dtor(ptdesc); + if (!is_vmemmap) + pagetable_pmd_dtor(ptdesc); if (PageReserved(page)) free_reserved_page(page); else @@ -1703,7 +1704,7 @@ static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, un remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap); if (pgtable_l4_enabled) - free_pmd_table(pmd_base, pudp); + free_pmd_table(pmd_base, pudp, is_vmemmap); } } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 097bdc022d0f4..ae0b438a2c991 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -36,6 +36,26 @@ u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; EXPORT_SYMBOL_GPL(kvm_cpu_caps); +struct cpuid_xstate_sizes { + u32 eax; + u32 ebx; + u32 ecx; +}; + +static struct cpuid_xstate_sizes xstate_sizes[XFEATURE_MAX] __ro_after_init; + +void __init kvm_init_xstate_sizes(void) +{ + u32 ign; + int i; + + for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes); i++) { + struct cpuid_xstate_sizes *xs = &xstate_sizes[i]; + + cpuid_count(0xD, i, &xs->eax, &xs->ebx, &xs->ecx, &ign); + } +} + u32 xstate_required_size(u64 xstate_bv, bool compacted) { int feature_bit = 0; @@ -44,14 +64,15 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted) xstate_bv &= XFEATURE_MASK_EXTEND; while (xstate_bv) { if (xstate_bv & 0x1) { - u32 eax, ebx, ecx, edx, offset; - cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); + struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit]; + u32 offset; + /* ECX[1]: 64B alignment in compacted form */ if (compacted) - offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret; + offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret; else - offset = ebx; - ret = max(ret, offset + eax); + offset = xs->ebx; + ret = max(ret, offset + xs->eax); } xstate_bv >>= 1; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index c8dc66eddefda..f16a7b2c2adcf 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -31,6 +31,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); +void __init kvm_init_xstate_sizes(void); u32 xstate_required_size(u64 xstate_bv, bool compacted); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2e713480933a1..c8160baf38385 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13997,6 +13997,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_rmp_fault); static int __init kvm_x86_init(void) { + kvm_init_xstate_sizes(); + kvm_mmu_x86_module_init(); mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); return 0; diff --git a/block/bio.c b/block/bio.c index 699a78c85c756..d5bdc31d88d32 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1171,7 +1171,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) } EXPORT_SYMBOL_GPL(__bio_release_pages); -void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) +void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter) { WARN_ON_ONCE(bio->bi_max_vecs); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e68c725cf8d97..45a395862fbc8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1324,10 +1324,14 @@ void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css) struct blkcg *blkcg = css_to_blkcg(blkcg_css); do { + struct blkcg *parent; + if (!refcount_dec_and_test(&blkcg->online_pin)) break; + + parent = blkcg_parent(blkcg); blkcg_destroy_blkgs(blkcg); - blkcg = blkcg_parent(blkcg); + blkcg = parent; } while (blkcg); } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 384aa15e8260b..a5894ec9696e7 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1098,7 +1098,14 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum, iocg->child_active_sum); } else { - inuse = clamp_t(u32, inuse, 1, active); + /* + * It may be tempting to turn this into a clamp expression with + * a lower limit of 1 but active may be 0, which cannot be used + * as an upper limit in that situation. This expression allows + * active to clamp inuse unless it is 0, in which case inuse + * becomes 1. + */ + inuse = min(inuse, active) ?: 1; } iocg->last_inuse = iocg->inuse; diff --git a/block/blk-map.c b/block/blk-map.c index b5fd1d8574615..894009b2d881b 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -574,7 +574,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter) bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL); if (!bio) return -ENOMEM; - bio_iov_bvec_set(bio, (struct iov_iter *)iter); + bio_iov_bvec_set(bio, iter); /* check that the data layout matches the hardware restrictions */ ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 156e9bb07abf1..cd5ea6eaa76b0 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -275,15 +275,13 @@ void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned long i; - mutex_lock(&q->sysfs_dir_lock); + lockdep_assert_held(&q->sysfs_dir_lock); + if (!q->mq_sysfs_init_done) - goto unlock; + return; queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - -unlock: - mutex_unlock(&q->sysfs_dir_lock); } int blk_mq_sysfs_register_hctxs(struct request_queue *q) @@ -292,9 +290,10 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) unsigned long i; int ret = 0; - mutex_lock(&q->sysfs_dir_lock); + lockdep_assert_held(&q->sysfs_dir_lock); + if (!q->mq_sysfs_init_done) - goto unlock; + return ret; queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); @@ -302,8 +301,5 @@ int blk_mq_sysfs_register_hctxs(struct request_queue *q) break; } -unlock: - mutex_unlock(&q->sysfs_dir_lock); - return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index aa340b097b6e4..6b6111513986f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1544,19 +1544,17 @@ static void blk_mq_requeue_work(struct work_struct *work) while (!list_empty(&rq_list)) { rq = list_entry(rq_list.next, struct request, queuelist); + list_del_init(&rq->queuelist); /* - * If RQF_DONTPREP ist set, the request has been started by the + * If RQF_DONTPREP is set, the request has been started by the * driver already and might have driver-specific data allocated * already. Insert it into the hctx dispatch list to avoid * block layer merges for the request. */ - if (rq->rq_flags & RQF_DONTPREP) { - list_del_init(&rq->queuelist); + if (rq->rq_flags & RQF_DONTPREP) blk_mq_request_bypass_insert(rq, 0); - } else { - list_del_init(&rq->queuelist); + else blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD); - } } while (!list_empty(&flush_list)) { @@ -4455,7 +4453,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, unsigned long i, j; /* protect against switching io scheduler */ - mutex_lock(&q->sysfs_lock); + lockdep_assert_held(&q->sysfs_lock); + for (i = 0; i < set->nr_hw_queues; i++) { int old_node; int node = blk_mq_get_hctx_node(set, i); @@ -4488,7 +4487,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, xa_for_each_start(&q->hctx_table, j, hctx, j) blk_mq_exit_hctx(q, set, hctx, j); - mutex_unlock(&q->sysfs_lock); /* unregister cpuhp callbacks for exited hctxs */ blk_mq_remove_hw_queues_cpuhp(q); @@ -4520,10 +4518,14 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, xa_init(&q->hctx_table); + mutex_lock(&q->sysfs_lock); + blk_mq_realloc_hw_ctxs(set, q); if (!q->nr_hw_queues) goto err_hctxs; + mutex_unlock(&q->sysfs_lock); + INIT_WORK(&q->timeout_work, blk_mq_timeout_work); blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); @@ -4542,6 +4544,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, return 0; err_hctxs: + mutex_unlock(&q->sysfs_lock); blk_mq_release(q); err_exit: q->mq_ops = NULL; @@ -4922,12 +4925,12 @@ static bool blk_mq_elv_switch_none(struct list_head *head, return false; /* q->elevator needs protection from ->sysfs_lock */ - mutex_lock(&q->sysfs_lock); + lockdep_assert_held(&q->sysfs_lock); /* the check has to be done with holding sysfs_lock */ if (!q->elevator) { kfree(qe); - goto unlock; + goto out; } INIT_LIST_HEAD(&qe->node); @@ -4937,9 +4940,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head, __elevator_get(qe->type); list_add(&qe->node, head); elevator_disable(q); -unlock: - mutex_unlock(&q->sysfs_lock); - +out: return true; } @@ -4968,11 +4969,9 @@ static void blk_mq_elv_switch_back(struct list_head *head, list_del(&qe->node); kfree(qe); - mutex_lock(&q->sysfs_lock); elevator_switch(q, t); /* drop the reference acquired in blk_mq_elv_switch_none */ elevator_put(t); - mutex_unlock(&q->sysfs_lock); } static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, @@ -4992,8 +4991,11 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues) return; - list_for_each_entry(q, &set->tag_list, tag_set_list) + list_for_each_entry(q, &set->tag_list, tag_set_list) { + mutex_lock(&q->sysfs_dir_lock); + mutex_lock(&q->sysfs_lock); blk_mq_freeze_queue(q); + } /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done @@ -5049,8 +5051,11 @@ switch_back: list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_elv_switch_back(&head, q); - list_for_each_entry(q, &set->tag_list, tag_set_list) + list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_unfreeze_queue(q); + mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); + } /* Free the excess tags when nr_hw_queues shrink. */ for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4241aea84161c..64f70c713d2f9 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -263,7 +263,7 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page) static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page) { - return queue_var_show(blk_queue_passthrough_stat(disk->queue), page); + return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page); } static ssize_t queue_iostats_passthrough_store(struct gendisk *disk, @@ -706,11 +706,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, if (entry->load_module) entry->load_module(disk, page, length); - blk_mq_freeze_queue(q); mutex_lock(&q->sysfs_lock); + blk_mq_freeze_queue(q); res = entry->store(disk, page, length); - mutex_unlock(&q->sysfs_lock); blk_mq_unfreeze_queue(q); + mutex_unlock(&q->sysfs_lock); return res; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 263e28b720538..84da1eadff642 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -41,7 +41,6 @@ static const char *const zone_cond_name[] = { /* * Per-zone write plug. * @node: hlist_node structure for managing the plug using a hash table. - * @link: To list the plug in the zone write plug error list of the disk. * @ref: Zone write plug reference counter. A zone write plug reference is * always at least 1 when the plug is hashed in the disk plug hash table. * The reference is incremented whenever a new BIO needing plugging is @@ -63,7 +62,6 @@ static const char *const zone_cond_name[] = { */ struct blk_zone_wplug { struct hlist_node node; - struct list_head link; refcount_t ref; spinlock_t lock; unsigned int flags; @@ -80,8 +78,8 @@ struct blk_zone_wplug { * - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged, * that is, that write BIOs are being throttled due to a write BIO already * being executed or the zone write plug bio list is not empty. - * - BLK_ZONE_WPLUG_ERROR: Indicates that a write error happened which will be - * recovered with a report zone to update the zone write pointer offset. + * - BLK_ZONE_WPLUG_NEED_WP_UPDATE: Indicates that we lost track of a zone + * write pointer offset and need to update it. * - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed * from the disk hash table and that the initial reference to the zone * write plug set when the plug was first added to the hash table has been @@ -91,11 +89,9 @@ struct blk_zone_wplug { * freed once all remaining references from BIOs or functions are dropped. */ #define BLK_ZONE_WPLUG_PLUGGED (1U << 0) -#define BLK_ZONE_WPLUG_ERROR (1U << 1) +#define BLK_ZONE_WPLUG_NEED_WP_UPDATE (1U << 1) #define BLK_ZONE_WPLUG_UNHASHED (1U << 2) -#define BLK_ZONE_WPLUG_BUSY (BLK_ZONE_WPLUG_PLUGGED | BLK_ZONE_WPLUG_ERROR) - /** * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX. * @zone_cond: BLK_ZONE_COND_XXX. @@ -115,6 +111,30 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond) } EXPORT_SYMBOL_GPL(blk_zone_cond_str); +struct disk_report_zones_cb_args { + struct gendisk *disk; + report_zones_cb user_cb; + void *user_data; +}; + +static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk, + struct blk_zone *zone); + +static int disk_report_zones_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct disk_report_zones_cb_args *args = data; + struct gendisk *disk = args->disk; + + if (disk->zone_wplugs_hash) + disk_zone_wplug_sync_wp_offset(disk, zone); + + if (!args->user_cb) + return 0; + + return args->user_cb(zone, idx, args->user_data); +} + /** * blkdev_report_zones - Get zones information * @bdev: Target block device @@ -139,6 +159,11 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, { struct gendisk *disk = bdev->bd_disk; sector_t capacity = get_capacity(disk); + struct disk_report_zones_cb_args args = { + .disk = disk, + .user_cb = cb, + .user_data = data, + }; if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; @@ -146,7 +171,8 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, if (!nr_zones || sector >= capacity) return 0; - return disk->fops->report_zones(disk, sector, nr_zones, cb, data); + return disk->fops->report_zones(disk, sector, nr_zones, + disk_report_zones_cb, &args); } EXPORT_SYMBOL_GPL(blkdev_report_zones); @@ -427,7 +453,7 @@ static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug) { if (refcount_dec_and_test(&zwplug->ref)) { WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list)); - WARN_ON_ONCE(!list_empty(&zwplug->link)); + WARN_ON_ONCE(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED); WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)); call_rcu(&zwplug->rcu_head, disk_free_zone_wplug_rcu); @@ -441,8 +467,8 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk, if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) return false; - /* If the zone write plug is still busy, it cannot be removed. */ - if (zwplug->flags & BLK_ZONE_WPLUG_BUSY) + /* If the zone write plug is still plugged, it cannot be removed. */ + if (zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) return false; /* @@ -525,12 +551,11 @@ again: return NULL; INIT_HLIST_NODE(&zwplug->node); - INIT_LIST_HEAD(&zwplug->link); refcount_set(&zwplug->ref, 2); spin_lock_init(&zwplug->lock); zwplug->flags = 0; zwplug->zone_no = zno; - zwplug->wp_offset = sector & (disk->queue->limits.chunk_sectors - 1); + zwplug->wp_offset = bdev_offset_from_zone_start(disk->part0, sector); bio_list_init(&zwplug->bio_list); INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work); zwplug->disk = disk; @@ -574,124 +599,81 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) } /* - * Abort (fail) all plugged BIOs of a zone write plug that are not aligned - * with the assumed write pointer location of the zone when the BIO will - * be unplugged. + * Set a zone write plug write pointer offset to the specified value. + * This aborts all plugged BIOs, which is fine as this function is called for + * a zone reset operation, a zone finish operation or if the zone needs a wp + * update from a report zone after a write error. */ -static void disk_zone_wplug_abort_unaligned(struct gendisk *disk, - struct blk_zone_wplug *zwplug) -{ - unsigned int wp_offset = zwplug->wp_offset; - struct bio_list bl = BIO_EMPTY_LIST; - struct bio *bio; - - while ((bio = bio_list_pop(&zwplug->bio_list))) { - if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) || - (bio_op(bio) != REQ_OP_ZONE_APPEND && - bio_offset_from_zone_start(bio) != wp_offset)) { - blk_zone_wplug_bio_io_error(zwplug, bio); - continue; - } - - wp_offset += bio_sectors(bio); - bio_list_add(&bl, bio); - } - - bio_list_merge(&zwplug->bio_list, &bl); -} - -static inline void disk_zone_wplug_set_error(struct gendisk *disk, - struct blk_zone_wplug *zwplug) +static void disk_zone_wplug_set_wp_offset(struct gendisk *disk, + struct blk_zone_wplug *zwplug, + unsigned int wp_offset) { - unsigned long flags; + lockdep_assert_held(&zwplug->lock); - if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) - return; + /* Update the zone write pointer and abort all plugged BIOs. */ + zwplug->flags &= ~BLK_ZONE_WPLUG_NEED_WP_UPDATE; + zwplug->wp_offset = wp_offset; + disk_zone_wplug_abort(zwplug); /* - * At this point, we already have a reference on the zone write plug. - * However, since we are going to add the plug to the disk zone write - * plugs work list, increase its reference count. This reference will - * be dropped in disk_zone_wplugs_work() once the error state is - * handled, or in disk_zone_wplug_clear_error() if the zone is reset or - * finished. + * The zone write plug now has no BIO plugged: remove it from the + * hash table so that it cannot be seen. The plug will be freed + * when the last reference is dropped. */ - zwplug->flags |= BLK_ZONE_WPLUG_ERROR; - refcount_inc(&zwplug->ref); - - spin_lock_irqsave(&disk->zone_wplugs_lock, flags); - list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list); - spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); + if (disk_should_remove_zone_wplug(disk, zwplug)) + disk_remove_zone_wplug(disk, zwplug); } -static inline void disk_zone_wplug_clear_error(struct gendisk *disk, - struct blk_zone_wplug *zwplug) +static unsigned int blk_zone_wp_offset(struct blk_zone *zone) { - unsigned long flags; - - if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) - return; - - /* - * We are racing with the error handling work which drops the reference - * on the zone write plug after handling the error state. So remove the - * plug from the error list and drop its reference count only if the - * error handling has not yet started, that is, if the zone write plug - * is still listed. - */ - spin_lock_irqsave(&disk->zone_wplugs_lock, flags); - if (!list_empty(&zwplug->link)) { - list_del_init(&zwplug->link); - zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR; - disk_put_zone_wplug(zwplug); + switch (zone->cond) { + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + case BLK_ZONE_COND_CLOSED: + return zone->wp - zone->start; + case BLK_ZONE_COND_FULL: + return zone->len; + case BLK_ZONE_COND_EMPTY: + return 0; + case BLK_ZONE_COND_NOT_WP: + case BLK_ZONE_COND_OFFLINE: + case BLK_ZONE_COND_READONLY: + default: + /* + * Conventional, offline and read-only zones do not have a valid + * write pointer. + */ + return UINT_MAX; } - spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); } -/* - * Set a zone write plug write pointer offset to either 0 (zone reset case) - * or to the zone size (zone finish case). This aborts all plugged BIOs, which - * is fine to do as doing a zone reset or zone finish while writes are in-flight - * is a mistake from the user which will most likely cause all plugged BIOs to - * fail anyway. - */ -static void disk_zone_wplug_set_wp_offset(struct gendisk *disk, - struct blk_zone_wplug *zwplug, - unsigned int wp_offset) +static void disk_zone_wplug_sync_wp_offset(struct gendisk *disk, + struct blk_zone *zone) { + struct blk_zone_wplug *zwplug; unsigned long flags; - spin_lock_irqsave(&zwplug->lock, flags); - - /* - * Make sure that a BIO completion or another zone reset or finish - * operation has not already removed the plug from the hash table. - */ - if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) { - spin_unlock_irqrestore(&zwplug->lock, flags); + zwplug = disk_get_zone_wplug(disk, zone->start); + if (!zwplug) return; - } - /* Update the zone write pointer and abort all plugged BIOs. */ - zwplug->wp_offset = wp_offset; - disk_zone_wplug_abort(zwplug); + spin_lock_irqsave(&zwplug->lock, flags); + if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) + disk_zone_wplug_set_wp_offset(disk, zwplug, + blk_zone_wp_offset(zone)); + spin_unlock_irqrestore(&zwplug->lock, flags); - /* - * Updating the write pointer offset puts back the zone - * in a good state. So clear the error flag and decrement the - * error count if we were in error state. - */ - disk_zone_wplug_clear_error(disk, zwplug); + disk_put_zone_wplug(zwplug); +} - /* - * The zone write plug now has no BIO plugged: remove it from the - * hash table so that it cannot be seen. The plug will be freed - * when the last reference is dropped. - */ - if (disk_should_remove_zone_wplug(disk, zwplug)) - disk_remove_zone_wplug(disk, zwplug); +static int disk_zone_sync_wp_offset(struct gendisk *disk, sector_t sector) +{ + struct disk_report_zones_cb_args args = { + .disk = disk, + }; - spin_unlock_irqrestore(&zwplug->lock, flags); + return disk->fops->report_zones(disk, sector, 1, + disk_report_zones_cb, &args); } static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, @@ -700,6 +682,7 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, struct gendisk *disk = bio->bi_bdev->bd_disk; sector_t sector = bio->bi_iter.bi_sector; struct blk_zone_wplug *zwplug; + unsigned long flags; /* Conventional zones cannot be reset nor finished. */ if (!bdev_zone_is_seq(bio->bi_bdev, sector)) { @@ -708,6 +691,15 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, } /* + * No-wait reset or finish BIOs do not make much sense as the callers + * issue these as blocking operations in most cases. To avoid issues + * the BIO execution potentially failing with BLK_STS_AGAIN, warn about + * REQ_NOWAIT being set and ignore that flag. + */ + if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) + bio->bi_opf &= ~REQ_NOWAIT; + + /* * If we have a zone write plug, set its write pointer offset to 0 * (reset case) or to the zone size (finish case). This will abort all * BIOs plugged for the target zone. It is fine as resetting or @@ -716,7 +708,9 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio, */ zwplug = disk_get_zone_wplug(disk, sector); if (zwplug) { + spin_lock_irqsave(&zwplug->lock, flags); disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset); + spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); } @@ -727,6 +721,7 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; struct blk_zone_wplug *zwplug; + unsigned long flags; sector_t sector; /* @@ -738,7 +733,9 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio) sector += disk->queue->limits.chunk_sectors) { zwplug = disk_get_zone_wplug(disk, sector); if (zwplug) { + spin_lock_irqsave(&zwplug->lock, flags); disk_zone_wplug_set_wp_offset(disk, zwplug, 0); + spin_unlock_irqrestore(&zwplug->lock, flags); disk_put_zone_wplug(zwplug); } } @@ -746,10 +743,26 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio) return false; } -static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug, - struct bio *bio, unsigned int nr_segs) +static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, + struct blk_zone_wplug *zwplug) { /* + * Take a reference on the zone write plug and schedule the submission + * of the next plugged BIO. blk_zone_wplug_bio_work() will release the + * reference we take here. + */ + WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)); + refcount_inc(&zwplug->ref); + queue_work(disk->zone_wplugs_wq, &zwplug->bio_work); +} + +static inline void disk_zone_wplug_add_bio(struct gendisk *disk, + struct blk_zone_wplug *zwplug, + struct bio *bio, unsigned int nr_segs) +{ + bool schedule_bio_work = false; + + /* * Grab an extra reference on the BIO request queue usage counter. * This reference will be reused to submit a request for the BIO for * blk-mq devices and dropped when the BIO is failed and after @@ -765,6 +778,16 @@ static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug, bio_clear_polled(bio); /* + * REQ_NOWAIT BIOs are always handled using the zone write plug BIO + * work, which can block. So clear the REQ_NOWAIT flag and schedule the + * work if this is the first BIO we are plugging. + */ + if (bio->bi_opf & REQ_NOWAIT) { + schedule_bio_work = !(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED); + bio->bi_opf &= ~REQ_NOWAIT; + } + + /* * Reuse the poll cookie field to store the number of segments when * split to the hardware limits. */ @@ -777,6 +800,11 @@ static inline void blk_zone_wplug_add_bio(struct blk_zone_wplug *zwplug, * at the tail of the list to preserve the sequential write order. */ bio_list_add(&zwplug->bio_list, bio); + + zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; + + if (schedule_bio_work) + disk_zone_wplug_schedule_bio_work(disk, zwplug); } /* @@ -890,12 +918,22 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, struct gendisk *disk = bio->bi_bdev->bd_disk; /* + * If we lost track of the zone write pointer due to a write error, + * the user must either execute a report zones, reset the zone or finish + * the to recover a reliable write pointer position. Fail BIOs if the + * user did not do that as we cannot handle emulated zone append + * otherwise. + */ + if (zwplug->flags & BLK_ZONE_WPLUG_NEED_WP_UPDATE) + return false; + + /* * Check that the user is not attempting to write to a full zone. * We know such BIO will fail, and that would potentially overflow our * write pointer offset beyond the end of the zone. */ if (disk_zone_wplug_is_full(disk, zwplug)) - goto err; + return false; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { /* @@ -914,24 +952,18 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, bio_set_flag(bio, BIO_EMULATES_ZONE_APPEND); } else { /* - * Check for non-sequential writes early because we avoid a - * whole lot of error handling trouble if we don't send it off - * to the driver. + * Check for non-sequential writes early as we know that BIOs + * with a start sector not unaligned to the zone write pointer + * will fail. */ if (bio_offset_from_zone_start(bio) != zwplug->wp_offset) - goto err; + return false; } /* Advance the zone write pointer offset. */ zwplug->wp_offset += bio_sectors(bio); return true; - -err: - /* We detected an invalid write BIO: schedule error recovery. */ - disk_zone_wplug_set_error(disk, zwplug); - kblockd_schedule_work(&disk->zone_wplugs_work); - return false; } static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) @@ -970,7 +1002,10 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) zwplug = disk_get_and_lock_zone_wplug(disk, sector, gfp_mask, &flags); if (!zwplug) { - bio_io_error(bio); + if (bio->bi_opf & REQ_NOWAIT) + bio_wouldblock_error(bio); + else + bio_io_error(bio); return true; } @@ -978,18 +1013,20 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING); /* - * If the zone is already plugged or has a pending error, add the BIO - * to the plug BIO list. Otherwise, plug and let the BIO execute. + * If the zone is already plugged, add the BIO to the plug BIO list. + * Do the same for REQ_NOWAIT BIOs to ensure that we will not see a + * BLK_STS_AGAIN failure if we let the BIO execute. + * Otherwise, plug and let the BIO execute. */ - if (zwplug->flags & BLK_ZONE_WPLUG_BUSY) + if ((zwplug->flags & BLK_ZONE_WPLUG_PLUGGED) || + (bio->bi_opf & REQ_NOWAIT)) goto plug; - /* - * If an error is detected when preparing the BIO, add it to the BIO - * list so that error recovery can deal with it. - */ - if (!blk_zone_wplug_prepare_bio(zwplug, bio)) - goto plug; + if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { + spin_unlock_irqrestore(&zwplug->lock, flags); + bio_io_error(bio); + return true; + } zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; @@ -998,8 +1035,7 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs) return false; plug: - zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED; - blk_zone_wplug_add_bio(zwplug, bio, nr_segs); + disk_zone_wplug_add_bio(disk, zwplug, bio, nr_segs); spin_unlock_irqrestore(&zwplug->lock, flags); @@ -1083,19 +1119,6 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) } EXPORT_SYMBOL_GPL(blk_zone_plug_bio); -static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk, - struct blk_zone_wplug *zwplug) -{ - /* - * Take a reference on the zone write plug and schedule the submission - * of the next plugged BIO. blk_zone_wplug_bio_work() will release the - * reference we take here. - */ - WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED)); - refcount_inc(&zwplug->ref); - queue_work(disk->zone_wplugs_wq, &zwplug->bio_work); -} - static void disk_zone_wplug_unplug_bio(struct gendisk *disk, struct blk_zone_wplug *zwplug) { @@ -1103,16 +1126,6 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk, spin_lock_irqsave(&zwplug->lock, flags); - /* - * If we had an error, schedule error recovery. The recovery work - * will restart submission of plugged BIOs. - */ - if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) { - spin_unlock_irqrestore(&zwplug->lock, flags); - kblockd_schedule_work(&disk->zone_wplugs_work); - return; - } - /* Schedule submission of the next plugged BIO if we have one. */ if (!bio_list_empty(&zwplug->bio_list)) { disk_zone_wplug_schedule_bio_work(disk, zwplug); @@ -1155,12 +1168,13 @@ void blk_zone_write_plug_bio_endio(struct bio *bio) } /* - * If the BIO failed, mark the plug as having an error to trigger - * recovery. + * If the BIO failed, abort all plugged BIOs and mark the plug as + * needing a write pointer update. */ if (bio->bi_status != BLK_STS_OK) { spin_lock_irqsave(&zwplug->lock, flags); - disk_zone_wplug_set_error(disk, zwplug); + disk_zone_wplug_abort(zwplug); + zwplug->flags |= BLK_ZONE_WPLUG_NEED_WP_UPDATE; spin_unlock_irqrestore(&zwplug->lock, flags); } @@ -1216,6 +1230,7 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) */ spin_lock_irqsave(&zwplug->lock, flags); +again: bio = bio_list_pop(&zwplug->bio_list); if (!bio) { zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; @@ -1224,10 +1239,8 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) } if (!blk_zone_wplug_prepare_bio(zwplug, bio)) { - /* Error recovery will decide what to do with the BIO. */ - bio_list_add_head(&zwplug->bio_list, bio); - spin_unlock_irqrestore(&zwplug->lock, flags); - goto put_zwplug; + blk_zone_wplug_bio_io_error(zwplug, bio); + goto again; } spin_unlock_irqrestore(&zwplug->lock, flags); @@ -1249,120 +1262,6 @@ put_zwplug: disk_put_zone_wplug(zwplug); } -static unsigned int blk_zone_wp_offset(struct blk_zone *zone) -{ - switch (zone->cond) { - case BLK_ZONE_COND_IMP_OPEN: - case BLK_ZONE_COND_EXP_OPEN: - case BLK_ZONE_COND_CLOSED: - return zone->wp - zone->start; - case BLK_ZONE_COND_FULL: - return zone->len; - case BLK_ZONE_COND_EMPTY: - return 0; - case BLK_ZONE_COND_NOT_WP: - case BLK_ZONE_COND_OFFLINE: - case BLK_ZONE_COND_READONLY: - default: - /* - * Conventional, offline and read-only zones do not have a valid - * write pointer. - */ - return UINT_MAX; - } -} - -static int blk_zone_wplug_report_zone_cb(struct blk_zone *zone, - unsigned int idx, void *data) -{ - struct blk_zone *zonep = data; - - *zonep = *zone; - return 0; -} - -static void disk_zone_wplug_handle_error(struct gendisk *disk, - struct blk_zone_wplug *zwplug) -{ - sector_t zone_start_sector = - bdev_zone_sectors(disk->part0) * zwplug->zone_no; - unsigned int noio_flag; - struct blk_zone zone; - unsigned long flags; - int ret; - - /* Get the current zone information from the device. */ - noio_flag = memalloc_noio_save(); - ret = disk->fops->report_zones(disk, zone_start_sector, 1, - blk_zone_wplug_report_zone_cb, &zone); - memalloc_noio_restore(noio_flag); - - spin_lock_irqsave(&zwplug->lock, flags); - - /* - * A zone reset or finish may have cleared the error already. In such - * case, do nothing as the report zones may have seen the "old" write - * pointer value before the reset/finish operation completed. - */ - if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) - goto unlock; - - zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR; - - if (ret != 1) { - /* - * We failed to get the zone information, meaning that something - * is likely really wrong with the device. Abort all remaining - * plugged BIOs as otherwise we could endup waiting forever on - * plugged BIOs to complete if there is a queue freeze on-going. - */ - disk_zone_wplug_abort(zwplug); - goto unplug; - } - - /* Update the zone write pointer offset. */ - zwplug->wp_offset = blk_zone_wp_offset(&zone); - disk_zone_wplug_abort_unaligned(disk, zwplug); - - /* Restart BIO submission if we still have any BIO left. */ - if (!bio_list_empty(&zwplug->bio_list)) { - disk_zone_wplug_schedule_bio_work(disk, zwplug); - goto unlock; - } - -unplug: - zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; - if (disk_should_remove_zone_wplug(disk, zwplug)) - disk_remove_zone_wplug(disk, zwplug); - -unlock: - spin_unlock_irqrestore(&zwplug->lock, flags); -} - -static void disk_zone_wplugs_work(struct work_struct *work) -{ - struct gendisk *disk = - container_of(work, struct gendisk, zone_wplugs_work); - struct blk_zone_wplug *zwplug; - unsigned long flags; - - spin_lock_irqsave(&disk->zone_wplugs_lock, flags); - - while (!list_empty(&disk->zone_wplugs_err_list)) { - zwplug = list_first_entry(&disk->zone_wplugs_err_list, - struct blk_zone_wplug, link); - list_del_init(&zwplug->link); - spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); - - disk_zone_wplug_handle_error(disk, zwplug); - disk_put_zone_wplug(zwplug); - - spin_lock_irqsave(&disk->zone_wplugs_lock, flags); - } - - spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); -} - static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) { return 1U << disk->zone_wplugs_hash_bits; @@ -1371,8 +1270,6 @@ static inline unsigned int disk_zone_wplugs_hash_size(struct gendisk *disk) void disk_init_zone_resources(struct gendisk *disk) { spin_lock_init(&disk->zone_wplugs_lock); - INIT_LIST_HEAD(&disk->zone_wplugs_err_list); - INIT_WORK(&disk->zone_wplugs_work, disk_zone_wplugs_work); } /* @@ -1471,8 +1368,6 @@ void disk_free_zone_resources(struct gendisk *disk) if (!disk->zone_wplugs_pool) return; - cancel_work_sync(&disk->zone_wplugs_work); - if (disk->zone_wplugs_wq) { destroy_workqueue(disk->zone_wplugs_wq); disk->zone_wplugs_wq = NULL; @@ -1669,6 +1564,8 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, if (!disk->zone_wplugs_hash) return 0; + disk_zone_wplug_sync_wp_offset(disk, zone); + wp_offset = blk_zone_wp_offset(zone); if (!wp_offset || wp_offset >= zone->capacity) return 0; @@ -1799,6 +1696,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) memalloc_noio_restore(noio_flag); return ret; } + ret = disk->fops->report_zones(disk, 0, UINT_MAX, blk_revalidate_zone_cb, &args); if (!ret) { @@ -1835,6 +1733,48 @@ int blk_revalidate_disk_zones(struct gendisk *disk) } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); +/** + * blk_zone_issue_zeroout - zero-fill a block range in a zone + * @bdev: blockdev to write + * @sector: start sector + * @nr_sects: number of sectors to write + * @gfp_mask: memory allocation flags (for bio_alloc) + * + * Description: + * Zero-fill a block range in a zone (@sector must be equal to the zone write + * pointer), handling potential errors due to the (initially unknown) lack of + * hardware offload (See blkdev_issue_zeroout()). + */ +int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask) +{ + int ret; + + if (WARN_ON_ONCE(!bdev_is_zoned(bdev))) + return -EIO; + + ret = blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, + BLKDEV_ZERO_NOFALLBACK); + if (ret != -EOPNOTSUPP) + return ret; + + /* + * The failed call to blkdev_issue_zeroout() advanced the zone write + * pointer. Undo this using a report zone to update the zone write + * pointer to the correct current value. + */ + ret = disk_zone_sync_wp_offset(bdev->bd_disk, sector); + if (ret != 1) + return ret < 0 ? ret : -EIO; + + /* + * Retry without BLKDEV_ZERO_NOFALLBACK to force the fallback to a + * regular write with zero-pages. + */ + return blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask, 0); +} +EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout); + #ifdef CONFIG_BLK_DEBUG_FS int queue_zone_wplugs_show(void *data, struct seq_file *m) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 91b3789f710e7..5528347b5fcfc 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -698,8 +698,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, list_add(&rq->queuelist, &per_prio->dispatch); rq->fifo_time = jiffies; } else { - struct list_head *insert_before; - deadline_add_rq_rb(per_prio, rq); if (rq_mergeable(rq)) { @@ -712,8 +710,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * set expire time and add to fifo list */ rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; - insert_before = &per_prio->fifo_list[data_dir]; - list_add_tail(&rq->queuelist, insert_before); + list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]); } } diff --git a/crypto/rsassa-pkcs1.c b/crypto/rsassa-pkcs1.c index 4d077fc960767..f68ffd338f483 100644 --- a/crypto/rsassa-pkcs1.c +++ b/crypto/rsassa-pkcs1.c @@ -163,10 +163,6 @@ static int rsassa_pkcs1_sign(struct crypto_sig *tfm, struct rsassa_pkcs1_inst_ctx *ictx = sig_instance_ctx(inst); const struct hash_prefix *hash_prefix = ictx->hash_prefix; struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); - unsigned int child_reqsize = crypto_akcipher_reqsize(ctx->child); - struct akcipher_request *child_req __free(kfree_sensitive) = NULL; - struct scatterlist in_sg[3], out_sg; - struct crypto_wait cwait; unsigned int pad_len; unsigned int ps_end; unsigned int len; @@ -187,37 +183,25 @@ static int rsassa_pkcs1_sign(struct crypto_sig *tfm, pad_len = ctx->key_size - slen - hash_prefix->size - 1; - child_req = kmalloc(sizeof(*child_req) + child_reqsize + pad_len, - GFP_KERNEL); - if (!child_req) - return -ENOMEM; - /* RFC 8017 sec 8.2.1 step 1 - EMSA-PKCS1-v1_5 encoding generation */ - in_buf = (u8 *)(child_req + 1) + child_reqsize; + in_buf = dst; + memmove(in_buf + pad_len + hash_prefix->size, src, slen); + memcpy(in_buf + pad_len, hash_prefix->data, hash_prefix->size); + ps_end = pad_len - 1; in_buf[0] = 0x01; memset(in_buf + 1, 0xff, ps_end - 1); in_buf[ps_end] = 0x00; - /* RFC 8017 sec 8.2.1 step 2 - RSA signature */ - crypto_init_wait(&cwait); - sg_init_table(in_sg, 3); - sg_set_buf(&in_sg[0], in_buf, pad_len); - sg_set_buf(&in_sg[1], hash_prefix->data, hash_prefix->size); - sg_set_buf(&in_sg[2], src, slen); - sg_init_one(&out_sg, dst, dlen); - akcipher_request_set_tfm(child_req, ctx->child); - akcipher_request_set_crypt(child_req, in_sg, &out_sg, - ctx->key_size - 1, dlen); - akcipher_request_set_callback(child_req, CRYPTO_TFM_REQ_MAY_SLEEP, - crypto_req_done, &cwait); - err = crypto_akcipher_decrypt(child_req); - err = crypto_wait_req(err, &cwait); - if (err) + /* RFC 8017 sec 8.2.1 step 2 - RSA signature */ + err = crypto_akcipher_sync_decrypt(ctx->child, in_buf, + ctx->key_size - 1, in_buf, + ctx->key_size); + if (err < 0) return err; - len = child_req->dst_len; + len = err; pad_len = ctx->key_size - len; /* Four billion to one */ @@ -239,8 +223,8 @@ static int rsassa_pkcs1_verify(struct crypto_sig *tfm, struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); unsigned int child_reqsize = crypto_akcipher_reqsize(ctx->child); struct akcipher_request *child_req __free(kfree_sensitive) = NULL; - struct scatterlist in_sg, out_sg; struct crypto_wait cwait; + struct scatterlist sg; unsigned int dst_len; unsigned int pos; u8 *out_buf; @@ -259,13 +243,12 @@ static int rsassa_pkcs1_verify(struct crypto_sig *tfm, return -ENOMEM; out_buf = (u8 *)(child_req + 1) + child_reqsize; + memcpy(out_buf, src, slen); crypto_init_wait(&cwait); - sg_init_one(&in_sg, src, slen); - sg_init_one(&out_sg, out_buf, ctx->key_size); + sg_init_one(&sg, out_buf, slen); akcipher_request_set_tfm(child_req, ctx->child); - akcipher_request_set_crypt(child_req, &in_sg, &out_sg, - slen, ctx->key_size); + akcipher_request_set_crypt(child_req, &sg, &sg, slen, slen); akcipher_request_set_callback(child_req, CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &cwait); diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 95f78383bbdba..bff2d099f4691 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -232,8 +232,6 @@ acpi_remove_address_space_handler(acpi_handle device, /* Now we can delete the handler object */ - acpi_os_release_mutex(handler_obj->address_space. - context_mutex); acpi_ut_remove_reference(handler_obj); goto unlock_and_exit; } diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 5429ec9ef06f0..a5d47819b3a4e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -454,8 +454,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (cmd_rc) *cmd_rc = -EINVAL; - if (cmd == ND_CMD_CALL) + if (cmd == ND_CMD_CALL) { + if (!buf || buf_len < sizeof(*call_pkg)) + return -EINVAL; + call_pkg = buf; + } + func = cmd_to_func(nfit_mem, cmd, call_pkg, &family); if (func < 0) return func; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 7fe842dae1ec0..821867de43bea 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -250,6 +250,9 @@ static bool acpi_decode_space(struct resource_win *win, switch (addr->resource_type) { case ACPI_MEMORY_RANGE: acpi_dev_memresource_flags(res, len, wp); + + if (addr->info.mem.caching == ACPI_PREFETCHABLE_MEMORY) + res->flags |= IORESOURCE_PREFETCH; break; case ACPI_IO_RANGE: acpi_dev_ioresource_flags(res, len, iodec, @@ -265,9 +268,6 @@ static bool acpi_decode_space(struct resource_win *win, if (addr->producer_consumer == ACPI_PRODUCER) res->flags |= IORESOURCE_WINDOW; - if (addr->info.mem.caching == ACPI_PREFETCHABLE_MEMORY) - res->flags |= IORESOURCE_PREFETCH; - return !(res->flags & IORESOURCE_DISABLED); } diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index b1b40e9551ded..c8c817c51230a 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -348,6 +348,7 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr) phy_nodes[phy] = phy_data.np; cphy_base[phy] = of_iomap(phy_nodes[phy], 0); if (cphy_base[phy] == NULL) { + of_node_put(phy_data.np); return 0; } phy_count += 1; diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index 1b9b7bccdeff0..45e130b901eb5 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -192,7 +192,7 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->sqc) { - memcpy(&sqc, qm->sqc + qp_id * sizeof(struct qm_sqc), sizeof(struct qm_sqc)); + memcpy(&sqc, qm->sqc + qp_id, sizeof(struct qm_sqc)); sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &sqc, sizeof(struct qm_sqc), "SOFT SQC"); @@ -229,7 +229,7 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->cqc) { - memcpy(&cqc, qm->cqc + qp_id * sizeof(struct qm_cqc), sizeof(struct qm_cqc)); + memcpy(&cqc, qm->cqc + qp_id, sizeof(struct qm_cqc)); cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &cqc, sizeof(struct qm_cqc), "SOFT CQC"); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ddfbdb66b794d..5d356b7c45897 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3362,36 +3362,24 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt) static bool umc_ecc_enabled(struct amd64_pvt *pvt) { - u8 umc_en_mask = 0, ecc_en_mask = 0; - u16 nid = pvt->mc_node_id; struct amd64_umc *umc; - u8 ecc_en = 0, i; + bool ecc_en = false; + int i; + /* Check whether at least one UMC is enabled: */ for_each_umc(i) { umc = &pvt->umc[i]; - /* Only check enabled UMCs. */ - if (!(umc->sdp_ctrl & UMC_SDP_INIT)) - continue; - - umc_en_mask |= BIT(i); - - if (umc->umc_cap_hi & UMC_ECC_ENABLED) - ecc_en_mask |= BIT(i); + if (umc->sdp_ctrl & UMC_SDP_INIT && + umc->umc_cap_hi & UMC_ECC_ENABLED) { + ecc_en = true; + break; + } } - /* Check whether at least one UMC is enabled: */ - if (umc_en_mask) - ecc_en = umc_en_mask == ecc_en_mask; - else - edac_dbg(0, "Node %d: No enabled UMCs.\n", nid); - - edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); + edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, (ecc_en ? "enabled" : "disabled")); - if (!ecc_en) - return false; - else - return true; + return ecc_en; } static inline void diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c index eb17d03b66fec..dfda5ffc14db7 100644 --- a/drivers/firmware/arm_ffa/bus.c +++ b/drivers/firmware/arm_ffa/bus.c @@ -187,13 +187,18 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) return valid; } -struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, - const struct ffa_ops *ops) +struct ffa_device * +ffa_device_register(const struct ffa_partition_info *part_info, + const struct ffa_ops *ops) { int id, ret; + uuid_t uuid; struct device *dev; struct ffa_device *ffa_dev; + if (!part_info) + return NULL; + id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL); if (id < 0) return NULL; @@ -210,9 +215,11 @@ struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id); ffa_dev->id = id; - ffa_dev->vm_id = vm_id; + ffa_dev->vm_id = part_info->id; + ffa_dev->properties = part_info->properties; ffa_dev->ops = ops; - uuid_copy(&ffa_dev->uuid, uuid); + import_uuid(&uuid, (u8 *)part_info->uuid); + uuid_copy(&ffa_dev->uuid, &uuid); ret = device_register(&ffa_dev->dev); if (ret) { diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index b14cbdae94e82..2c2ec3c35f156 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -1387,7 +1387,6 @@ static struct notifier_block ffa_bus_nb = { static int ffa_setup_partitions(void) { int count, idx, ret; - uuid_t uuid; struct ffa_device *ffa_dev; struct ffa_dev_part_info *info; struct ffa_partition_info *pbuf, *tpbuf; @@ -1406,23 +1405,19 @@ static int ffa_setup_partitions(void) xa_init(&drv_info->partition_info); for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) { - import_uuid(&uuid, (u8 *)tpbuf->uuid); - /* Note that if the UUID will be uuid_null, that will require * ffa_bus_notifier() to find the UUID of this partition id * with help of ffa_device_match_uuid(). FF-A v1.1 and above * provides UUID here for each partition as part of the * discovery API and the same is passed. */ - ffa_dev = ffa_device_register(&uuid, tpbuf->id, &ffa_drv_ops); + ffa_dev = ffa_device_register(tpbuf, &ffa_drv_ops); if (!ffa_dev) { pr_err("%s: failed to register partition ID 0x%x\n", __func__, tpbuf->id); continue; } - ffa_dev->properties = tpbuf->properties; - if (drv_info->version > FFA_VERSION_1_0 && !(tpbuf->properties & FFA_PARTITION_AARCH64_EXEC)) ffa_mode_32bit_set(ffa_dev); diff --git a/drivers/firmware/arm_scmi/vendors/imx/Kconfig b/drivers/firmware/arm_scmi/vendors/imx/Kconfig index 2883ed24a84d6..a01bf5e47301d 100644 --- a/drivers/firmware/arm_scmi/vendors/imx/Kconfig +++ b/drivers/firmware/arm_scmi/vendors/imx/Kconfig @@ -15,6 +15,7 @@ config IMX_SCMI_BBM_EXT config IMX_SCMI_MISC_EXT tristate "i.MX SCMI MISC EXTENSION" depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF) + depends on IMX_SCMI_MISC_DRV default y if ARCH_MXC help This enables i.MX System MISC control logic such as gpio expander diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index e312d731f4a36..5fe61b9ab5f96 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -76,10 +76,6 @@ config EFI_ZBOOT bool "Enable the generic EFI decompressor" depends on EFI_GENERIC_STUB && !ARM select HAVE_KERNEL_GZIP - select HAVE_KERNEL_LZ4 - select HAVE_KERNEL_LZMA - select HAVE_KERNEL_LZO - select HAVE_KERNEL_XZ select HAVE_KERNEL_ZSTD help Create the bootable image as an EFI application that carries the diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 7a81c0ce47805..4bb7b0584bc90 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -75,8 +75,6 @@ static LIST_HEAD(entry_list); struct esre_attribute { struct attribute attr; ssize_t (*show)(struct esre_entry *entry, char *buf); - ssize_t (*store)(struct esre_entry *entry, - const char *buf, size_t count); }; static struct esre_entry *to_entry(struct kobject *kobj) diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot index 65ffd0b760b2f..48842b5c106b8 100644 --- a/drivers/firmware/efi/libstub/Makefile.zboot +++ b/drivers/firmware/efi/libstub/Makefile.zboot @@ -12,22 +12,16 @@ quiet_cmd_copy_and_pad = PAD $@ $(obj)/vmlinux.bin: $(obj)/$(EFI_ZBOOT_PAYLOAD) FORCE $(call if_changed,copy_and_pad) -comp-type-$(CONFIG_KERNEL_GZIP) := gzip -comp-type-$(CONFIG_KERNEL_LZ4) := lz4 -comp-type-$(CONFIG_KERNEL_LZMA) := lzma -comp-type-$(CONFIG_KERNEL_LZO) := lzo -comp-type-$(CONFIG_KERNEL_XZ) := xzkern -comp-type-$(CONFIG_KERNEL_ZSTD) := zstd22 - # in GZIP, the appended le32 carrying the uncompressed size is part of the # format, but in other cases, we just append it at the end for convenience, # causing the original tools to complain when checking image integrity. -# So disregard it when calculating the payload size in the zimage header. -zboot-method-y := $(comp-type-y)_with_size -zboot-size-len-y := 4 +comp-type-y := gzip +zboot-method-y := gzip +zboot-size-len-y := 0 -zboot-method-$(CONFIG_KERNEL_GZIP) := gzip -zboot-size-len-$(CONFIG_KERNEL_GZIP) := 0 +comp-type-$(CONFIG_KERNEL_ZSTD) := zstd +zboot-method-$(CONFIG_KERNEL_ZSTD) := zstd22_with_size +zboot-size-len-$(CONFIG_KERNEL_ZSTD) := 4 $(obj)/vmlinuz: $(obj)/vmlinux.bin FORCE $(call if_changed,$(zboot-method-y)) diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig index 477d3f32d99a6..907cd149c40a8 100644 --- a/drivers/firmware/imx/Kconfig +++ b/drivers/firmware/imx/Kconfig @@ -25,7 +25,6 @@ config IMX_SCU config IMX_SCMI_MISC_DRV tristate "IMX SCMI MISC Protocol driver" - depends on IMX_SCMI_MISC_EXT || COMPILE_TEST default y if ARCH_MXC help The System Controller Management Interface firmware (SCMI FW) is diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 56fee58e281e7..93ee3aa092f81 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -482,8 +482,9 @@ config GPIO_MT7621 Say yes here to support the Mediatek MT7621 SoC GPIO device. config GPIO_MVEBU - def_bool y + bool "Marvell Orion and EBU GPIO support" if COMPILE_TEST depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST + default PLAT_ORION || ARCH_MVEBU select GENERIC_IRQ_CHIP select REGMAP_MMIO diff --git a/drivers/gpio/gpio-graniterapids.c b/drivers/gpio/gpio-graniterapids.c index f2e911a3d2ca0..ad6a045fd3d2d 100644 --- a/drivers/gpio/gpio-graniterapids.c +++ b/drivers/gpio/gpio-graniterapids.c @@ -32,12 +32,14 @@ #define GNR_PINS_PER_REG 32 #define GNR_NUM_REGS DIV_ROUND_UP(GNR_NUM_PINS, GNR_PINS_PER_REG) -#define GNR_CFG_BAR 0x00 +#define GNR_CFG_PADBAR 0x00 #define GNR_CFG_LOCK_OFFSET 0x04 -#define GNR_GPI_STATUS_OFFSET 0x20 +#define GNR_GPI_STATUS_OFFSET 0x14 #define GNR_GPI_ENABLE_OFFSET 0x24 -#define GNR_CFG_DW_RX_MASK GENMASK(25, 22) +#define GNR_CFG_DW_HOSTSW_MODE BIT(27) +#define GNR_CFG_DW_RX_MASK GENMASK(23, 22) +#define GNR_CFG_DW_INTSEL_MASK GENMASK(21, 14) #define GNR_CFG_DW_RX_DISABLE FIELD_PREP(GNR_CFG_DW_RX_MASK, 2) #define GNR_CFG_DW_RX_EDGE FIELD_PREP(GNR_CFG_DW_RX_MASK, 1) #define GNR_CFG_DW_RX_LEVEL FIELD_PREP(GNR_CFG_DW_RX_MASK, 0) @@ -50,6 +52,7 @@ * struct gnr_gpio - Intel Granite Rapids-D vGPIO driver state * @gc: GPIO controller interface * @reg_base: base address of the GPIO registers + * @pad_base: base address of the vGPIO pad configuration registers * @ro_bitmap: bitmap of read-only pins * @lock: guard the registers * @pad_backup: backup of the register state for suspend @@ -57,6 +60,7 @@ struct gnr_gpio { struct gpio_chip gc; void __iomem *reg_base; + void __iomem *pad_base; DECLARE_BITMAP(ro_bitmap, GNR_NUM_PINS); raw_spinlock_t lock; u32 pad_backup[]; @@ -65,7 +69,7 @@ struct gnr_gpio { static void __iomem *gnr_gpio_get_padcfg_addr(const struct gnr_gpio *priv, unsigned int gpio) { - return priv->reg_base + gpio * sizeof(u32); + return priv->pad_base + gpio * sizeof(u32); } static int gnr_gpio_configure_line(struct gpio_chip *gc, unsigned int gpio, @@ -88,6 +92,20 @@ static int gnr_gpio_configure_line(struct gpio_chip *gc, unsigned int gpio, return 0; } +static int gnr_gpio_request(struct gpio_chip *gc, unsigned int gpio) +{ + struct gnr_gpio *priv = gpiochip_get_data(gc); + u32 dw; + + dw = readl(gnr_gpio_get_padcfg_addr(priv, gpio)); + if (!(dw & GNR_CFG_DW_HOSTSW_MODE)) { + dev_warn(gc->parent, "GPIO %u is not owned by host", gpio); + return -EBUSY; + } + + return 0; +} + static int gnr_gpio_get(struct gpio_chip *gc, unsigned int gpio) { const struct gnr_gpio *priv = gpiochip_get_data(gc); @@ -139,6 +157,7 @@ static int gnr_gpio_direction_output(struct gpio_chip *gc, unsigned int gpio, in static const struct gpio_chip gnr_gpio_chip = { .owner = THIS_MODULE, + .request = gnr_gpio_request, .get = gnr_gpio_get, .set = gnr_gpio_set, .get_direction = gnr_gpio_get_direction, @@ -166,7 +185,7 @@ static void gnr_gpio_irq_ack(struct irq_data *d) guard(raw_spinlock_irqsave)(&priv->lock); reg = readl(addr); - reg &= ~BIT(bit_idx); + reg |= BIT(bit_idx); writel(reg, addr); } @@ -209,10 +228,18 @@ static void gnr_gpio_irq_unmask(struct irq_data *d) static int gnr_gpio_irq_set_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); - irq_hw_number_t pin = irqd_to_hwirq(d); - u32 mask = GNR_CFG_DW_RX_MASK; + struct gnr_gpio *priv = gpiochip_get_data(gc); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + u32 reg; u32 set; + /* Allow interrupts only if Interrupt Select field is non-zero */ + reg = readl(gnr_gpio_get_padcfg_addr(priv, hwirq)); + if (!(reg & GNR_CFG_DW_INTSEL_MASK)) { + dev_dbg(gc->parent, "GPIO %lu cannot be used as IRQ", hwirq); + return -EPERM; + } + /* Falling edge and level low triggers not supported by the GPIO controller */ switch (type) { case IRQ_TYPE_NONE: @@ -230,10 +257,11 @@ static int gnr_gpio_irq_set_type(struct irq_data *d, unsigned int type) return -EINVAL; } - return gnr_gpio_configure_line(gc, pin, mask, set); + return gnr_gpio_configure_line(gc, hwirq, GNR_CFG_DW_RX_MASK, set); } static const struct irq_chip gnr_gpio_irq_chip = { + .name = "gpio-graniterapids", .irq_ack = gnr_gpio_irq_ack, .irq_mask = gnr_gpio_irq_mask, .irq_unmask = gnr_gpio_irq_unmask, @@ -291,6 +319,7 @@ static int gnr_gpio_probe(struct platform_device *pdev) struct gnr_gpio *priv; void __iomem *regs; int irq, ret; + u32 offset; priv = devm_kzalloc(dev, struct_size(priv, pad_backup, num_backup_pins), GFP_KERNEL); if (!priv) @@ -302,6 +331,10 @@ static int gnr_gpio_probe(struct platform_device *pdev) if (IS_ERR(regs)) return PTR_ERR(regs); + priv->reg_base = regs; + offset = readl(priv->reg_base + GNR_CFG_PADBAR); + priv->pad_base = priv->reg_base + offset; + irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; @@ -311,8 +344,6 @@ static int gnr_gpio_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "failed to request interrupt\n"); - priv->reg_base = regs + readl(regs + GNR_CFG_BAR); - gnr_gpio_init_pin_ro_bits(dev, priv->reg_base + GNR_CFG_LOCK_OFFSET, priv->ro_bitmap); @@ -324,7 +355,6 @@ static int gnr_gpio_probe(struct platform_device *pdev) girq = &priv->gc.irq; gpio_irq_chip_set_chip(girq, &gnr_gpio_irq_chip); - girq->chip->name = dev_name(dev); girq->parent_handler = NULL; girq->num_parents = 0; girq->parents = NULL; diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c index 2c95125892972..0103be977c66b 100644 --- a/drivers/gpio/gpio-idio-16.c +++ b/drivers/gpio/gpio-idio-16.c @@ -3,6 +3,9 @@ * GPIO library for the ACCES IDIO-16 family * Copyright (C) 2022 William Breathitt Gray */ + +#define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16" + #include <linux/bits.h> #include <linux/device.h> #include <linux/err.h> @@ -14,8 +17,6 @@ #include "gpio-idio-16.h" -#define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16" - #define IDIO_16_DAT_BASE 0x0 #define IDIO_16_OUT_BASE IDIO_16_DAT_BASE #define IDIO_16_IN_BASE (IDIO_16_DAT_BASE + 1) diff --git a/drivers/gpio/gpio-ljca.c b/drivers/gpio/gpio-ljca.c index 503d2441c58f2..817ecb12d5509 100644 --- a/drivers/gpio/gpio-ljca.c +++ b/drivers/gpio/gpio-ljca.c @@ -82,9 +82,9 @@ static int ljca_gpio_config(struct ljca_gpio_dev *ljca_gpio, u8 gpio_id, int ret; mutex_lock(&ljca_gpio->trans_lock); + packet->num = 1; packet->item[0].index = gpio_id; packet->item[0].value = config | ljca_gpio->connect_mode[gpio_id]; - packet->num = 1; ret = ljca_transfer(ljca_gpio->ljca, LJCA_GPIO_CONFIG, (u8 *)packet, struct_size(packet, item, packet->num), NULL, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d891ab779ca7f..5df21529b3b13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; + /* Make sure VRAM is allocated contigiously */ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - for (i = 0; i < (*bo)->placement.num_placement; i++) - (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; + if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM && + !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { + + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; + } return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 96316111300a7..d272d95dd5b2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = { "LAST", }; -#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0) +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0) /* * Default init level where all blocks are expected to be initialized. This is * the level of initialization expected by default and also after a full reset diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 31fd30dcd593b..65bb26215e867 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo) for (i = 0; i < abo->placement.num_placement; ++i) { abo->placements[i].fpfn = 0 >> PAGE_SHIFT; abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + if (abo->placements[i].mem_type == TTM_PL_VRAM) + abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8d9bf7a0857fd..ddd7f05e4db9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -674,12 +674,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader && - job->enforce_isolation) - ring->funcs->emit_cleaner_shader(ring); - - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && + !(job->enforce_isolation && !job->vmid)) return 0; amdgpu_ring_ib_begin(ring); @@ -690,6 +686,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) + ring->funcs->emit_cleaner_shader(ring); + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index e2b3dda57030c..54459254bd37b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin"); #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -574,8 +576,12 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, { int err; - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_mec.bin", chip_name); + if (amdgpu_sriov_vf(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_sjt_mec.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 079131aeb2f78..3c8ab8698af83 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); + struct amdgpu_ring *ring = amdgpu_job_ring(job); unsigned i; /* No patching necessary for the first instance */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 7b826a136ceb9..e5324c5bc6c71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1423,6 +1423,7 @@ err: static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, + bool cache_line_size_missing, struct kfd_gpu_cache_info *pcache_info) { struct amdgpu_device *adev = kdev->adev; @@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 64; i++; } /* GL1 Data Cache per SA */ @@ -1472,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; - pcache_info[i].cache_line_size = 0; + if (cache_line_size_missing) + pcache_info[i].cache_line_size = 128; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1484,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* L3 Data Cache per GPU */ @@ -1494,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; - pcache_info[i].cache_line_size = 0; + pcache_info[i].cache_line_size = 64; i++; } return i; @@ -1569,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; + bool cache_line_size_missing = false; switch (kdev->adev->asic_type) { case CHIP_KAVERI: @@ -1692,10 +1703,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + /* Cacheline size not available in IP discovery for gc11. + * kfd_fill_gpu_cache_info_from_gfx_config to hard code it + */ + cache_line_size_missing = true; + fallthrough; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, + cache_line_size_missing, + *pcache_info); break; default: *pcache_info = dummy_cache_info; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c79fe9069e220..16b5daaa272f1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, if (!down_read_trylock(&adev->reset_domain->sem)) return -EIO; + if (!pdd->proc_ctx_cpu_ptr) { + r = amdgpu_amdkfd_alloc_gtt_mem(adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); + if (r) { + dev_err(adev->dev, + "failed to allocate process context bo\n"); + return r; + } + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); + } + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); queue_input.process_id = qpd->pqm->process->pasid; queue_input.page_table_base_addr = qpd->page_table_base; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index eacfeb32f35d6..4b275937d05e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -306,7 +306,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, spage = migrate_pfn_to_page(migrate->src[i]); if (spage && !is_zone_device_page(spage)) { src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, - DMA_TO_DEVICE); + DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, src[i]); if (r) { dev_err(dev, "%s: fail %d dma_map_page\n", @@ -629,7 +629,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, goto out_oom; } - dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, dst[i]); if (r) { dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 87cd52cf4ee99..d0ee173acf824 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) kfd_free_process_doorbells(pdd->dev->kfd, pdd); - if (pdd->dev->kfd->shared_resources.enable_mes) + if (pdd->dev->kfd->shared_resources.enable_mes && + pdd->proc_ctx_cpu_ptr) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, &pdd->proc_ctx_bo); /* @@ -1608,7 +1609,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, struct kfd_process *p) { struct kfd_process_device *pdd = NULL; - int retval = 0; if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) return NULL; @@ -1632,21 +1632,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0); - if (dev->kfd->shared_resources.enable_mes) { - retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, - AMDGPU_MES_PROC_CTX_SIZE, - &pdd->proc_ctx_bo, - &pdd->proc_ctx_gpu_addr, - &pdd->proc_ctx_cpu_ptr, - false); - if (retval) { - dev_err(dev->adev->dev, - "failed to allocate process context bo\n"); - goto err_free_pdd; - } - memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - } - p->pdds[p->n_pdds++] = pdd; if (kfd_dbg_is_per_vmid_supported(pdd->dev)) pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( @@ -1658,10 +1643,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, idr_init(&pdd->alloc_idr); return pdd; - -err_free_pdd: - kfree(pdd); - return NULL; } /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c76db22a10005..59b92d66e9589 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -212,13 +212,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; - struct kfd_process_device *pdd; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q) { - pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); - kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); - kfd_queue_release_buffers(pdd, &pqn->q->properties); + struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device, + pqm->process); + if (pdd) { + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + kfd_queue_release_buffers(pdd, &pqn->q->properties); + } else { + WARN_ON(!pdd); + } pqm_clean_queue_resource(pqm, pqn); } diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 67a5de5739436..d7acdd42d80f6 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -164,6 +164,7 @@ enum amd_pp_task { }; enum PP_SMC_POWER_PROFILE { + PP_SMC_POWER_PROFILE_UNKNOWN = -1, PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0, PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1, PP_SMC_POWER_PROFILE_POWERSAVING = 0x2, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 5eae14fe79f17..21bd635bcdfc1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -764,6 +764,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block) smu->smu_baco.platform_support = false; smu->smu_baco.maco_support = false; smu->user_dpm_profile.fan_mode = -1; + smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN; mutex_init(&smu->message_lock); @@ -1248,6 +1249,21 @@ static bool smu_is_workload_profile_available(struct smu_context *smu, return smu->workload_map && smu->workload_map[profile].valid_mapping; } +static void smu_init_power_profile(struct smu_context *smu) +{ + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) { + if (smu->is_apu || + !smu_is_workload_profile_available( + smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) + smu->power_profile_mode = + PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + else + smu->power_profile_mode = + PP_SMC_POWER_PROFILE_FULLSCREEN3D; + } + smu_power_profile_mode_get(smu, smu->power_profile_mode); +} + static int smu_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -1269,13 +1285,7 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - if (smu->is_apu || - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) - smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; - else - smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; - smu_power_profile_mode_get(smu, smu->power_profile_mode); - + smu_init_power_profile(smu); smu->display_config = &adev->pm.pm_display_cfg; smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index f4ac403b8b360..aabb947960056 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2810,4 +2810,5 @@ void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) smu->workload_map = smu_v13_0_7_workload_map; smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION; smu_v13_0_set_smu_mailbox_registers(smu); + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; } diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 09500cddc009b..ef2d490965ba2 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -929,7 +929,6 @@ impl QrImage<'_> { /// * `tmp` must be valid for reading and writing for `tmp_size` bytes. /// /// They must remain valid for the duration of the function call. - #[no_mangle] pub unsafe extern "C" fn drm_panic_qr_generate( url: *const i8, diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 174753625bcad..7cd902bbd244e 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1343,6 +1343,17 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state, intel_de_write_fw(display, reg, val); } +static void ilk_lut_write_indexed(const struct intel_crtc_state *crtc_state, + i915_reg_t reg, u32 val) +{ + struct intel_display *display = to_intel_display(crtc_state); + + if (crtc_state->dsb_color_vblank) + intel_dsb_reg_write_indexed(crtc_state->dsb_color_vblank, reg, val); + else + intel_de_write_fw(display, reg, val); +} + static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, const struct drm_property_blob *blob) { @@ -1357,19 +1368,29 @@ static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, lut = blob->data; /* - * DSB fails to correctly load the legacy LUT - * unless we either write each entry twice, - * or use non-posted writes + * DSB fails to correctly load the legacy LUT unless + * we either write each entry twice when using posted + * writes, or we use non-posted writes. + * + * If palette anti-collision is active during LUT + * register writes: + * - posted writes simply get dropped and thus the LUT + * contents may not be correctly updated + * - non-posted writes are blocked and thus the LUT + * contents are always correct, but simultaneous CPU + * MMIO access will start to fail + * + * Choose the lesser of two evils and use posted writes. + * Using posted writes is also faster, even when having + * to write each register twice. */ - if (crtc_state->dsb_color_vblank) - intel_dsb_nonpost_start(crtc_state->dsb_color_vblank); - - for (i = 0; i < 256; i++) + for (i = 0; i < 256; i++) { ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), i9xx_lut_8(&lut[i])); - - if (crtc_state->dsb_color_vblank) - intel_dsb_nonpost_end(crtc_state->dsb_color_vblank); + if (crtc_state->dsb_color_vblank) + ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), + i9xx_lut_8(&lut[i])); + } } static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state, @@ -1458,8 +1479,8 @@ static void bdw_load_lut_10(const struct intel_crtc_state *crtc_state, prec_index); for (i = 0; i < lut_size; i++) - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_10(&lut[i])); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_10(&lut[i])); /* * Reset the index, otherwise it prevents the legacy palette to be @@ -1612,16 +1633,16 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state, * ToDo: Extend to max 7.0. Enable 32 bit input value * as compared to just 16 to achieve this. */ - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), - DISPLAY_VER(display) >= 14 ? - mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); + ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe), + DISPLAY_VER(display) >= 14 ? + mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); } /* Clamp values > 1.0. */ while (i++ < glk_degamma_lut_size(display)) - ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), - DISPLAY_VER(display) >= 14 ? - 1 << 24 : 1 << 16); + ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe), + DISPLAY_VER(display) >= 14 ? + 1 << 24 : 1 << 16); ilk_lut_write(crtc_state, PRE_CSC_GAMC_INDEX(pipe), 0); } @@ -1687,10 +1708,10 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) for (i = 0; i < 9; i++) { const struct drm_color_lut *entry = &lut[i]; - ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_udw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_udw(entry)); } ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_INDEX(pipe), @@ -1726,10 +1747,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) for (i = 1; i < 257; i++) { entry = &lut[i * 8]; - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_udw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* @@ -1747,10 +1768,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) for (i = 0; i < 256; i++) { entry = &lut[i * 8 * 128]; - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), - ilk_lut_12p4_udw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } ilk_lut_write(crtc_state, PREC_PAL_INDEX(pipe), diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index b7b44399adaae..4d3785f5cb525 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -273,16 +273,20 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_ } /** - * intel_dsb_reg_write() - Emit register wriite to the DSB context + * intel_dsb_reg_write_indexed() - Emit register wriite to the DSB context * @dsb: DSB context * @reg: register address. * @val: value. * * This function is used for writing register-value pair in command * buffer of DSB. + * + * Note that indexed writes are slower than normal MMIO writes + * for a small number (less than 5 or so) of writes to the same + * register. */ -void intel_dsb_reg_write(struct intel_dsb *dsb, - i915_reg_t reg, u32 val) +void intel_dsb_reg_write_indexed(struct intel_dsb *dsb, + i915_reg_t reg, u32 val) { /* * For example the buffer will look like below for 3 dwords for auto @@ -340,6 +344,15 @@ void intel_dsb_reg_write(struct intel_dsb *dsb, } } +void intel_dsb_reg_write(struct intel_dsb *dsb, + i915_reg_t reg, u32 val) +{ + intel_dsb_emit(dsb, val, + (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | + (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | + i915_mmio_reg_offset(reg)); +} + static u32 intel_dsb_mask_to_byte_en(u32 mask) { return (!!(mask & 0xff000000) << 3 | diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h index 33e0fc2ab3809..da6df07a3c839 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.h +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -34,6 +34,8 @@ void intel_dsb_finish(struct intel_dsb *dsb); void intel_dsb_cleanup(struct intel_dsb *dsb); void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); +void intel_dsb_reg_write_indexed(struct intel_dsb *dsb, + i915_reg_t reg, u32 val); void intel_dsb_reg_write_masked(struct intel_dsb *dsb, i915_reg_t reg, u32 mask, u32 val); void intel_dsb_noop(struct intel_dsb *dsb, int count); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4eb58887819a1..71c0daef19962 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1643,9 +1643,21 @@ capture_engine(struct intel_engine_cs *engine, return NULL; intel_engine_get_hung_entity(engine, &ce, &rq); - if (rq && !i915_request_started(rq)) - drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n", - engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id); + if (rq && !i915_request_started(rq)) { + /* + * We want to know also what is the guc_id of the context, + * but if we don't have the context reference, then skip + * printing it. + */ + if (ce) + drm_info(&engine->gt->i915->drm, + "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n", + engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id); + else + drm_info(&engine->gt->i915->drm, + "Got hung context on %s with active request %lld:%lld not yet started\n", + engine->name, rq->fence.context, rq->fence.seqno); + } if (rq) { capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 762127dd56c53..70a854557e6ec 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -506,6 +506,6 @@ int __init i915_scheduler_module_init(void) return 0; err_priorities: - kmem_cache_destroy(slab_priorities); + kmem_cache_destroy(slab_dependencies); return -ENOMEM; } diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 1a192a2a941b6..3bbdb362d6f0d 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -224,8 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_PINNED); if (IS_ERR(tiny)) { - KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", - PTR_ERR(pt)); + KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", + PTR_ERR(tiny)); goto free_pt; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 3cb228c773cd4..6146d1776bdac 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -65,6 +65,14 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe __invalidation_fence_signal(xe, fence); } +void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +{ + if (WARN_ON_ONCE(!fence->gt)) + return; + + __invalidation_fence_signal(gt_to_xe(fence->gt), fence); +} + static void xe_gt_tlb_fence_timeout(struct work_struct *work) { struct xe_gt *gt = container_of(work, struct xe_gt, diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index f430d5797af70..00b1c6c01e8d9 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -28,6 +28,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, bool stack); +void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); static inline void xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index f27f579f4d85a..797576690356f 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1333,8 +1333,7 @@ static void invalidation_fence_cb(struct dma_fence *fence, queue_work(system_wq, &ifence->work); } else { ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); + xe_gt_tlb_invalidation_fence_signal(&ifence->base); } dma_fence_put(ifence->fence); } diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index e1a0e27cda14c..c13123008e903 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -27,46 +27,27 @@ #include "xe_reg_whitelist.h" #include "xe_rtp_types.h" -#define XE_REG_SR_GROW_STEP_DEFAULT 16 - static void reg_sr_fini(struct drm_device *drm, void *arg) { struct xe_reg_sr *sr = arg; + struct xe_reg_sr_entry *entry; + unsigned long reg; + + xa_for_each(&sr->xa, reg, entry) + kfree(entry); xa_destroy(&sr->xa); - kfree(sr->pool.arr); - memset(&sr->pool, 0, sizeof(sr->pool)); } int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) { xa_init(&sr->xa); - memset(&sr->pool, 0, sizeof(sr->pool)); - sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; sr->name = name; return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); } EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init); -static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) -{ - if (sr->pool.used == sr->pool.allocated) { - struct xe_reg_sr_entry *arr; - - arr = krealloc_array(sr->pool.arr, - ALIGN(sr->pool.allocated + 1, sr->pool.grow_step), - sizeof(*arr), GFP_KERNEL); - if (!arr) - return NULL; - - sr->pool.arr = arr; - sr->pool.allocated += sr->pool.grow_step; - } - - return &sr->pool.arr[sr->pool.used++]; -} - static bool compatible_entries(const struct xe_reg_sr_entry *e1, const struct xe_reg_sr_entry *e2) { @@ -112,7 +93,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, return 0; } - pentry = alloc_entry(sr); + pentry = kmalloc(sizeof(*pentry), GFP_KERNEL); if (!pentry) { ret = -ENOMEM; goto fail; diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h index ad48a52b824a1..ebe11f237fa26 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -20,12 +20,6 @@ struct xe_reg_sr_entry { }; struct xe_reg_sr { - struct { - struct xe_reg_sr_entry *arr; - unsigned int used; - unsigned int allocated; - unsigned int grow_step; - } pool; struct xarray xa; const char *name; diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index efb33802804fd..d2877e4cc28d4 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -1075,6 +1075,7 @@ static const struct of_device_id nmk_i2c_eyeq_match_table[] = { .compatible = "mobileye,eyeq6h-i2c", .data = (void *)NMK_I2C_EYEQ_FLAG_32B_BUS, }, + { /* sentinel */ } }; static int nmk_i2c_probe(struct amba_device *adev, const struct amba_id *id) diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index d4d139b975131..9a1af5bbd604c 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -95,7 +95,7 @@ enum { static inline int wait_timeout(struct i2c_pnx_algo_data *data) { - long timeout = data->timeout; + long timeout = jiffies_to_msecs(data->timeout); while (timeout > 0 && (ioread32(I2C_REG_STS(data)) & mstatus_active)) { mdelay(1); @@ -106,7 +106,7 @@ static inline int wait_timeout(struct i2c_pnx_algo_data *data) static inline int wait_reset(struct i2c_pnx_algo_data *data) { - long timeout = data->timeout; + long timeout = jiffies_to_msecs(data->timeout); while (timeout > 0 && (ioread32(I2C_REG_CTL(data)) & mcntrl_reset)) { mdelay(1); diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index c218f73c36509..9264adc97ca9d 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -352,7 +352,7 @@ static int riic_init_hw(struct riic_dev *riic) if (brl <= (0x1F + 3)) break; - total_ticks /= 2; + total_ticks = DIV_ROUND_UP(total_ticks, 2); rate /= 2; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3f691e1fd22ce..16f40b8000d79 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1415,6 +1415,7 @@ static int domain_flush_pages_v2(struct protection_domain *pdom, struct iommu_cmd cmd; int ret = 0; + lockdep_assert_held(&pdom->lock); list_for_each_entry(dev_data, &pdom->dev_list, list) { struct amd_iommu *iommu = get_amd_iommu_from_dev(dev_data->dev); u16 domid = dev_data->gcr3_info.domid; @@ -1464,6 +1465,8 @@ static void __domain_flush_pages(struct protection_domain *domain, ioasid_t pasid = IOMMU_NO_PASID; bool gn = false; + lockdep_assert_held(&domain->lock); + if (pdom_is_v2_pgtbl_mode(domain)) { gn = true; ret = domain_flush_pages_v2(domain, address, size); @@ -1585,6 +1588,8 @@ void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; + lockdep_assert_held(&domain->lock); + list_for_each_entry(dev_data, &domain->dev_list, list) { struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); @@ -2073,6 +2078,7 @@ static int attach_device(struct device *dev, struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data); struct pci_dev *pdev; + unsigned long flags; int ret = 0; mutex_lock(&dev_data->mutex); @@ -2113,7 +2119,9 @@ static int attach_device(struct device *dev, /* Update data structures */ dev_data->domain = domain; + spin_lock_irqsave(&domain->lock, flags); list_add(&dev_data->list, &domain->dev_list); + spin_unlock_irqrestore(&domain->lock, flags); /* Update device table */ dev_update_dte(dev_data, true); @@ -2160,6 +2168,7 @@ static void detach_device(struct device *dev) /* Flush IOTLB and wait for the flushes to finish */ spin_lock_irqsave(&domain->lock, flags); amd_iommu_domain_flush_all(domain); + list_del(&dev_data->list); spin_unlock_irqrestore(&domain->lock, flags); /* Clear GCR3 table */ @@ -2168,7 +2177,6 @@ static void detach_device(struct device *dev) /* Update data structures */ dev_data->domain = NULL; - list_del(&dev_data->list); /* decrease reference counters - needs to happen after the flushes */ pdom_detach_iommu(iommu, domain); diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index c8ec74f089f3d..6e41ddaa24d63 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -339,7 +339,7 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, * one CPU at a time can enter the process, while the others * will be spinning at the same lock. */ - lidx = smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; + lidx = raw_smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; vcmdq = vintf->lvcmdqs[lidx]; if (!vcmdq || !READ_ONCE(vcmdq->enabled)) return NULL; diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index e5b89f728ad3b..09694cca8752d 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -105,12 +105,35 @@ static void cache_tag_unassign(struct dmar_domain *domain, u16 did, spin_unlock_irqrestore(&domain->cache_lock, flags); } +/* domain->qi_batch will be freed in iommu_free_domain() path. */ +static int domain_qi_batch_alloc(struct dmar_domain *domain) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&domain->cache_lock, flags); + if (domain->qi_batch) + goto out_unlock; + + domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC); + if (!domain->qi_batch) + ret = -ENOMEM; +out_unlock: + spin_unlock_irqrestore(&domain->cache_lock, flags); + + return ret; +} + static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did, struct device *dev, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; + ret = domain_qi_batch_alloc(domain); + if (ret) + return ret; + ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB); if (ret || !info->ats_enabled) return ret; @@ -139,6 +162,10 @@ static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did, struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; + ret = domain_qi_batch_alloc(domain); + if (ret) + return ret; + ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); if (ret || !info->ats_enabled) return ret; @@ -190,13 +217,6 @@ int cache_tag_assign_domain(struct dmar_domain *domain, u16 did = domain_get_id_for_dev(domain, dev); int ret; - /* domain->qi_bach will be freed in iommu_free_domain() path. */ - if (!domain->qi_batch) { - domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL); - if (!domain->qi_batch) - return -ENOMEM; - } - ret = __cache_tag_assign_domain(domain, did, dev, pasid); if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) return ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7d0acb74d5a54..79e0da9eb626c 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3220,6 +3220,9 @@ void device_block_translation(struct device *dev) struct intel_iommu *iommu = info->iommu; unsigned long flags; + if (info->domain) + cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); + iommu_disable_pci_caps(info); if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) @@ -3236,7 +3239,6 @@ void device_block_translation(struct device *dev) list_del(&info->link); spin_unlock_irqrestore(&info->domain->lock, flags); - cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); domain_detach_iommu(info->domain, iommu); info->domain = NULL; } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 0f2a926d3bd59..5b7d85f1e143c 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -265,7 +265,8 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); devtlb_invalidation_with_pasid(iommu, dev, pasid); - intel_iommu_drain_pasid_prq(dev, pasid); + if (!fault_ignore) + intel_iommu_drain_pasid_prq(dev, pasid); } /* diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 34db379d066a5..79d8cc80693c3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -161,7 +161,22 @@ static bool cpus_have_group0 __ro_after_init; static void __init gic_prio_init(void) { - cpus_have_security_disabled = gic_dist_security_disabled(); + bool ds; + + ds = gic_dist_security_disabled(); + if (!ds) { + u32 val; + + val = readl_relaxed(gic_data.dist_base + GICD_CTLR); + val |= GICD_CTLR_DS; + writel_relaxed(val, gic_data.dist_base + GICD_CTLR); + + ds = gic_dist_security_disabled(); + if (ds) + pr_warn("Broken GIC integration, security disabled"); + } + + cpus_have_security_disabled = ds; cpus_have_group0 = gic_has_group0(); /* diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 8fae6dc010241..6503573557fdf 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -64,7 +64,7 @@ static void gic_check_cpu_features(void) union gic_base { void __iomem *common_base; - void __percpu * __iomem *percpu_base; + void __iomem * __percpu *percpu_base; }; struct gic_chip_data { diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index d58db9a27e6cf..76e2c68685487 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -76,9 +76,9 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone, * pointer and the requested position. */ nr_blocks = block - wp_block; - ret = blkdev_issue_zeroout(dev->bdev, - dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block), - dmz_blk2sect(nr_blocks), GFP_NOIO, 0); + ret = blk_zone_issue_zeroout(dev->bdev, + dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block), + dmz_blk2sect(nr_blocks), GFP_NOIO); if (ret) { dmz_dev_err(dev, "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d", diff --git a/drivers/platform/x86/dell/alienware-wmi.c b/drivers/platform/x86/dell/alienware-wmi.c index 77465ed9b449b..341d01d3e3e4c 100644 --- a/drivers/platform/x86/dell/alienware-wmi.c +++ b/drivers/platform/x86/dell/alienware-wmi.c @@ -190,7 +190,7 @@ static struct quirk_entry quirk_asm201 = { }; static struct quirk_entry quirk_g_series = { - .num_zones = 2, + .num_zones = 0, .hdmi_mux = 0, .amplifier = 0, .deepslp = 0, @@ -199,7 +199,7 @@ static struct quirk_entry quirk_g_series = { }; static struct quirk_entry quirk_x_series = { - .num_zones = 2, + .num_zones = 0, .hdmi_mux = 0, .amplifier = 0, .deepslp = 0, @@ -243,6 +243,15 @@ static const struct dmi_system_id alienware_quirks[] __initconst = { }, { .callback = dmi_matched, + .ident = "Alienware m16 R1 AMD", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"), + }, + .driver_data = &quirk_x_series, + }, + { + .callback = dmi_matched, .ident = "Alienware m17 R5", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), @@ -687,6 +696,9 @@ static void alienware_zone_exit(struct platform_device *dev) { u8 zone; + if (!quirks->num_zones) + return; + sysfs_remove_group(&dev->dev.kobj, &zone_attribute_group); led_classdev_unregister(&global_led); if (zone_dev_attrs) { @@ -1229,9 +1241,11 @@ static int __init alienware_wmi_init(void) goto fail_prep_thermal_profile; } - ret = alienware_zone_init(platform_device); - if (ret) - goto fail_prep_zones; + if (quirks->num_zones > 0) { + ret = alienware_zone_init(platform_device); + if (ret) + goto fail_prep_zones; + } return 0; diff --git a/drivers/platform/x86/intel/ifs/core.c b/drivers/platform/x86/intel/ifs/core.c index bc252b883210a..1ae50702bdb76 100644 --- a/drivers/platform/x86/intel/ifs/core.c +++ b/drivers/platform/x86/intel/ifs/core.c @@ -20,6 +20,7 @@ static const struct x86_cpu_id ifs_cpu_ids[] __initconst = { X86_MATCH(INTEL_GRANITERAPIDS_X, ARRAY_GEN0), X86_MATCH(INTEL_GRANITERAPIDS_D, ARRAY_GEN0), X86_MATCH(INTEL_ATOM_CRESTMONT_X, ARRAY_GEN1), + X86_MATCH(INTEL_ATOM_DARKMONT_X, ARRAY_GEN1), {} }; MODULE_DEVICE_TABLE(x86cpu, ifs_cpu_ids); diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index 4a85aad2475a7..8272f1dd0fbc0 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -423,6 +423,7 @@ static const struct intel_vsec_platform_info lnl_info = { #define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d #define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d #define PCI_DEVICE_ID_INTEL_VSEC_LNL_M 0x647d +#define PCI_DEVICE_ID_INTEL_VSEC_PTL 0xb07d static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) }, @@ -432,6 +433,7 @@ static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_LNL_M, &lnl_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_PTL, &mtl_info) }, { } }; MODULE_DEVICE_TABLE(pci, intel_vsec_pci_ids); diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index d51eb0db06264..cbbb0f809704e 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -43,6 +43,7 @@ struct p2sb_res_cache { }; static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; +static bool p2sb_hidden_by_bios; static void p2sb_get_devfn(unsigned int *devfn) { @@ -97,6 +98,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) { + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + */ + pci_bus_write_config_dword(bus, devfn, P2SBC, 0); + /* Scan the P2SB device and cache its BAR0 */ p2sb_scan_and_cache_devfn(bus, devfn); @@ -104,6 +111,8 @@ static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) if (devfn == P2SB_DEVFN_GOLDMONT) p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); + pci_bus_write_config_dword(bus, devfn, P2SBC, P2SBC_HIDE); + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) return -ENOENT; @@ -129,7 +138,7 @@ static int p2sb_cache_resources(void) u32 value = P2SBC_HIDE; struct pci_bus *bus; u16 class; - int ret; + int ret = 0; /* Get devfn for P2SB device itself */ p2sb_get_devfn(&devfn_p2sb); @@ -152,22 +161,53 @@ static int p2sb_cache_resources(void) */ pci_lock_rescan_remove(); + pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); + p2sb_hidden_by_bios = value & P2SBC_HIDE; + /* - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * Unhide the P2SB device here, if needed. + * If the BIOS does not hide the P2SB device then its resources + * are accesilble. Cache them only if the P2SB device is hidden. */ - pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); + if (p2sb_hidden_by_bios) + ret = p2sb_scan_and_cache(bus, devfn_p2sb); - ret = p2sb_scan_and_cache(bus, devfn_p2sb); + pci_unlock_rescan_remove(); - /* Hide the P2SB device, if it was hidden */ - if (value & P2SBC_HIDE) - pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, P2SBC_HIDE); + return ret; +} - pci_unlock_rescan_remove(); +static int p2sb_read_from_cache(struct pci_bus *bus, unsigned int devfn, + struct resource *mem) +{ + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; + + if (cache->bus_dev_id != bus->dev.id) + return -ENODEV; + + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); + + return 0; +} + +static int p2sb_read_from_dev(struct pci_bus *bus, unsigned int devfn, + struct resource *mem) +{ + struct pci_dev *pdev; + int ret = 0; + + pdev = pci_get_slot(bus, devfn); + if (!pdev) + return -ENODEV; + + if (p2sb_valid_resource(pci_resource_n(pdev, 0))) + p2sb_read_bar0(pdev, mem); + else + ret = -ENOENT; + + pci_dev_put(pdev); return ret; } @@ -188,8 +228,6 @@ static int p2sb_cache_resources(void) */ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) { - struct p2sb_res_cache *cache; - bus = p2sb_get_bus(bus); if (!bus) return -ENODEV; @@ -197,15 +235,10 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) if (!devfn) p2sb_get_devfn(&devfn); - cache = &p2sb_resources[PCI_FUNC(devfn)]; - if (cache->bus_dev_id != bus->dev.id) - return -ENODEV; + if (p2sb_hidden_by_bios) + return p2sb_read_from_cache(bus, devfn, mem); - if (!p2sb_valid_resource(&cache->res)) - return -ENOENT; - - memcpy(mem, &cache->res, sizeof(*mem)); - return 0; + return p2sb_read_from_dev(bus, devfn, mem); } EXPORT_SYMBOL_GPL(p2sb_bar); diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 0a39f68c641d1..bdc19cd8d3edf 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -855,6 +855,23 @@ static const struct ts_dmi_data rwc_nanote_next_data = { .properties = rwc_nanote_next_props, }; +static const struct property_entry sary_tab_3_props[] = { + PROPERTY_ENTRY_U32("touchscreen-size-x", 1730), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1151), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-sary-tab-3.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data sary_tab_3_data = { + .acpi_name = "MSSL1680:00", + .properties = sary_tab_3_props, +}; + static const struct property_entry schneider_sct101ctm_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1715), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), @@ -1616,6 +1633,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, }, { + /* SARY Tab 3 */ + .driver_data = (void *)&sary_tab_3_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SARY"), + DMI_MATCH(DMI_PRODUCT_NAME, "C210C"), + DMI_MATCH(DMI_PRODUCT_SKU, "TAB3"), + }, + }, + { /* Schneider SCT101CTM */ .driver_data = (void *)&schneider_sct101ctm_data, .matches = { diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index e3cc59b82ea61..dca99cfb7cbb8 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -371,8 +371,8 @@ .ops = &axp20x_ops, \ } -#define AXP_DESC(_family, _id, _match, _supply, _min, _max, _step, _vreg, \ - _vmask, _ereg, _emask) \ +#define AXP_DESC_DELAY(_family, _id, _match, _supply, _min, _max, _step, _vreg, \ + _vmask, _ereg, _emask, _ramp_delay) \ [_family##_##_id] = { \ .name = (_match), \ .supply_name = (_supply), \ @@ -388,9 +388,15 @@ .vsel_mask = (_vmask), \ .enable_reg = (_ereg), \ .enable_mask = (_emask), \ + .ramp_delay = (_ramp_delay), \ .ops = &axp20x_ops, \ } +#define AXP_DESC(_family, _id, _match, _supply, _min, _max, _step, _vreg, \ + _vmask, _ereg, _emask) \ + AXP_DESC_DELAY(_family, _id, _match, _supply, _min, _max, _step, _vreg, \ + _vmask, _ereg, _emask, 0) + #define AXP_DESC_SW(_family, _id, _match, _supply, _ereg, _emask) \ [_family##_##_id] = { \ .name = (_match), \ @@ -419,8 +425,8 @@ .ops = &axp20x_ops_fixed \ } -#define AXP_DESC_RANGES(_family, _id, _match, _supply, _ranges, _n_voltages, \ - _vreg, _vmask, _ereg, _emask) \ +#define AXP_DESC_RANGES_DELAY(_family, _id, _match, _supply, _ranges, _n_voltages, \ + _vreg, _vmask, _ereg, _emask, _ramp_delay) \ [_family##_##_id] = { \ .name = (_match), \ .supply_name = (_supply), \ @@ -436,9 +442,15 @@ .enable_mask = (_emask), \ .linear_ranges = (_ranges), \ .n_linear_ranges = ARRAY_SIZE(_ranges), \ + .ramp_delay = (_ramp_delay), \ .ops = &axp20x_ops_range, \ } +#define AXP_DESC_RANGES(_family, _id, _match, _supply, _ranges, _n_voltages, \ + _vreg, _vmask, _ereg, _emask) \ + AXP_DESC_RANGES_DELAY(_family, _id, _match, _supply, _ranges, \ + _n_voltages, _vreg, _vmask, _ereg, _emask, 0) + static const int axp209_dcdc2_ldo3_slew_rates[] = { 1600, 800, @@ -781,21 +793,21 @@ static const struct linear_range axp717_dcdc3_ranges[] = { }; static const struct regulator_desc axp717_regulators[] = { - AXP_DESC_RANGES(AXP717, DCDC1, "dcdc1", "vin1", + AXP_DESC_RANGES_DELAY(AXP717, DCDC1, "dcdc1", "vin1", axp717_dcdc1_ranges, AXP717_DCDC1_NUM_VOLTAGES, AXP717_DCDC1_CONTROL, AXP717_DCDC_V_OUT_MASK, - AXP717_DCDC_OUTPUT_CONTROL, BIT(0)), - AXP_DESC_RANGES(AXP717, DCDC2, "dcdc2", "vin2", + AXP717_DCDC_OUTPUT_CONTROL, BIT(0), 640), + AXP_DESC_RANGES_DELAY(AXP717, DCDC2, "dcdc2", "vin2", axp717_dcdc2_ranges, AXP717_DCDC2_NUM_VOLTAGES, AXP717_DCDC2_CONTROL, AXP717_DCDC_V_OUT_MASK, - AXP717_DCDC_OUTPUT_CONTROL, BIT(1)), - AXP_DESC_RANGES(AXP717, DCDC3, "dcdc3", "vin3", + AXP717_DCDC_OUTPUT_CONTROL, BIT(1), 640), + AXP_DESC_RANGES_DELAY(AXP717, DCDC3, "dcdc3", "vin3", axp717_dcdc3_ranges, AXP717_DCDC3_NUM_VOLTAGES, AXP717_DCDC3_CONTROL, AXP717_DCDC_V_OUT_MASK, - AXP717_DCDC_OUTPUT_CONTROL, BIT(2)), - AXP_DESC(AXP717, DCDC4, "dcdc4", "vin4", 1000, 3700, 100, + AXP717_DCDC_OUTPUT_CONTROL, BIT(2), 640), + AXP_DESC_DELAY(AXP717, DCDC4, "dcdc4", "vin4", 1000, 3700, 100, AXP717_DCDC4_CONTROL, AXP717_DCDC_V_OUT_MASK, - AXP717_DCDC_OUTPUT_CONTROL, BIT(3)), + AXP717_DCDC_OUTPUT_CONTROL, BIT(3), 6400), AXP_DESC(AXP717, ALDO1, "aldo1", "aldoin", 500, 3500, 100, AXP717_ALDO1_CONTROL, AXP717_LDO_V_OUT_MASK, AXP717_LDO0_OUTPUT_CONTROL, BIT(0)), diff --git a/drivers/spi/spi-aspeed-smc.c b/drivers/spi/spi-aspeed-smc.c index 8eb843ddb25f2..e9beae95ddeda 100644 --- a/drivers/spi/spi-aspeed-smc.c +++ b/drivers/spi/spi-aspeed-smc.c @@ -239,7 +239,7 @@ static ssize_t aspeed_spi_read_user(struct aspeed_spi_chip *chip, ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, offset, op->cmd.opcode); if (ret < 0) - return ret; + goto stop_user; if (op->dummy.buswidth && op->dummy.nbytes) { for (i = 0; i < op->dummy.nbytes / op->dummy.buswidth; i++) @@ -249,8 +249,9 @@ static ssize_t aspeed_spi_read_user(struct aspeed_spi_chip *chip, aspeed_spi_set_io_mode(chip, io_mode); aspeed_spi_read_from_ahb(buf, chip->ahb_base, len); +stop_user: aspeed_spi_stop_user(chip); - return 0; + return ret; } static ssize_t aspeed_spi_write_user(struct aspeed_spi_chip *chip, @@ -261,10 +262,11 @@ static ssize_t aspeed_spi_write_user(struct aspeed_spi_chip *chip, aspeed_spi_start_user(chip); ret = aspeed_spi_send_cmd_addr(chip, op->addr.nbytes, op->addr.val, op->cmd.opcode); if (ret < 0) - return ret; + goto stop_user; aspeed_spi_write_to_ahb(chip->ahb_base, op->data.buf.out, op->data.nbytes); +stop_user: aspeed_spi_stop_user(chip); - return 0; + return ret; } /* support for 1-1-1, 1-1-2 or 1-1-4 */ diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 0b45b7b2b3ab3..a031ecb358e08 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -43,6 +43,7 @@ static_assert(CQSPI_MAX_CHIPSELECT <= SPI_CS_CNT_MAX); #define CQSPI_SLOW_SRAM BIT(4) #define CQSPI_NEEDS_APB_AHB_HAZARD_WAR BIT(5) #define CQSPI_RD_NO_IRQ BIT(6) +#define CQSPI_DISABLE_STIG_MODE BIT(7) /* Capabilities */ #define CQSPI_SUPPORTS_OCTAL BIT(0) @@ -103,6 +104,7 @@ struct cqspi_st { bool apb_ahb_hazard; bool is_jh7110; /* Flag for StarFive JH7110 SoC */ + bool disable_stig_mode; const struct cqspi_driver_platdata *ddata; }; @@ -1416,7 +1418,8 @@ static int cqspi_mem_process(struct spi_mem *mem, const struct spi_mem_op *op) * reads, prefer STIG mode for such small reads. */ if (!op->addr.nbytes || - op->data.nbytes <= CQSPI_STIG_DATA_LEN_MAX) + (op->data.nbytes <= CQSPI_STIG_DATA_LEN_MAX && + !cqspi->disable_stig_mode)) return cqspi_command_read(f_pdata, op); return cqspi_read(f_pdata, op); @@ -1880,6 +1883,8 @@ static int cqspi_probe(struct platform_device *pdev) if (ret) goto probe_reset_failed; } + if (ddata->quirks & CQSPI_DISABLE_STIG_MODE) + cqspi->disable_stig_mode = true; if (of_device_is_compatible(pdev->dev.of_node, "xlnx,versal-ospi-1.0")) { @@ -2043,7 +2048,8 @@ static const struct cqspi_driver_platdata intel_lgm_qspi = { static const struct cqspi_driver_platdata socfpga_qspi = { .quirks = CQSPI_DISABLE_DAC_MODE | CQSPI_NO_SUPPORT_WR_COMPLETION - | CQSPI_SLOW_SRAM, + | CQSPI_SLOW_SRAM + | CQSPI_DISABLE_STIG_MODE, }; static const struct cqspi_driver_platdata versal_ospi = { diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 864e581674173..1bc012fce7cb8 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -241,6 +241,20 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable) struct spi_controller *ctlr = spi->controller; struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); bool cs_asserted = spi->mode & SPI_CS_HIGH ? enable : !enable; + bool cs_actual; + + /* + * SPI subsystem tries to avoid no-op calls that would break the PM + * refcount below. It can't however for the first time it is used. + * To detect this case we read it here and bail out early for no-ops. + */ + if (spi_get_csgpiod(spi, 0)) + cs_actual = !!(readl_relaxed(rs->regs + ROCKCHIP_SPI_SER) & 1); + else + cs_actual = !!(readl_relaxed(rs->regs + ROCKCHIP_SPI_SER) & + BIT(spi_get_chipselect(spi, 0))); + if (unlikely(cs_actual == cs_asserted)) + return; if (cs_asserted) { /* Keep things powered as long as CS is asserted */ diff --git a/drivers/staging/gpib/Kconfig b/drivers/staging/gpib/Kconfig index 95308d15a5551..259f3ff336467 100644 --- a/drivers/staging/gpib/Kconfig +++ b/drivers/staging/gpib/Kconfig @@ -50,6 +50,7 @@ config GPIB_CEC_PCI tristate "CEC PCI board" depends on PCI depends on HAS_IOPORT + depends on !X86_PAE select GPIB_COMMON select GPIB_NEC7210 help @@ -62,6 +63,8 @@ config GPIB_CEC_PCI config GPIB_NI_PCI_ISA tristate "NI PCI/ISA compatible boards" depends on ISA_BUS || PCI || PCMCIA + depends on HAS_IOPORT + depends on !X86_PAE select GPIB_COMMON select GPIB_NEC7210 help @@ -85,6 +88,7 @@ config GPIB_CB7210 tristate "Measurement Computing compatible boards" depends on HAS_IOPORT depends on ISA_BUS || PCI || PCMCIA + depends on !X86_PAE select GPIB_COMMON select GPIB_NEC7210 help @@ -128,7 +132,7 @@ config GPIB_FMH tristate "FMH FPGA based devices" select GPIB_COMMON select GPIB_NEC7210 - depends on BROKEN + depends on !PPC depends on OF && PCI help GPIB driver for fmhess FPGA based devices @@ -174,6 +178,7 @@ config GPIB_INES tristate "INES" depends on PCI || ISA_BUS || PCMCIA depends on HAS_IOPORT + depends on !X86_PAE select GPIB_COMMON select GPIB_NEC7210 help diff --git a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c index 796c3a5be5451..267651a15fa00 100644 --- a/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c +++ b/drivers/staging/gpib/lpvo_usb_gpib/lpvo_usb_gpib.c @@ -901,7 +901,7 @@ static int usb_gpib_read(gpib_board_t *board, } else { /* we are in the closing <DLE><ETX> sequence */ - + c = nc; if (c == ETX) { c = one_char(board, &b); if (c == ACK) { diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 4d63d80e78a92..649e74e9b52f6 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -467,7 +467,8 @@ static void set_io_from_upio(struct uart_port *p) break; #endif default: - WARN(1, "Unsupported UART type %x\n", p->iotype); + WARN(p->iotype != UPIO_PORT || p->iobase, + "Unsupported UART type %x\n", p->iotype); p->serial_in = no_serial_in; p->serial_out = no_serial_out; } diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index df523c7444230..924b803af4400 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -157,6 +157,7 @@ struct sci_port { bool has_rtscts; bool autorts; + bool tx_occurred; }; #define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS @@ -850,6 +851,7 @@ static void sci_transmit_chars(struct uart_port *port) { struct tty_port *tport = &port->state->port; unsigned int stopped = uart_tx_stopped(port); + struct sci_port *s = to_sci_port(port); unsigned short status; unsigned short ctrl; int count; @@ -885,6 +887,7 @@ static void sci_transmit_chars(struct uart_port *port) } sci_serial_out(port, SCxTDR, c); + s->tx_occurred = true; port->icount.tx++; } while (--count > 0); @@ -1241,6 +1244,8 @@ static void sci_dma_tx_complete(void *arg) if (kfifo_len(&tport->xmit_fifo) < WAKEUP_CHARS) uart_write_wakeup(port); + s->tx_occurred = true; + if (!kfifo_is_empty(&tport->xmit_fifo)) { s->cookie_tx = 0; schedule_work(&s->work_tx); @@ -1731,6 +1736,19 @@ static void sci_flush_buffer(struct uart_port *port) s->cookie_tx = -EINVAL; } } + +static void sci_dma_check_tx_occurred(struct sci_port *s) +{ + struct dma_tx_state state; + enum dma_status status; + + if (!s->chan_tx) + return; + + status = dmaengine_tx_status(s->chan_tx, s->cookie_tx, &state); + if (status == DMA_COMPLETE || status == DMA_IN_PROGRESS) + s->tx_occurred = true; +} #else /* !CONFIG_SERIAL_SH_SCI_DMA */ static inline void sci_request_dma(struct uart_port *port) { @@ -1740,6 +1758,10 @@ static inline void sci_free_dma(struct uart_port *port) { } +static void sci_dma_check_tx_occurred(struct sci_port *s) +{ +} + #define sci_flush_buffer NULL #endif /* !CONFIG_SERIAL_SH_SCI_DMA */ @@ -2076,6 +2098,12 @@ static unsigned int sci_tx_empty(struct uart_port *port) { unsigned short status = sci_serial_in(port, SCxSR); unsigned short in_tx_fifo = sci_txfill(port); + struct sci_port *s = to_sci_port(port); + + sci_dma_check_tx_occurred(s); + + if (!s->tx_occurred) + return TIOCSER_TEMT; return (status & SCxSR_TEND(port)) && !in_tx_fifo ? TIOCSER_TEMT : 0; } @@ -2247,6 +2275,7 @@ static int sci_startup(struct uart_port *port) dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + s->tx_occurred = false; sci_request_dma(port); ret = sci_request_irq(s); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index b7ec5797d5baa..8a01e4393159d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5556,6 +5556,7 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, lrbp = &hba->lrb[task_tag]; lrbp->compl_time_stamp = ktime_get(); + lrbp->compl_time_stamp_local_clock = local_clock(); cmd = lrbp->cmd; if (cmd) { if (unlikely(ufshcd_should_inform_monitor(hba, lrbp))) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 500dc35e64774..0b2490347b9fe 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2794,8 +2794,14 @@ int usb_add_hcd(struct usb_hcd *hcd, int retval; struct usb_device *rhdev; struct usb_hcd *shared_hcd; + int skip_phy_initialization; - if (!hcd->skip_phy_initialization) { + if (usb_hcd_is_primary_hcd(hcd)) + skip_phy_initialization = hcd->skip_phy_initialization; + else + skip_phy_initialization = hcd->primary_hcd->skip_phy_initialization; + + if (!skip_phy_initialization) { if (usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); if (IS_ERR(hcd->phy_roothub)) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index cb54390e7de48..8c3941ecaaf5d 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -3546,11 +3546,9 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq, port_status |= USB_PORT_STAT_C_OVERCURRENT << 16; } - if (!hsotg->flags.b.port_connect_status) { + if (dwc2_is_device_mode(hsotg)) { /* - * The port is disconnected, which means the core is - * either in device mode or it soon will be. Just - * return 0's for the remainder of the port status + * Just return 0's for the remainder of the port status * since the port register can't be read if the core * is in device mode. */ @@ -3620,13 +3618,11 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, u16 typereq, if (wvalue != USB_PORT_FEAT_TEST && (!windex || windex > 1)) goto error; - if (!hsotg->flags.b.port_connect_status) { + if (dwc2_is_device_mode(hsotg)) { /* - * The port is disconnected, which means the core is - * either in device mode or it soon will be. Just - * return without doing anything since the port - * register can't be written if the core is in device - * mode. + * Just return 0's for the remainder of the port status + * since the port register can't be read if the core + * is in device mode. */ break; } @@ -4349,7 +4345,7 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd) if (hsotg->bus_suspended) goto skip_power_saving; - if (hsotg->flags.b.port_connect_status == 0) + if (!(dwc2_read_hprt0(hsotg) & HPRT0_CONNSTS)) goto skip_power_saving; switch (hsotg->params.power_down) { @@ -4431,6 +4427,7 @@ static int _dwc2_hcd_resume(struct usb_hcd *hcd) * Power Down mode. */ if (hprt0 & HPRT0_CONNSTS) { + set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); hsotg->lx_state = DWC2_L0; goto unlock; } diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c index 356812cbcd884..3edc5aca76f97 100644 --- a/drivers/usb/dwc3/dwc3-imx8mp.c +++ b/drivers/usb/dwc3/dwc3-imx8mp.c @@ -129,6 +129,16 @@ static void dwc3_imx8mp_wakeup_disable(struct dwc3_imx8mp *dwc3_imx) writel(val, dwc3_imx->hsio_blk_base + USB_WAKEUP_CTRL); } +static const struct property_entry dwc3_imx8mp_properties[] = { + PROPERTY_ENTRY_BOOL("xhci-missing-cas-quirk"), + PROPERTY_ENTRY_BOOL("xhci-skip-phy-init-quirk"), + {}, +}; + +static const struct software_node dwc3_imx8mp_swnode = { + .properties = dwc3_imx8mp_properties, +}; + static irqreturn_t dwc3_imx8mp_interrupt(int irq, void *_dwc3_imx) { struct dwc3_imx8mp *dwc3_imx = _dwc3_imx; @@ -148,17 +158,6 @@ static irqreturn_t dwc3_imx8mp_interrupt(int irq, void *_dwc3_imx) return IRQ_HANDLED; } -static int dwc3_imx8mp_set_software_node(struct device *dev) -{ - struct property_entry props[3] = { 0 }; - int prop_idx = 0; - - props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-missing-cas-quirk"); - props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-skip-phy-init-quirk"); - - return device_create_managed_software_node(dev, props, NULL); -} - static int dwc3_imx8mp_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -221,17 +220,17 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) if (err < 0) goto disable_rpm; - err = dwc3_imx8mp_set_software_node(dev); + err = device_add_software_node(dev, &dwc3_imx8mp_swnode); if (err) { err = -ENODEV; - dev_err(dev, "failed to create software node\n"); + dev_err(dev, "failed to add software node\n"); goto disable_rpm; } err = of_platform_populate(node, NULL, NULL, dev); if (err) { dev_err(&pdev->dev, "failed to create dwc3 core\n"); - goto disable_rpm; + goto remove_swnode; } dwc3_imx->dwc3 = of_find_device_by_node(dwc3_np); @@ -255,6 +254,8 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) depopulate: of_platform_depopulate(dev); +remove_swnode: + device_remove_software_node(dev); disable_rpm: pm_runtime_disable(dev); pm_runtime_put_noidle(dev); @@ -268,6 +269,7 @@ static void dwc3_imx8mp_remove(struct platform_device *pdev) pm_runtime_get_sync(dev); of_platform_depopulate(dev); + device_remove_software_node(dev); pm_runtime_disable(dev); pm_runtime_put_noidle(dev); diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index e3738e1610db2..a33a42ba0249a 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -121,8 +121,11 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) * in use but the usb3-phy entry is missing from the device tree. * Therefore, skip these operations in this case. */ - if (!priv_data->usb3_phy) + if (!priv_data->usb3_phy) { + /* Deselect the PIPE Clock Select bit in FPD PIPE Clock register */ + writel(PIPE_CLK_DESELECT, priv_data->regs + XLNX_USB_FPD_PIPE_CLK); goto skip_usb3_phy; + } crst = devm_reset_control_get_exclusive(dev, "usb_crst"); if (IS_ERR(crst)) { diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index ee3d9e3578f79..12e866fb311d6 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -1591,7 +1591,11 @@ static int f_midi2_create_card(struct f_midi2 *midi2) fb->info.midi_ci_version = b->midi_ci_version; fb->info.ui_hint = reverse_dir(b->ui_hint); fb->info.sysex8_streams = b->sysex8_streams; - fb->info.flags |= b->is_midi1; + if (b->is_midi1 < 2) + fb->info.flags |= b->is_midi1; + else + fb->info.flags |= SNDRV_UMP_BLOCK_IS_MIDI1 | + SNDRV_UMP_BLOCK_IS_LOWSPEED; strscpy(fb->info.name, ump_fb_name(b), sizeof(fb->info.name)); } diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 0a8c05b2746b4..53d9fc41acc52 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -579,9 +579,12 @@ static int gs_start_io(struct gs_port *port) * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { - gs_free_requests(ep, head, &port->read_allocated); - gs_free_requests(port->port_usb->in, &port->write_pool, - &port->write_allocated); + /* Free reqs only if we are still connected */ + if (port->port_usb) { + gs_free_requests(ep, head, &port->read_allocated); + gs_free_requests(port->port_usb->in, &port->write_pool, + &port->write_allocated); + } status = -EIO; } diff --git a/drivers/usb/host/ehci-sh.c b/drivers/usb/host/ehci-sh.c index 5d0d972fb7b1b..2d23690d72c50 100644 --- a/drivers/usb/host/ehci-sh.c +++ b/drivers/usb/host/ehci-sh.c @@ -119,8 +119,12 @@ static int ehci_hcd_sh_probe(struct platform_device *pdev) if (IS_ERR(priv->iclk)) priv->iclk = NULL; - clk_enable(priv->fclk); - clk_enable(priv->iclk); + ret = clk_enable(priv->fclk); + if (ret) + goto fail_request_resource; + ret = clk_enable(priv->iclk); + if (ret) + goto fail_iclk; ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret != 0) { @@ -136,6 +140,7 @@ static int ehci_hcd_sh_probe(struct platform_device *pdev) fail_add_hcd: clk_disable(priv->iclk); +fail_iclk: clk_disable(priv->fclk); fail_request_resource: diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index 9fe4f48b18980..0881fdd1823e0 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -779,11 +779,17 @@ max3421_check_unlink(struct usb_hcd *hcd) retval = 1; dev_dbg(&spi->dev, "%s: URB %p unlinked=%d", __func__, urb, urb->unlinked); - usb_hcd_unlink_urb_from_ep(hcd, urb); - spin_unlock_irqrestore(&max3421_hcd->lock, - flags); - usb_hcd_giveback_urb(hcd, urb, 0); - spin_lock_irqsave(&max3421_hcd->lock, flags); + if (urb == max3421_hcd->curr_urb) { + max3421_hcd->urb_done = 1; + max3421_hcd->hien &= ~(BIT(MAX3421_HI_HXFRDN_BIT) | + BIT(MAX3421_HI_RCVDAV_BIT)); + } else { + usb_hcd_unlink_urb_from_ep(hcd, urb); + spin_unlock_irqrestore(&max3421_hcd->lock, + flags); + usb_hcd_giveback_urb(hcd, urb, 0); + spin_lock_irqsave(&max3421_hcd->lock, flags); + } } } } diff --git a/drivers/usb/misc/onboard_usb_dev.c b/drivers/usb/misc/onboard_usb_dev.c index 36b11127280f3..75ac3c6aa92d0 100644 --- a/drivers/usb/misc/onboard_usb_dev.c +++ b/drivers/usb/misc/onboard_usb_dev.c @@ -407,8 +407,10 @@ static int onboard_dev_probe(struct platform_device *pdev) } if (of_device_is_compatible(pdev->dev.of_node, "usb424,2744") || - of_device_is_compatible(pdev->dev.of_node, "usb424,5744")) + of_device_is_compatible(pdev->dev.of_node, "usb424,5744")) { err = onboard_dev_5744_i2c_init(client); + onboard_dev->always_powered_in_suspend = true; + } put_device(&client->dev); if (err < 0) diff --git a/drivers/usb/typec/anx7411.c b/drivers/usb/typec/anx7411.c index d1e7c487ddfbb..0ae0a5ee3fae0 100644 --- a/drivers/usb/typec/anx7411.c +++ b/drivers/usb/typec/anx7411.c @@ -290,6 +290,8 @@ struct anx7411_data { struct power_supply *psy; struct power_supply_desc psy_desc; struct device *dev; + struct fwnode_handle *switch_node; + struct fwnode_handle *mux_node; }; static u8 snk_identity[] = { @@ -1021,6 +1023,16 @@ static void anx7411_port_unregister_altmodes(struct typec_altmode **adev) } } +static void anx7411_port_unregister(struct typec_params *typecp) +{ + fwnode_handle_put(typecp->caps.fwnode); + anx7411_port_unregister_altmodes(typecp->port_amode); + if (typecp->port) + typec_unregister_port(typecp->port); + if (typecp->role_sw) + usb_role_switch_put(typecp->role_sw); +} + static int anx7411_usb_mux_set(struct typec_mux_dev *mux, struct typec_mux_state *state) { @@ -1089,6 +1101,7 @@ static void anx7411_unregister_mux(struct anx7411_data *ctx) if (ctx->typec.typec_mux) { typec_mux_unregister(ctx->typec.typec_mux); ctx->typec.typec_mux = NULL; + fwnode_handle_put(ctx->mux_node); } } @@ -1097,6 +1110,7 @@ static void anx7411_unregister_switch(struct anx7411_data *ctx) if (ctx->typec.typec_switch) { typec_switch_unregister(ctx->typec.typec_switch); ctx->typec.typec_switch = NULL; + fwnode_handle_put(ctx->switch_node); } } @@ -1104,28 +1118,29 @@ static int anx7411_typec_switch_probe(struct anx7411_data *ctx, struct device *dev) { int ret; - struct device_node *node; - node = of_get_child_by_name(dev->of_node, "orientation_switch"); - if (!node) + ctx->switch_node = device_get_named_child_node(dev, "orientation_switch"); + if (!ctx->switch_node) return 0; - ret = anx7411_register_switch(ctx, dev, &node->fwnode); + ret = anx7411_register_switch(ctx, dev, ctx->switch_node); if (ret) { dev_err(dev, "failed register switch"); + fwnode_handle_put(ctx->switch_node); return ret; } - node = of_get_child_by_name(dev->of_node, "mode_switch"); - if (!node) { + ctx->mux_node = device_get_named_child_node(dev, "mode_switch"); + if (!ctx->mux_node) { dev_err(dev, "no typec mux exist"); ret = -ENODEV; goto unregister_switch; } - ret = anx7411_register_mux(ctx, dev, &node->fwnode); + ret = anx7411_register_mux(ctx, dev, ctx->mux_node); if (ret) { dev_err(dev, "failed register mode switch"); + fwnode_handle_put(ctx->mux_node); ret = -ENODEV; goto unregister_switch; } @@ -1154,34 +1169,34 @@ static int anx7411_typec_port_probe(struct anx7411_data *ctx, ret = fwnode_property_read_string(fwnode, "power-role", &buf); if (ret) { dev_err(dev, "power-role not found: %d\n", ret); - return ret; + goto put_fwnode; } ret = typec_find_port_power_role(buf); if (ret < 0) - return ret; + goto put_fwnode; cap->type = ret; ret = fwnode_property_read_string(fwnode, "data-role", &buf); if (ret) { dev_err(dev, "data-role not found: %d\n", ret); - return ret; + goto put_fwnode; } ret = typec_find_port_data_role(buf); if (ret < 0) - return ret; + goto put_fwnode; cap->data = ret; ret = fwnode_property_read_string(fwnode, "try-power-role", &buf); if (ret) { dev_err(dev, "try-power-role not found: %d\n", ret); - return ret; + goto put_fwnode; } ret = typec_find_power_role(buf); if (ret < 0) - return ret; + goto put_fwnode; cap->prefer_role = ret; /* Get source pdos */ @@ -1193,7 +1208,7 @@ static int anx7411_typec_port_probe(struct anx7411_data *ctx, typecp->src_pdo_nr); if (ret < 0) { dev_err(dev, "source cap validate failed: %d\n", ret); - return -EINVAL; + goto put_fwnode; } typecp->caps_flags |= HAS_SOURCE_CAP; @@ -1207,7 +1222,7 @@ static int anx7411_typec_port_probe(struct anx7411_data *ctx, typecp->sink_pdo_nr); if (ret < 0) { dev_err(dev, "sink cap validate failed: %d\n", ret); - return -EINVAL; + goto put_fwnode; } for (i = 0; i < typecp->sink_pdo_nr; i++) { @@ -1251,13 +1266,21 @@ static int anx7411_typec_port_probe(struct anx7411_data *ctx, ret = PTR_ERR(ctx->typec.port); ctx->typec.port = NULL; dev_err(dev, "Failed to register type c port %d\n", ret); - return ret; + goto put_usb_role_switch; } typec_port_register_altmodes(ctx->typec.port, NULL, ctx, ctx->typec.port_amode, MAX_ALTMODE); return 0; + +put_usb_role_switch: + if (ctx->typec.role_sw) + usb_role_switch_put(ctx->typec.role_sw); +put_fwnode: + fwnode_handle_put(fwnode); + + return ret; } static int anx7411_typec_check_connection(struct anx7411_data *ctx) @@ -1523,8 +1546,7 @@ free_wq: destroy_workqueue(plat->workqueue); free_typec_port: - typec_unregister_port(plat->typec.port); - anx7411_port_unregister_altmodes(plat->typec.port_amode); + anx7411_port_unregister(&plat->typec); free_typec_switch: anx7411_unregister_switch(plat); @@ -1548,17 +1570,11 @@ static void anx7411_i2c_remove(struct i2c_client *client) i2c_unregister_device(plat->spi_client); - if (plat->typec.role_sw) - usb_role_switch_put(plat->typec.role_sw); - anx7411_unregister_mux(plat); anx7411_unregister_switch(plat); - if (plat->typec.port) - typec_unregister_port(plat->typec.port); - - anx7411_port_unregister_altmodes(plat->typec.port_amode); + anx7411_port_unregister(&plat->typec); } static const struct i2c_device_id anx7411_id[] = { diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index c435c0835744a..fcf499cc9458c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -46,11 +46,11 @@ void ucsi_notify_common(struct ucsi *ucsi, u32 cci) ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci)); if (cci & UCSI_CCI_ACK_COMPLETE && - test_bit(ACK_PENDING, &ucsi->flags)) + test_and_clear_bit(ACK_PENDING, &ucsi->flags)) complete(&ucsi->complete); if (cci & UCSI_CCI_COMMAND_COMPLETE && - test_bit(COMMAND_PENDING, &ucsi->flags)) + test_and_clear_bit(COMMAND_PENDING, &ucsi->flags)) complete(&ucsi->complete); } EXPORT_SYMBOL_GPL(ucsi_notify_common); @@ -65,6 +65,8 @@ int ucsi_sync_control_common(struct ucsi *ucsi, u64 command) else set_bit(COMMAND_PENDING, &ucsi->flags); + reinit_completion(&ucsi->complete); + ret = ucsi->ops->async_control(ucsi, command); if (ret) goto out_clear_bit; @@ -651,7 +653,8 @@ static void ucsi_unregister_altmodes(struct ucsi_connector *con, u8 recipient) static int ucsi_get_connector_status(struct ucsi_connector *con, bool conn_ack) { u64 command = UCSI_GET_CONNECTOR_STATUS | UCSI_CONNECTOR_NUMBER(con->num); - size_t size = min(UCSI_GET_CONNECTOR_STATUS_SIZE, UCSI_MAX_DATA_LENGTH(con->ucsi)); + size_t size = min(sizeof(con->status), + UCSI_MAX_DATA_LENGTH(con->ucsi)); int ret; ret = ucsi_send_command_common(con->ucsi, command, &con->status, size, conn_ack); diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 586446e02ef72..ec23da8405ff8 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -51,7 +51,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb, * * VariableName-12345678-1234-1234-1234-1234567891bc */ -bool efivarfs_valid_name(const char *str, int len) +static bool efivarfs_valid_name(const char *str, int len) { const char *s = str + len - EFI_VARIABLE_GUID_LEN; diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index d71d2e08422f0..74f0602a9e016 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -60,7 +60,6 @@ bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, extern const struct file_operations efivarfs_file_operations; extern const struct inode_operations efivarfs_dir_inode_operations; -extern bool efivarfs_valid_name(const char *str, int len); extern struct inode *efivarfs_get_inode(struct super_block *sb, const struct inode *dir, int mode, dev_t dev, bool is_removable); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index a929f1b613be8..beba15673be8d 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -144,9 +144,6 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) const unsigned char *s = qstr->name; unsigned int len = qstr->len; - if (!efivarfs_valid_name(s, len)) - return -EINVAL; - while (len-- > EFI_VARIABLE_GUID_LEN) hash = partial_name_hash(*s++, hash); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index c9f9b6e979648..9d96b833015c8 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -2043,6 +2043,7 @@ exit_cifs(void) destroy_workqueue(decrypt_wq); destroy_workqueue(fileinfo_put_wq); destroy_workqueue(serverclose_wq); + destroy_workqueue(cfid_put_wq); destroy_workqueue(cifsiod_wq); cifs_proc_clean(); } diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 668098d3b108e..f146e06c97eb6 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1947,6 +1947,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto unlink_out; } + netfs_wait_for_outstanding_io(inode); cifs_close_deferred_file_under_dentry(tcon, full_path); #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & @@ -2464,8 +2465,10 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir, } cifs_close_deferred_file_under_dentry(tcon, from_name); - if (d_inode(target_dentry) != NULL) + if (d_inode(target_dentry) != NULL) { + netfs_wait_for_outstanding_io(d_inode(target_dentry)); cifs_close_deferred_file_under_dentry(tcon, to_name); + } rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 50b1aba200089..d88b41133e00c 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -676,7 +676,7 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf return -ENOMEM; symname_utf16_len = utf8s_to_utf16s(buf->PathBuffer, symname_utf8_len, UTF16_LITTLE_ENDIAN, - symname_utf16, symname_utf8_len * 2); + (wchar_t *) symname_utf16, symname_utf8_len * 2); if (symname_utf16_len < 0) { kfree(symname_utf16); return symname_utf16_len; diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 0bb77f9ec6861..3306fb6551368 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -488,11 +488,11 @@ cifs_ses_add_channel(struct cifs_ses *ses, if (iface->sockaddr.ss_family == AF_INET) cifs_dbg(FYI, "adding channel to ses %p (speed:%zu bps rdma:%s ip:%pI4)\n", - ses, iface->speed, iface->rdma_capable ? "yes" : "no", + ses, iface->speed, str_yes_no(iface->rdma_capable), &ipv4->sin_addr); else cifs_dbg(FYI, "adding channel to ses %p (speed:%zu bps rdma:%s ip:%pI6)\n", - ses, iface->speed, iface->rdma_capable ? "yes" : "no", + ses, iface->speed, str_yes_no(iface->rdma_capable), &ipv6->sin6_addr); /* diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2b5fc5fd16435..68ee1c299c25f 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -3557,14 +3557,31 @@ xfs_btree_insrec( xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); /* - * If we just inserted into a new tree block, we have to - * recalculate nkey here because nkey is out of date. + * Update btree keys to reflect the newly added record or keyptr. + * There are three cases here to be aware of. Normally, all we have to + * do is walk towards the root, updating keys as necessary. * - * Otherwise we're just updating an existing block (having shoved - * some records into the new tree block), so use the regular key - * update mechanism. + * If the caller had us target a full block for the insertion, we dealt + * with that by calling the _make_block_unfull function. If the + * "make unfull" function splits the block, it'll hand us back the key + * and pointer of the new block. We haven't yet added the new block to + * the next level up, so if we decide to add the new record to the new + * block (bp->b_bn != old_bn), we have to update the caller's pointer + * so that the caller adds the new block with the correct key. + * + * However, there is a third possibility-- if the selected block is the + * root block of an inode-rooted btree and cannot be expanded further, + * the "make unfull" function moves the root block contents to a new + * block and updates the root block to point to the new block. In this + * case, no block pointer is passed back because the block has already + * been added to the btree. In this case, we need to use the regular + * key update function, just like the first case. This is critical for + * overlapping btrees, because the high key must be updated to reflect + * the entire tree, not just the subtree accessible through the first + * child of the root (which is now two levels down from the root). */ - if (bp && xfs_buf_daddr(bp) != old_bn) { + if (!xfs_btree_ptr_is_null(cur, &nptr) && + bp && xfs_buf_daddr(bp) != old_bn) { xfs_btree_get_keys(cur, block, lkey); } else if (xfs_btree_needs_key_update(cur, optr)) { error = xfs_btree_update_keys(cur, level); @@ -5144,7 +5161,7 @@ xfs_btree_count_blocks_helper( int level, void *data) { - xfs_extlen_t *blocks = data; + xfs_filblks_t *blocks = data; (*blocks)++; return 0; @@ -5154,7 +5171,7 @@ xfs_btree_count_blocks_helper( int xfs_btree_count_blocks( struct xfs_btree_cur *cur, - xfs_extlen_t *blocks) + xfs_filblks_t *blocks) { *blocks = 0; return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 3b739459ebb0f..c5bff273cae25 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -484,7 +484,7 @@ typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, xfs_btree_visit_blocks_fn fn, unsigned int flags, void *data); -int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks); +int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_filblks_t *blocks); union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n, struct xfs_btree_block *block); diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 9b34896dd1a32..6f270d8f4270c 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -744,6 +744,7 @@ xfs_finobt_count_blocks( { struct xfs_buf *agbp = NULL; struct xfs_btree_cur *cur; + xfs_filblks_t blocks; int error; error = xfs_ialloc_read_agi(pag, tp, 0, &agbp); @@ -751,9 +752,10 @@ xfs_finobt_count_blocks( return error; cur = xfs_finobt_init_cursor(pag, tp, agbp); - error = xfs_btree_count_blocks(cur, tree_blocks); + error = xfs_btree_count_blocks(cur, &blocks); xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); + *tree_blocks = blocks; return error; } diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c index e74bb059f24fa..4f3bfc884aff2 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.c +++ b/fs/xfs/libxfs/xfs_rtgroup.c @@ -496,7 +496,7 @@ xfs_rtginode_create( error = xfs_metadir_create(&upd, S_IFREG); if (error) - return error; + goto out_cancel; xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index a809513a290cf..3b5623611eba0 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -494,12 +494,13 @@ xfs_validate_sb_common( return -EINVAL; } - if (!sbp->sb_spino_align || - sbp->sb_spino_align > sbp->sb_inoalignmt || - (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) { + if (sbp->sb_spino_align && + (sbp->sb_spino_align > sbp->sb_inoalignmt || + (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) { xfs_warn(mp, - "Sparse inode alignment (%u) is invalid.", - sbp->sb_spino_align); +"Sparse inode alignment (%u) is invalid, must be integer factor of (%u).", + sbp->sb_spino_align, + sbp->sb_inoalignmt); return -EINVAL; } } else if (sbp->sb_spino_align) { diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index f228127a88ff2..fb47a76ead18c 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -92,8 +92,10 @@ xfs_symlink_verify( struct xfs_mount *mp = bp->b_mount; struct xfs_dsymlink_hdr *dsl = bp->b_addr; + /* no verification of non-crc buffers */ if (!xfs_has_crc(mp)) - return __this_address; + return NULL; + if (!xfs_verify_magic(bp, dsl->sl_magic)) return __this_address; if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid)) diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 61f80a6410c73..9f8c312dfd3c8 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -60,6 +60,32 @@ xchk_superblock_xref( } /* + * Calculate the ondisk superblock size in bytes given the feature set of the + * mounted filesystem (aka the primary sb). This is subtlely different from + * the logic in xfs_repair, which computes the size of a secondary sb given the + * featureset listed in the secondary sb. + */ +STATIC size_t +xchk_superblock_ondisk_size( + struct xfs_mount *mp) +{ + if (xfs_has_metadir(mp)) + return offsetofend(struct xfs_dsb, sb_pad); + if (xfs_has_metauuid(mp)) + return offsetofend(struct xfs_dsb, sb_meta_uuid); + if (xfs_has_crc(mp)) + return offsetofend(struct xfs_dsb, sb_lsn); + if (xfs_sb_version_hasmorebits(&mp->m_sb)) + return offsetofend(struct xfs_dsb, sb_bad_features2); + if (xfs_has_logv2(mp)) + return offsetofend(struct xfs_dsb, sb_logsunit); + if (xfs_has_sector(mp)) + return offsetofend(struct xfs_dsb, sb_logsectsize); + /* only support dirv2 or more recent */ + return offsetofend(struct xfs_dsb, sb_dirblklog); +} + +/* * Scrub the filesystem superblock. * * Note: We do /not/ attempt to check AG 0's superblock. Mount is @@ -75,6 +101,7 @@ xchk_superblock( struct xfs_buf *bp; struct xfs_dsb *sb; struct xfs_perag *pag; + size_t sblen; xfs_agnumber_t agno; uint32_t v2_ok; __be32 features_mask; @@ -145,8 +172,11 @@ xchk_superblock( xchk_block_set_preen(sc, bp); if (xfs_has_metadir(sc->mp)) { - if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino)) - xchk_block_set_preen(sc, bp); + if (sb->sb_rbmino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); + + if (sb->sb_rsumino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); } else { if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino)) xchk_block_set_preen(sc, bp); @@ -229,7 +259,13 @@ xchk_superblock( * sb_icount, sb_ifree, sb_fdblocks, sb_frexents */ - if (!xfs_has_metadir(mp)) { + if (xfs_has_metadir(mp)) { + if (sb->sb_uquotino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); + + if (sb->sb_gquotino != cpu_to_be64(0)) + xchk_block_set_preen(sc, bp); + } else { if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino)) xchk_block_set_preen(sc, bp); @@ -281,15 +317,8 @@ xchk_superblock( if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok))) xchk_block_set_corrupt(sc, bp); - if (xfs_has_metadir(mp)) { - if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog) - xchk_block_set_corrupt(sc, bp); - if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad))) - xchk_block_set_preen(sc, bp); - } else { - if (sb->sb_features2 != sb->sb_bad_features2) - xchk_block_set_preen(sc, bp); - } + if (sb->sb_features2 != sb->sb_bad_features2) + xchk_block_set_preen(sc, bp); } /* Check sb_features2 flags that are set at mkfs time. */ @@ -351,7 +380,10 @@ xchk_superblock( if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align)) xchk_block_set_corrupt(sc, bp); - if (!xfs_has_metadir(mp)) { + if (xfs_has_metadir(mp)) { + if (sb->sb_pquotino != cpu_to_be64(0)) + xchk_block_set_corrupt(sc, bp); + } else { if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino)) xchk_block_set_preen(sc, bp); } @@ -366,16 +398,25 @@ xchk_superblock( } if (xfs_has_metadir(mp)) { + if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino)) + xchk_block_set_preen(sc, bp); + if (sb->sb_rgcount != cpu_to_be32(mp->m_sb.sb_rgcount)) xchk_block_set_corrupt(sc, bp); if (sb->sb_rgextents != cpu_to_be32(mp->m_sb.sb_rgextents)) xchk_block_set_corrupt(sc, bp); + + if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog) + xchk_block_set_corrupt(sc, bp); + + if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad))) + xchk_block_set_corrupt(sc, bp); } /* Everything else must be zero. */ - if (memchr_inv(sb + 1, 0, - BBTOB(bp->b_length) - sizeof(struct xfs_dsb))) + sblen = xchk_superblock_ondisk_size(mp); + if (memchr_inv((char *)sb + sblen, 0, BBTOB(bp->b_length) - sblen)) xchk_block_set_corrupt(sc, bp); xchk_superblock_xref(sc, bp); @@ -458,7 +499,7 @@ xchk_agf_xref_btreeblks( { struct xfs_agf *agf = sc->sa.agf_bp->b_addr; struct xfs_mount *mp = sc->mp; - xfs_agblock_t blocks; + xfs_filblks_t blocks; xfs_agblock_t btreeblks; int error; @@ -507,7 +548,7 @@ xchk_agf_xref_refcblks( struct xfs_scrub *sc) { struct xfs_agf *agf = sc->sa.agf_bp->b_addr; - xfs_agblock_t blocks; + xfs_filblks_t blocks; int error; if (!sc->sa.refc_cur) @@ -840,7 +881,7 @@ xchk_agi_xref_fiblocks( struct xfs_scrub *sc) { struct xfs_agi *agi = sc->sa.agi_bp->b_addr; - xfs_agblock_t blocks; + xfs_filblks_t blocks; int error = 0; if (!xfs_has_inobtcounts(sc->mp)) diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0fad0baaba2f6..b45d2b32051a6 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -256,7 +256,7 @@ xrep_agf_calc_from_btrees( struct xfs_agf *agf = agf_bp->b_addr; struct xfs_mount *mp = sc->mp; xfs_agblock_t btreeblks; - xfs_agblock_t blocks; + xfs_filblks_t blocks; int error; /* Update the AGF counters from the bnobt. */ @@ -946,7 +946,7 @@ xrep_agi_calc_from_btrees( if (error) goto err; if (xfs_has_inobtcounts(mp)) { - xfs_agblock_t blocks; + xfs_filblks_t blocks; error = xfs_btree_count_blocks(cur, &blocks); if (error) @@ -959,7 +959,7 @@ xrep_agi_calc_from_btrees( agi->agi_freecount = cpu_to_be32(freecount); if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) { - xfs_agblock_t blocks; + xfs_filblks_t blocks; cur = xfs_finobt_init_cursor(sc->sa.pag, sc->tp, agi_bp); error = xfs_btree_count_blocks(cur, &blocks); diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 4a50f8e000409..ca23cf4db6c5e 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -261,7 +261,7 @@ xchk_fscount_btreeblks( struct xchk_fscounters *fsc, xfs_agnumber_t agno) { - xfs_extlen_t blocks; + xfs_filblks_t blocks; int error; error = xchk_ag_init_existing(sc, agno, &sc->sa); diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index ce86bdad37fa4..ccc6ca5934ca6 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -71,7 +71,8 @@ /* Map our scrub type to a sick mask and a set of health update functions. */ enum xchk_health_group { - XHG_FS = 1, + XHG_NONE = 1, + XHG_FS, XHG_AG, XHG_INO, XHG_RTGROUP, @@ -83,6 +84,7 @@ struct xchk_health_map { }; static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { + [XFS_SCRUB_TYPE_PROBE] = { XHG_NONE, 0 }, [XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB }, [XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF }, [XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL }, @@ -133,7 +135,7 @@ xchk_mark_healthy_if_clean( { if (!(sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | XFS_SCRUB_OFLAG_XCORRUPT))) - sc->sick_mask |= mask; + sc->healthy_mask |= mask; } /* @@ -189,6 +191,7 @@ xchk_update_health( { struct xfs_perag *pag; struct xfs_rtgroup *rtg; + unsigned int mask = sc->sick_mask; bool bad; /* @@ -203,50 +206,56 @@ xchk_update_health( return; } - if (!sc->sick_mask) - return; - bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | XFS_SCRUB_OFLAG_XCORRUPT)); + if (!bad) + mask |= sc->healthy_mask; switch (type_to_health_flag[sc->sm->sm_type].group) { + case XHG_NONE: + break; case XHG_AG: + if (!mask) + return; pag = xfs_perag_get(sc->mp, sc->sm->sm_agno); if (bad) - xfs_group_mark_corrupt(pag_group(pag), sc->sick_mask); + xfs_group_mark_corrupt(pag_group(pag), mask); else - xfs_group_mark_healthy(pag_group(pag), sc->sick_mask); + xfs_group_mark_healthy(pag_group(pag), mask); xfs_perag_put(pag); break; case XHG_INO: if (!sc->ip) return; - if (bad) { - unsigned int mask = sc->sick_mask; - - /* - * If we're coming in for repairs then we don't want - * sickness flags to propagate to the incore health - * status if the inode gets inactivated before we can - * fix it. - */ - if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) - mask |= XFS_SICK_INO_FORGET; + /* + * If we're coming in for repairs then we don't want sickness + * flags to propagate to the incore health status if the inode + * gets inactivated before we can fix it. + */ + if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) + mask |= XFS_SICK_INO_FORGET; + if (!mask) + return; + if (bad) xfs_inode_mark_corrupt(sc->ip, mask); - } else - xfs_inode_mark_healthy(sc->ip, sc->sick_mask); + else + xfs_inode_mark_healthy(sc->ip, mask); break; case XHG_FS: + if (!mask) + return; if (bad) - xfs_fs_mark_corrupt(sc->mp, sc->sick_mask); + xfs_fs_mark_corrupt(sc->mp, mask); else - xfs_fs_mark_healthy(sc->mp, sc->sick_mask); + xfs_fs_mark_healthy(sc->mp, mask); break; case XHG_RTGROUP: + if (!mask) + return; rtg = xfs_rtgroup_get(sc->mp, sc->sm->sm_agno); if (bad) - xfs_group_mark_corrupt(rtg_group(rtg), sc->sick_mask); + xfs_group_mark_corrupt(rtg_group(rtg), mask); else - xfs_group_mark_healthy(rtg_group(rtg), sc->sick_mask); + xfs_group_mark_healthy(rtg_group(rtg), mask); xfs_rtgroup_put(rtg); break; default: diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index abad54c3621d4..4dc7c83dc08a4 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -650,8 +650,8 @@ xchk_iallocbt_xref_rmap_btreeblks( struct xfs_scrub *sc) { xfs_filblks_t blocks; - xfs_extlen_t inobt_blocks = 0; - xfs_extlen_t finobt_blocks = 0; + xfs_filblks_t inobt_blocks = 0; + xfs_filblks_t finobt_blocks = 0; int error; if (!sc->sa.ino_cur || !sc->sa.rmap_cur || diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c index b78db65134651..c678cba1ffc3f 100644 --- a/fs/xfs/scrub/metapath.c +++ b/fs/xfs/scrub/metapath.c @@ -171,23 +171,13 @@ static int xchk_setup_metapath_quotadir( struct xfs_scrub *sc) { - struct xfs_trans *tp; - struct xfs_inode *dp = NULL; - int error; - - error = xfs_trans_alloc_empty(sc->mp, &tp); - if (error) - return error; + struct xfs_quotainfo *qi = sc->mp->m_quotainfo; - error = xfs_dqinode_load_parent(tp, &dp); - xfs_trans_cancel(tp); - if (error) - return error; + if (!qi || !qi->qi_dirip) + return -ENOENT; - error = xchk_setup_metapath_scan(sc, sc->mp->m_metadirip, - kasprintf(GFP_KERNEL, "quota"), dp); - xfs_irele(dp); - return error; + return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip, + kstrdup("quota", GFP_KERNEL), qi->qi_dirip); } /* Scan a quota inode under the /quota directory. */ @@ -196,37 +186,31 @@ xchk_setup_metapath_dqinode( struct xfs_scrub *sc, xfs_dqtype_t type) { - struct xfs_trans *tp = NULL; - struct xfs_inode *dp = NULL; + struct xfs_quotainfo *qi = sc->mp->m_quotainfo; struct xfs_inode *ip = NULL; - const char *path; - int error; - - error = xfs_trans_alloc_empty(sc->mp, &tp); - if (error) - return error; - error = xfs_dqinode_load_parent(tp, &dp); - if (error) - goto out_cancel; - - error = xfs_dqinode_load(tp, dp, type, &ip); - if (error) - goto out_dp; - - xfs_trans_cancel(tp); - tp = NULL; + if (!qi) + return -ENOENT; - path = kasprintf(GFP_KERNEL, "%s", xfs_dqinode_path(type)); - error = xchk_setup_metapath_scan(sc, dp, path, ip); + switch (type) { + case XFS_DQTYPE_USER: + ip = qi->qi_uquotaip; + break; + case XFS_DQTYPE_GROUP: + ip = qi->qi_gquotaip; + break; + case XFS_DQTYPE_PROJ: + ip = qi->qi_pquotaip; + break; + default: + ASSERT(0); + return -EINVAL; + } + if (!ip) + return -ENOENT; - xfs_irele(ip); -out_dp: - xfs_irele(dp); -out_cancel: - if (tp) - xfs_trans_cancel(tp); - return error; + return xchk_setup_metapath_scan(sc, qi->qi_dirip, + kstrdup(xfs_dqinode_path(type), GFP_KERNEL), ip); } #else # define xchk_setup_metapath_quotadir(...) (-ENOENT) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 2b6be75e94241..1c5e45cc64190 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -491,7 +491,7 @@ xchk_refcount_xref_rmap( struct xfs_scrub *sc, xfs_filblks_t cow_blocks) { - xfs_extlen_t refcbt_blocks = 0; + xfs_filblks_t refcbt_blocks = 0; xfs_filblks_t blocks; int error; diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index a7fda3e2b0137..5dbbe93cb49bf 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -184,6 +184,12 @@ struct xfs_scrub { */ unsigned int sick_mask; + /* + * Clear these XFS_SICK_* flags but only if the scan is ok. Useful for + * removing ZAPPED flags after a repair. + */ + unsigned int healthy_mask; + /* next time we want to cond_resched() */ struct xchk_relax relax; diff --git a/fs/xfs/scrub/symlink_repair.c b/fs/xfs/scrub/symlink_repair.c index d015a86ef460f..953ce7be78dc2 100644 --- a/fs/xfs/scrub/symlink_repair.c +++ b/fs/xfs/scrub/symlink_repair.c @@ -36,6 +36,7 @@ #include "scrub/tempfile.h" #include "scrub/tempexch.h" #include "scrub/reap.h" +#include "scrub/health.h" /* * Symbolic Link Repair @@ -233,7 +234,7 @@ xrep_symlink_salvage( * target zapped flag. */ if (buflen == 0) { - sc->sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED; + xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_SYMLINK_ZAPPED); sprintf(target_buf, DUMMY_TARGET); } diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 4b7f7860e37ec..2d7ca7e1bbca0 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -184,11 +184,18 @@ out_release_dquots: } /* + * Move sc->tempip from the regular directory tree to the metadata directory + * tree if sc->ip is part of the metadata directory tree and tempip has an + * eligible file mode. + * * Temporary files have to be created before we even know which inode we're * going to scrub, so we assume that they will be part of the regular directory * tree. If it turns out that we're actually scrubbing a file from the * metadata directory tree, we have to subtract the temp file from the root - * dquots and detach the dquots. + * dquots and detach the dquots prior to setting the METADATA iflag. However, + * the scrub setup functions grab sc->ip and create sc->tempip before we + * actually get around to checking if the file mode is the right type for the + * scrubber. */ int xrep_tempfile_adjust_directory_tree( @@ -204,6 +211,9 @@ xrep_tempfile_adjust_directory_tree( if (!sc->ip || !xfs_is_metadir_inode(sc->ip)) return 0; + if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) && + !S_ISREG(VFS_I(sc->tempip)->i_mode)) + return 0; xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; @@ -223,6 +233,7 @@ xrep_tempfile_adjust_directory_tree( if (error) goto out_ilock; + xfs_iflags_set(sc->tempip, XFS_IRECOVERY); xfs_qm_dqdetach(sc->tempip); out_ilock: xrep_tempfile_iunlock(sc); @@ -246,6 +257,8 @@ xrep_tempfile_remove_metadir( ASSERT(sc->tp == NULL); + xfs_iflags_clear(sc->tempip, XFS_IRECOVERY); + xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; @@ -945,10 +958,13 @@ xrep_is_tempfile( /* * Files in the metadata directory tree also have S_PRIVATE set and - * IOP_XATTR unset, so we must distinguish them separately. + * IOP_XATTR unset, so we must distinguish them separately. We (ab)use + * the IRECOVERY flag to mark temporary metadir inodes knowing that the + * end of log recovery clears IRECOVERY, so the only ones that can + * exist during online repair are the ones we create. */ if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA)) - return false; + return __xfs_iflags_test(ip, XFS_IRECOVERY); if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR)) return true; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 9b38f5ad1eaf0..d2ae7e93acb08 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -605,7 +605,7 @@ TRACE_EVENT(xchk_ifork_btree_op_error, TP_fast_assign( xfs_fsblock_t fsbno = xchk_btree_cur_fsbno(cur, level); __entry->dev = sc->mp->m_super->s_dev; - __entry->ino = sc->ip->i_ino; + __entry->ino = cur->bc_ino.ip->i_ino; __entry->whichfork = cur->bc_ino.whichfork; __entry->type = sc->sm->sm_type; __assign_str(name); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index a59bbe767a7dc..0836fea2d6d81 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -103,7 +103,7 @@ xfs_bmap_count_blocks( struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); struct xfs_btree_cur *cur; - xfs_extlen_t btblocks = 0; + xfs_filblks_t btblocks = 0; int error; *nextents = 0; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index ff982d983989b..f11d475898f28 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -69,6 +69,30 @@ xfs_dquot_mark_sick( } /* + * Detach the dquot buffer if it's still attached, because we can get called + * through dqpurge after a log shutdown. Caller must hold the dqflock or have + * otherwise isolated the dquot. + */ +void +xfs_dquot_detach_buf( + struct xfs_dquot *dqp) +{ + struct xfs_dq_logitem *qlip = &dqp->q_logitem; + struct xfs_buf *bp = NULL; + + spin_lock(&qlip->qli_lock); + if (qlip->qli_item.li_buf) { + bp = qlip->qli_item.li_buf; + qlip->qli_item.li_buf = NULL; + } + spin_unlock(&qlip->qli_lock); + if (bp) { + list_del_init(&qlip->qli_item.li_bio_list); + xfs_buf_rele(bp); + } +} + +/* * This is called to free all the memory associated with a dquot */ void @@ -76,6 +100,7 @@ xfs_qm_dqdestroy( struct xfs_dquot *dqp) { ASSERT(list_empty(&dqp->q_lru)); + ASSERT(dqp->q_logitem.qli_item.li_buf == NULL); kvfree(dqp->q_logitem.qli_item.li_lv_shadow); mutex_destroy(&dqp->q_qlock); @@ -1142,9 +1167,11 @@ static void xfs_qm_dqflush_done( struct xfs_log_item *lip) { - struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip; - struct xfs_dquot *dqp = qip->qli_dquot; + struct xfs_dq_logitem *qlip = + container_of(lip, struct xfs_dq_logitem, qli_item); + struct xfs_dquot *dqp = qlip->qli_dquot; struct xfs_ail *ailp = lip->li_ailp; + struct xfs_buf *bp = NULL; xfs_lsn_t tail_lsn; /* @@ -1156,12 +1183,12 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) && - ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_lsn == qlip->qli_flush_lsn || test_bit(XFS_LI_FAILED, &lip->li_flags))) { spin_lock(&ailp->ail_lock); xfs_clear_li_failed(lip); - if (lip->li_lsn == qip->qli_flush_lsn) { + if (lip->li_lsn == qlip->qli_flush_lsn) { /* xfs_ail_update_finish() drops the AIL lock */ tail_lsn = xfs_ail_delete_one(ailp, lip); xfs_ail_update_finish(ailp, tail_lsn); @@ -1171,6 +1198,20 @@ xfs_qm_dqflush_done( } /* + * If this dquot hasn't been dirtied since initiating the last dqflush, + * release the buffer reference. We already unlinked this dquot item + * from the buffer. + */ + spin_lock(&qlip->qli_lock); + if (!qlip->qli_dirty) { + bp = lip->li_buf; + lip->li_buf = NULL; + } + spin_unlock(&qlip->qli_lock); + if (bp) + xfs_buf_rele(bp); + + /* * Release the dq's flush lock since we're done with it. */ xfs_dqfunlock(dqp); @@ -1196,7 +1237,7 @@ xfs_buf_dquot_io_fail( spin_lock(&bp->b_mount->m_ail->ail_lock); list_for_each_entry(lip, &bp->b_li_list, li_bio_list) - xfs_set_li_failed(lip, bp); + set_bit(XFS_LI_FAILED, &lip->li_flags); spin_unlock(&bp->b_mount->m_ail->ail_lock); } @@ -1239,6 +1280,111 @@ xfs_qm_dqflush_check( } /* + * Get the buffer containing the on-disk dquot. + * + * Requires dquot flush lock, will clear the dirty flag, delete the quota log + * item from the AIL, and shut down the system if something goes wrong. + */ +static int +xfs_dquot_read_buf( + struct xfs_trans *tp, + struct xfs_dquot *dqp, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_buf *bp = NULL; + int error; + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, + &bp, &xfs_dquot_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_dquot_mark_sick(dqp); + if (error) + goto out_abort; + + *bpp = bp; + return 0; + +out_abort: + dqp->q_flags &= ~XFS_DQFLAG_DIRTY; + xfs_trans_ail_delete(&dqp->q_logitem.qli_item, 0); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; +} + +/* + * Attach a dquot buffer to this dquot to avoid allocating a buffer during a + * dqflush, since dqflush can be called from reclaim context. + */ +int +xfs_dquot_attach_buf( + struct xfs_trans *tp, + struct xfs_dquot *dqp) +{ + struct xfs_dq_logitem *qlip = &dqp->q_logitem; + struct xfs_log_item *lip = &qlip->qli_item; + int error; + + spin_lock(&qlip->qli_lock); + if (!lip->li_buf) { + struct xfs_buf *bp = NULL; + + spin_unlock(&qlip->qli_lock); + error = xfs_dquot_read_buf(tp, dqp, &bp); + if (error) + return error; + + /* + * Attach the dquot to the buffer so that the AIL does not have + * to read the dquot buffer to push this item. + */ + xfs_buf_hold(bp); + spin_lock(&qlip->qli_lock); + lip->li_buf = bp; + xfs_trans_brelse(tp, bp); + } + qlip->qli_dirty = true; + spin_unlock(&qlip->qli_lock); + + return 0; +} + +/* + * Get a new reference the dquot buffer attached to this dquot for a dqflush + * operation. + * + * Returns 0 and a NULL bp if none was attached to the dquot; 0 and a locked + * bp; or -EAGAIN if the buffer could not be locked. + */ +int +xfs_dquot_use_attached_buf( + struct xfs_dquot *dqp, + struct xfs_buf **bpp) +{ + struct xfs_buf *bp = dqp->q_logitem.qli_item.li_buf; + + /* + * A NULL buffer can happen if the dquot dirty flag was set but the + * filesystem shut down before transaction commit happened. In that + * case we're not going to flush anyway. + */ + if (!bp) { + ASSERT(xfs_is_shutdown(dqp->q_mount)); + + *bpp = NULL; + return 0; + } + + if (!xfs_buf_trylock(bp)) + return -EAGAIN; + + xfs_buf_hold(bp); + *bpp = bp; + return 0; +} + +/* * Write a modified dquot to disk. * The dquot must be locked and the flush lock too taken by caller. * The flush lock will not be unlocked until the dquot reaches the disk, @@ -1249,11 +1395,11 @@ xfs_qm_dqflush_check( int xfs_qm_dqflush( struct xfs_dquot *dqp, - struct xfs_buf **bpp) + struct xfs_buf *bp) { struct xfs_mount *mp = dqp->q_mount; - struct xfs_log_item *lip = &dqp->q_logitem.qli_item; - struct xfs_buf *bp; + struct xfs_dq_logitem *qlip = &dqp->q_logitem; + struct xfs_log_item *lip = &qlip->qli_item; struct xfs_dqblk *dqblk; xfs_failaddr_t fa; int error; @@ -1263,28 +1409,12 @@ xfs_qm_dqflush( trace_xfs_dqflush(dqp); - *bpp = NULL; - xfs_qm_dqunpin_wait(dqp); - /* - * Get the buffer containing the on-disk dquot - */ - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK, - &bp, &xfs_dquot_buf_ops); - if (error == -EAGAIN) - goto out_unlock; - if (xfs_metadata_is_sick(error)) - xfs_dquot_mark_sick(dqp); - if (error) - goto out_abort; - fa = xfs_qm_dqflush_check(dqp); if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", dqp->q_id, fa); - xfs_buf_relse(bp); xfs_dquot_mark_sick(dqp); error = -EFSCORRUPTED; goto out_abort; @@ -1299,8 +1429,15 @@ xfs_qm_dqflush( */ dqp->q_flags &= ~XFS_DQFLAG_DIRTY; - xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, - &dqp->q_logitem.qli_item.li_lsn); + /* + * We hold the dquot lock, so nobody can dirty it while we're + * scheduling the write out. Clear the dirty-since-flush flag. + */ + spin_lock(&qlip->qli_lock); + qlip->qli_dirty = false; + spin_unlock(&qlip->qli_lock); + + xfs_trans_ail_copy_lsn(mp->m_ail, &qlip->qli_flush_lsn, &lip->li_lsn); /* * copy the lsn into the on-disk dquot now while we have the in memory @@ -1312,7 +1449,7 @@ xfs_qm_dqflush( * of a dquot without an up-to-date CRC getting to disk. */ if (xfs_has_crc(mp)) { - dqblk->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn); + dqblk->dd_lsn = cpu_to_be64(lip->li_lsn); xfs_update_cksum((char *)dqblk, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } @@ -1322,7 +1459,7 @@ xfs_qm_dqflush( * the AIL and release the flush lock once the dquot is synced to disk. */ bp->b_flags |= _XBF_DQUOTS; - list_add_tail(&dqp->q_logitem.qli_item.li_bio_list, &bp->b_li_list); + list_add_tail(&lip->li_bio_list, &bp->b_li_list); /* * If the buffer is pinned then push on the log so we won't @@ -1334,14 +1471,12 @@ xfs_qm_dqflush( } trace_xfs_dqflush_done(dqp); - *bpp = bp; return 0; out_abort: dqp->q_flags &= ~XFS_DQFLAG_DIRTY; xfs_trans_ail_delete(lip, 0); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); -out_unlock: xfs_dqfunlock(dqp); return error; } diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index d73d179df0095..c617bac75361b 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -214,7 +214,7 @@ void xfs_dquot_to_disk(struct xfs_disk_dquot *ddqp, struct xfs_dquot *dqp); #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->q_flags & XFS_DQFLAG_DIRTY) void xfs_qm_dqdestroy(struct xfs_dquot *dqp); -int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp); +int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf *bp); void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp); void xfs_qm_adjust_dqtimers(struct xfs_dquot *d); void xfs_qm_adjust_dqlimits(struct xfs_dquot *d); @@ -237,6 +237,10 @@ void xfs_dqlockn(struct xfs_dqtrx *q); void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); +int xfs_dquot_attach_buf(struct xfs_trans *tp, struct xfs_dquot *dqp); +int xfs_dquot_use_attached_buf(struct xfs_dquot *dqp, struct xfs_buf **bpp); +void xfs_dquot_detach_buf(struct xfs_dquot *dqp); + static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) { xfs_dqlock(dqp); diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 7d19091215b08..271b195ebb932 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -123,8 +123,9 @@ xfs_qm_dquot_logitem_push( __releases(&lip->li_ailp->ail_lock) __acquires(&lip->li_ailp->ail_lock) { - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = lip->li_buf; + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + struct xfs_buf *bp; uint rval = XFS_ITEM_SUCCESS; int error; @@ -155,14 +156,25 @@ xfs_qm_dquot_logitem_push( spin_unlock(&lip->li_ailp->ail_lock); - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); + if (error == -EAGAIN) { + xfs_dqfunlock(dqp); + rval = XFS_ITEM_LOCKED; + goto out_relock_ail; + } + + /* + * dqflush completes dqflock on error, and the delwri ioend does it on + * success. + */ + error = xfs_qm_dqflush(dqp, bp); if (!error) { if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; - xfs_buf_relse(bp); - } else if (error == -EAGAIN) - rval = XFS_ITEM_LOCKED; + } + xfs_buf_relse(bp); +out_relock_ail: spin_lock(&lip->li_ailp->ail_lock); out_unlock: xfs_dqunlock(dqp); @@ -195,12 +207,10 @@ xfs_qm_dquot_logitem_committing( } #ifdef DEBUG_EXPENSIVE -static int -xfs_qm_dquot_logitem_precommit( - struct xfs_trans *tp, - struct xfs_log_item *lip) +static void +xfs_qm_dquot_logitem_precommit_check( + struct xfs_dquot *dqp) { - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; struct xfs_mount *mp = dqp->q_mount; struct xfs_disk_dquot ddq = { }; xfs_failaddr_t fa; @@ -216,13 +226,24 @@ xfs_qm_dquot_logitem_precommit( xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); ASSERT(fa == NULL); } - - return 0; } #else -# define xfs_qm_dquot_logitem_precommit NULL +# define xfs_qm_dquot_logitem_precommit_check(...) ((void)0) #endif +static int +xfs_qm_dquot_logitem_precommit( + struct xfs_trans *tp, + struct xfs_log_item *lip) +{ + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + + xfs_qm_dquot_logitem_precommit_check(dqp); + + return xfs_dquot_attach_buf(tp, dqp); +} + static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_size = xfs_qm_dquot_logitem_size, .iop_precommit = xfs_qm_dquot_logitem_precommit, @@ -247,5 +268,7 @@ xfs_qm_dquot_logitem_init( xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, &xfs_dquot_item_ops); + spin_lock_init(&lp->qli_lock); lp->qli_dquot = dqp; + lp->qli_dirty = false; } diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 794710c244749..d66e52807d76d 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -14,6 +14,13 @@ struct xfs_dq_logitem { struct xfs_log_item qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ + + /* + * We use this spinlock to coordinate access to the li_buf pointer in + * the log item and the qli_dirty flag. + */ + spinlock_t qli_lock; + bool qli_dirty; /* dirtied since last flush? */ }; void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp); diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index 9ab05ad224d12..265c424498933 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -854,7 +854,7 @@ xfs_ioc_start_commit( struct xfs_commit_range __user *argp) { struct xfs_commit_range args = { }; - struct timespec64 ts; + struct kstat kstat = { }; struct xfs_commit_range_fresh *kern_f; struct xfs_commit_range_fresh __user *user_f; struct inode *inode2 = file_inode(file); @@ -871,12 +871,12 @@ xfs_ioc_start_commit( memcpy(&kern_f->fsid, ip2->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); xfs_ilock(ip2, lockflags); - ts = inode_get_ctime(inode2); - kern_f->file2_ctime = ts.tv_sec; - kern_f->file2_ctime_nsec = ts.tv_nsec; - ts = inode_get_mtime(inode2); - kern_f->file2_mtime = ts.tv_sec; - kern_f->file2_mtime_nsec = ts.tv_nsec; + /* Force writing of a distinct ctime if any writes happen. */ + fill_mg_cmtime(&kstat, STATX_CTIME | STATX_MTIME, inode2); + kern_f->file2_ctime = kstat.ctime.tv_sec; + kern_f->file2_ctime_nsec = kstat.ctime.tv_nsec; + kern_f->file2_mtime = kstat.mtime.tv_sec; + kern_f->file2_mtime_nsec = kstat.mtime.tv_nsec; kern_f->file2_ino = ip2->i_ino; kern_f->file2_gen = inode2->i_generation; kern_f->magic = XCR_FRESH_MAGIC; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4a0b7de4f7aed..9a435b1ff2647 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1242,6 +1242,14 @@ out_unlock: xfs_iunlock2_remapping(src, dest); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + /* + * If the caller did not set CAN_SHORTEN, then it is not prepared to + * handle partial results -- either the whole remap succeeds, or we + * must say why it did not. In this case, any error should be returned + * to the caller. + */ + if (ret && remapped < len && !(remap_flags & REMAP_FILE_CAN_SHORTEN)) + return ret; return remapped > 0 ? remapped : ret; } diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 82f2e0dd22499..3290dd8524a69 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -163,7 +163,8 @@ struct xfs_getfsmap_info { xfs_daddr_t next_daddr; /* next daddr we expect */ /* daddr of low fsmap key when we're using the rtbitmap */ xfs_daddr_t low_daddr; - xfs_daddr_t end_daddr; /* daddr of high fsmap key */ + /* daddr of high fsmap key, or the last daddr on the device */ + xfs_daddr_t end_daddr; u64 missing_owner; /* owner of holes */ u32 dev; /* device id */ /* @@ -387,8 +388,8 @@ xfs_getfsmap_group_helper( * we calculated from userspace's high key to synthesize the record. * Note that if the btree query found a mapping, there won't be a gap. */ - if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) - frec->start_daddr = info->end_daddr; + if (info->last) + frec->start_daddr = info->end_daddr + 1; else frec->start_daddr = xfs_gbno_to_daddr(xg, startblock); @@ -736,11 +737,10 @@ xfs_getfsmap_rtdev_rtbitmap_helper( * we calculated from userspace's high key to synthesize the record. * Note that if the btree query found a mapping, there won't be a gap. */ - if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) { - frec.start_daddr = info->end_daddr; - } else { + if (info->last) + frec.start_daddr = info->end_daddr + 1; + else frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb); - } frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount); return xfs_getfsmap_helper(tp, info, &frec); @@ -933,7 +933,10 @@ xfs_getfsmap( struct xfs_trans *tp = NULL; struct xfs_fsmap dkeys[2]; /* per-dev keys */ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; - struct xfs_getfsmap_info info = { NULL }; + struct xfs_getfsmap_info info = { + .fsmap_recs = fsmap_recs, + .head = head, + }; bool use_rmap; int i; int error = 0; @@ -998,9 +1001,6 @@ xfs_getfsmap( info.next_daddr = head->fmh_keys[0].fmr_physical + head->fmh_keys[0].fmr_length; - info.end_daddr = XFS_BUF_DADDR_NULL; - info.fsmap_recs = fsmap_recs; - info.head = head; /* For each device we support... */ for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { @@ -1013,17 +1013,23 @@ xfs_getfsmap( break; /* - * If this device number matches the high key, we have - * to pass the high key to the handler to limit the - * query results. If the device number exceeds the - * low key, zero out the low key so that we get - * everything from the beginning. + * If this device number matches the high key, we have to pass + * the high key to the handler to limit the query results, and + * set the end_daddr so that we can synthesize records at the + * end of the query range or device. */ if (handlers[i].dev == head->fmh_keys[1].fmr_device) { dkeys[1] = head->fmh_keys[1]; info.end_daddr = min(handlers[i].nr_sectors - 1, dkeys[1].fmr_physical); + } else { + info.end_daddr = handlers[i].nr_sectors - 1; } + + /* + * If the device number exceeds the low key, zero out the low + * key so that we get everything from the beginning. + */ if (handlers[i].dev > head->fmh_keys[0].fmr_device) memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b0de3d924d4c1..1648dc5a80688 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -231,7 +231,7 @@ xfs_iflags_clear(xfs_inode_t *ip, unsigned long flags) } static inline int -__xfs_iflags_test(xfs_inode_t *ip, unsigned long flags) +__xfs_iflags_test(const struct xfs_inode *ip, unsigned long flags) { return (ip->i_flags & flags); } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 61ee110b47d7d..dc8b1010d4d33 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -148,17 +148,29 @@ xfs_qm_dqpurge( * We don't care about getting disk errors here. We need * to purge this dquot anyway, so we go ahead regardless. */ - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); + if (error == -EAGAIN) { + xfs_dqfunlock(dqp); + dqp->q_flags &= ~XFS_DQFLAG_FREEING; + goto out_unlock; + } + if (!bp) + goto out_funlock; + + /* + * dqflush completes dqflock on error, and the bwrite ioend + * does it on success. + */ + error = xfs_qm_dqflush(dqp, bp); if (!error) { error = xfs_bwrite(bp); xfs_buf_relse(bp); - } else if (error == -EAGAIN) { - dqp->q_flags &= ~XFS_DQFLAG_FREEING; - goto out_unlock; } xfs_dqflock(dqp); } + xfs_dquot_detach_buf(dqp); +out_funlock: ASSERT(atomic_read(&dqp->q_pincount) == 0); ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) || !test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags)); @@ -241,6 +253,10 @@ xfs_qm_destroy_quotainos( xfs_irele(qi->qi_pquotaip); qi->qi_pquotaip = NULL; } + if (qi->qi_dirip) { + xfs_irele(qi->qi_dirip); + qi->qi_dirip = NULL; + } } /* @@ -490,7 +506,17 @@ xfs_qm_dquot_isolate( /* we have to drop the LRU lock to flush the dquot */ spin_unlock(&lru->lock); - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); + if (!bp || error == -EAGAIN) { + xfs_dqfunlock(dqp); + goto out_unlock_dirty; + } + + /* + * dqflush completes dqflock on error, and the delwri ioend + * does it on success. + */ + error = xfs_qm_dqflush(dqp, bp); if (error) goto out_unlock_dirty; @@ -498,6 +524,8 @@ xfs_qm_dquot_isolate( xfs_buf_relse(bp); goto out_unlock_dirty; } + + xfs_dquot_detach_buf(dqp); xfs_dqfunlock(dqp); /* @@ -646,8 +674,7 @@ xfs_qm_init_timelimits( static int xfs_qm_load_metadir_qinos( struct xfs_mount *mp, - struct xfs_quotainfo *qi, - struct xfs_inode **dpp) + struct xfs_quotainfo *qi) { struct xfs_trans *tp; int error; @@ -656,7 +683,7 @@ xfs_qm_load_metadir_qinos( if (error) return error; - error = xfs_dqinode_load_parent(tp, dpp); + error = xfs_dqinode_load_parent(tp, &qi->qi_dirip); if (error == -ENOENT) { /* no quota dir directory, but we'll create one later */ error = 0; @@ -666,21 +693,21 @@ xfs_qm_load_metadir_qinos( goto out_trans; if (XFS_IS_UQUOTA_ON(mp)) { - error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_USER, + error = xfs_dqinode_load(tp, qi->qi_dirip, XFS_DQTYPE_USER, &qi->qi_uquotaip); if (error && error != -ENOENT) goto out_trans; } if (XFS_IS_GQUOTA_ON(mp)) { - error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_GROUP, + error = xfs_dqinode_load(tp, qi->qi_dirip, XFS_DQTYPE_GROUP, &qi->qi_gquotaip); if (error && error != -ENOENT) goto out_trans; } if (XFS_IS_PQUOTA_ON(mp)) { - error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_PROJ, + error = xfs_dqinode_load(tp, qi->qi_dirip, XFS_DQTYPE_PROJ, &qi->qi_pquotaip); if (error && error != -ENOENT) goto out_trans; @@ -696,34 +723,40 @@ out_trans: STATIC int xfs_qm_create_metadir_qinos( struct xfs_mount *mp, - struct xfs_quotainfo *qi, - struct xfs_inode **dpp) + struct xfs_quotainfo *qi) { int error; - if (!*dpp) { - error = xfs_dqinode_mkdir_parent(mp, dpp); + if (!qi->qi_dirip) { + error = xfs_dqinode_mkdir_parent(mp, &qi->qi_dirip); if (error && error != -EEXIST) return error; + /* + * If the /quotas dirent points to an inode that isn't + * loadable, qi_dirip will be NULL but mkdir_parent will return + * -EEXIST. In this case the metadir is corrupt, so bail out. + */ + if (XFS_IS_CORRUPT(mp, qi->qi_dirip == NULL)) + return -EFSCORRUPTED; } if (XFS_IS_UQUOTA_ON(mp) && !qi->qi_uquotaip) { - error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_USER, - &qi->qi_uquotaip); + error = xfs_dqinode_metadir_create(qi->qi_dirip, + XFS_DQTYPE_USER, &qi->qi_uquotaip); if (error) return error; } if (XFS_IS_GQUOTA_ON(mp) && !qi->qi_gquotaip) { - error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_GROUP, - &qi->qi_gquotaip); + error = xfs_dqinode_metadir_create(qi->qi_dirip, + XFS_DQTYPE_GROUP, &qi->qi_gquotaip); if (error) return error; } if (XFS_IS_PQUOTA_ON(mp) && !qi->qi_pquotaip) { - error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_PROJ, - &qi->qi_pquotaip); + error = xfs_dqinode_metadir_create(qi->qi_dirip, + XFS_DQTYPE_PROJ, &qi->qi_pquotaip); if (error) return error; } @@ -768,7 +801,6 @@ xfs_qm_init_metadir_qinos( struct xfs_mount *mp) { struct xfs_quotainfo *qi = mp->m_quotainfo; - struct xfs_inode *dp = NULL; int error; if (!xfs_has_quota(mp)) { @@ -777,20 +809,22 @@ xfs_qm_init_metadir_qinos( return error; } - error = xfs_qm_load_metadir_qinos(mp, qi, &dp); + error = xfs_qm_load_metadir_qinos(mp, qi); if (error) goto out_err; - error = xfs_qm_create_metadir_qinos(mp, qi, &dp); + error = xfs_qm_create_metadir_qinos(mp, qi); if (error) goto out_err; - xfs_irele(dp); + /* The only user of the quota dir inode is online fsck */ +#if !IS_ENABLED(CONFIG_XFS_ONLINE_SCRUB) + xfs_irele(qi->qi_dirip); + qi->qi_dirip = NULL; +#endif return 0; out_err: xfs_qm_destroy_quotainos(mp->m_quotainfo); - if (dp) - xfs_irele(dp); return error; } @@ -1304,6 +1338,10 @@ xfs_qm_quotacheck_dqadjust( return error; } + error = xfs_dquot_attach_buf(NULL, dqp); + if (error) + return error; + trace_xfs_dqadjust(dqp); /* @@ -1486,11 +1524,17 @@ xfs_qm_flush_one( goto out_unlock; } - error = xfs_qm_dqflush(dqp, &bp); + error = xfs_dquot_use_attached_buf(dqp, &bp); if (error) goto out_unlock; + if (!bp) { + error = -EFSCORRUPTED; + goto out_unlock; + } - xfs_buf_delwri_queue(bp, buffer_list); + error = xfs_qm_dqflush(dqp, bp); + if (!error) + xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); out_unlock: xfs_dqunlock(dqp); diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index e919c7f62f578..35b64bc3a7a86 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -55,6 +55,7 @@ struct xfs_quotainfo { struct xfs_inode *qi_uquotaip; /* user quota inode */ struct xfs_inode *qi_gquotaip; /* group quota inode */ struct xfs_inode *qi_pquotaip; /* project quota inode */ + struct xfs_inode *qi_dirip; /* quota metadir */ struct list_lru qi_lru; int qi_dquots; struct mutex qi_quotaofflock;/* to serialize quotaoff */ diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index fa1317cc396c9..d7565462af3dc 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -101,7 +101,8 @@ extern void xfs_trans_free_dqinfo(struct xfs_trans *); extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *, uint, int64_t); extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); -extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); +void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *tp, + bool already_locked); int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip, int64_t dblocks, int64_t rblocks, bool force); extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, @@ -173,7 +174,7 @@ static inline void xfs_trans_mod_dquot_byino(struct xfs_trans *tp, { } #define xfs_trans_apply_dquot_deltas(tp) -#define xfs_trans_unreserve_and_mod_dquots(tp) +#define xfs_trans_unreserve_and_mod_dquots(tp, a) static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip, int64_t dblocks, int64_t rblocks, bool force) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 0cb534d71119a..fcfa6e0eb3ad2 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1827,7 +1827,7 @@ xfs_rtallocate_rtg( * For an allocation to an empty file at offset 0, pick an extent that * will space things out in the rt area. */ - if (bno_hint) + if (bno_hint != NULLFSBLOCK) start = xfs_rtb_to_rtx(args.mp, bno_hint); else if (!xfs_has_rtgroups(args.mp) && initial_user_data) start = xfs_rtpick_extent(args.rtg, tp, maxlen); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 30fbed27cf05c..4cd25717c9d13 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -860,29 +860,17 @@ __xfs_trans_commit( trace_xfs_trans_commit(tp, _RET_IP_); - error = xfs_trans_run_precommits(tp); - if (error) { - if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) - xfs_defer_cancel(tp); - goto out_unreserve; - } - /* - * Finish deferred items on final commit. Only permanent transactions - * should ever have deferred ops. + * Commit per-transaction changes that are not already tracked through + * log items. This can add dirty log items to the transaction. */ - WARN_ON_ONCE(!list_empty(&tp->t_dfops) && - !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); - if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) { - error = xfs_defer_finish_noroll(&tp); - if (error) - goto out_unreserve; + if (tp->t_flags & XFS_TRANS_SB_DIRTY) + xfs_trans_apply_sb_deltas(tp); + xfs_trans_apply_dquot_deltas(tp); - /* Run precommits from final tx in defer chain. */ - error = xfs_trans_run_precommits(tp); - if (error) - goto out_unreserve; - } + error = xfs_trans_run_precommits(tp); + if (error) + goto out_unreserve; /* * If there is nothing to be logged by the transaction, @@ -907,13 +895,6 @@ __xfs_trans_commit( ASSERT(tp->t_ticket != NULL); - /* - * If we need to update the superblock, then do it now. - */ - if (tp->t_flags & XFS_TRANS_SB_DIRTY) - xfs_trans_apply_sb_deltas(tp); - xfs_trans_apply_dquot_deltas(tp); - xlog_cil_commit(log, tp, &commit_seq, regrant); xfs_trans_free(tp); @@ -939,7 +920,7 @@ out_unreserve: * the dqinfo portion to be. All that means is that we have some * (non-persistent) quota reservations that need to be unreserved. */ - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, true); if (tp->t_ticket) { if (regrant && !xlog_is_shutdown(log)) xfs_log_ticket_regrant(log, tp->t_ticket); @@ -958,6 +939,20 @@ int xfs_trans_commit( struct xfs_trans *tp) { + /* + * Finish deferred items on final commit. Only permanent transactions + * should ever have deferred ops. + */ + WARN_ON_ONCE(!list_empty(&tp->t_dfops) && + !(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); + if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) { + int error = xfs_defer_finish_noroll(&tp); + if (error) { + xfs_trans_cancel(tp); + return error; + } + } + return __xfs_trans_commit(tp, false); } @@ -1019,7 +1014,7 @@ xfs_trans_cancel( } #endif xfs_trans_unreserve_and_mod_sb(tp); - xfs_trans_unreserve_and_mod_dquots(tp); + xfs_trans_unreserve_and_mod_dquots(tp, false); if (tp->t_ticket) { xfs_log_ticket_ungrant(log, tp->t_ticket); @@ -1435,5 +1430,8 @@ done: out_cancel: xfs_trans_cancel(tp); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (dp != ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 8ede9d099d1fe..f56d62dced97b 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -360,7 +360,7 @@ xfsaild_resubmit_item( /* protected by ail_lock */ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { - if (bp->b_flags & _XBF_INODES) + if (bp->b_flags & (_XBF_INODES | _XBF_DQUOTS)) clear_bit(XFS_LI_FAILED, &lip->li_flags); else xfs_clear_li_failed(lip); diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 481ba3dc9f190..713b6d243e563 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -606,6 +606,24 @@ xfs_trans_apply_dquot_deltas( ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count); ASSERT(dqp->q_ino.reserved >= dqp->q_ino.count); ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count); + + /* + * We've applied the count changes and given back + * whatever reservation we didn't use. Zero out the + * dqtrx fields. + */ + qtrx->qt_blk_res = 0; + qtrx->qt_bcount_delta = 0; + qtrx->qt_delbcnt_delta = 0; + + qtrx->qt_rtblk_res = 0; + qtrx->qt_rtblk_res_used = 0; + qtrx->qt_rtbcount_delta = 0; + qtrx->qt_delrtb_delta = 0; + + qtrx->qt_ino_res = 0; + qtrx->qt_ino_res_used = 0; + qtrx->qt_icount_delta = 0; } } } @@ -642,7 +660,8 @@ xfs_trans_unreserve_and_mod_dquots_hook( */ void xfs_trans_unreserve_and_mod_dquots( - struct xfs_trans *tp) + struct xfs_trans *tp, + bool already_locked) { int i, j; struct xfs_dquot *dqp; @@ -671,10 +690,12 @@ xfs_trans_unreserve_and_mod_dquots( * about the number of blocks used field, or deltas. * Also we don't bother to zero the fields. */ - locked = false; + locked = already_locked; if (qtrx->qt_blk_res) { - xfs_dqlock(dqp); - locked = true; + if (!locked) { + xfs_dqlock(dqp); + locked = true; + } dqp->q_blk.reserved -= (xfs_qcnt_t)qtrx->qt_blk_res; } @@ -695,7 +716,7 @@ xfs_trans_unreserve_and_mod_dquots( dqp->q_rtb.reserved -= (xfs_qcnt_t)qtrx->qt_rtblk_res; } - if (locked) + if (locked && !already_locked) xfs_dqunlock(dqp); } diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index a28e2a6a13d05..74169dd0f6594 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -166,9 +166,12 @@ static inline void *ffa_dev_get_drvdata(struct ffa_device *fdev) return dev_get_drvdata(&fdev->dev); } +struct ffa_partition_info; + #if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT) -struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, - const struct ffa_ops *ops); +struct ffa_device * +ffa_device_register(const struct ffa_partition_info *part_info, + const struct ffa_ops *ops); void ffa_device_unregister(struct ffa_device *ffa_dev); int ffa_driver_register(struct ffa_driver *driver, struct module *owner, const char *mod_name); @@ -176,9 +179,9 @@ void ffa_driver_unregister(struct ffa_driver *driver); bool ffa_device_is_valid(struct ffa_device *ffa_dev); #else -static inline -struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, - const struct ffa_ops *ops) +static inline struct ffa_device * +ffa_device_register(const struct ffa_partition_info *part_info, + const struct ffa_ops *ops) { return NULL; } diff --git a/include/linux/bio.h b/include/linux/bio.h index 60830a6a59394..7a1b3b1a8fed0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -423,7 +423,7 @@ void __bio_add_page(struct bio *bio, struct page *page, void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); -void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); +void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 08a727b408164..378d3a1a22fca 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -200,8 +200,6 @@ struct gendisk { spinlock_t zone_wplugs_lock; struct mempool_s *zone_wplugs_pool; struct hlist_head *zone_wplugs_hash; - struct list_head zone_wplugs_err_list; - struct work_struct zone_wplugs_work; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -1421,6 +1419,9 @@ static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) return is_seq; } +int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask); + static inline unsigned int queue_dma_alignment(const struct request_queue *q) { return q->limits.dma_alignment; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eaee2a819f4c1..6e63dd3443b96 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1527,6 +1527,7 @@ struct bpf_prog_aux { bool is_extended; /* true if extended by freplace program */ bool jits_use_priv_stack; bool priv_stack_requested; + bool changes_pkt_data; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; @@ -2193,26 +2194,25 @@ bpf_prog_run_array(const struct bpf_prog_array *array, * rcu-protected dynamically sized maps. */ static __always_inline u32 -bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, +bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; - const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; might_fault(); + RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held"); + + if (unlikely(!array)) + return ret; - rcu_read_lock_trace(); migrate_disable(); run_ctx.is_uprobe = true; - array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); - if (unlikely(!array)) - goto out; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { @@ -2227,9 +2227,7 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); -out: migrate_enable(); - rcu_read_unlock_trace(); return ret; } @@ -3516,10 +3514,4 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) return prog->aux->func_idx != 0; } -static inline bool bpf_prog_is_raw_tp(const struct bpf_prog *prog) -{ - return prog->type == BPF_PROG_TYPE_TRACING && - prog->expected_attach_type == BPF_TRACE_RAW_TP; -} - #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f4290c179bee0..48b7b2eeb7e21 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -659,6 +659,7 @@ struct bpf_subprog_info { bool args_cached: 1; /* true if bpf_fastcall stack region is used by functions that can't be inlined */ bool keep_fastcall_stack: 1; + bool changes_pkt_data: 1; enum priv_stack_mode priv_stack_mode; u8 arg_cnt; diff --git a/include/linux/filter.h b/include/linux/filter.h index 3a21947f2fd44..0477254bc2d30 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1122,7 +1122,7 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); bool bpf_jit_supports_private_stack(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); -bool bpf_helper_changes_pkt_data(void *func); +bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); static inline bool bpf_dump_raw_ok(const struct cred *cred) { diff --git a/include/linux/sched.h b/include/linux/sched.h index d380bffee2eff..66b311fbd5d60 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -656,6 +656,12 @@ struct sched_dl_entity { * @dl_defer_armed tells if the deferrable server is waiting * for the replenishment timer to activate it. * + * @dl_server_active tells if the dlserver is active(started). + * dlserver is started on first cfs enqueue on an idle runqueue + * and is stopped when a dequeue results in 0 cfs tasks on the + * runqueue. In other words, dlserver is active only when cpu's + * runqueue has atleast one cfs task. + * * @dl_defer_running tells if the deferrable server is actually * running, skipping the defer phase. */ @@ -664,6 +670,7 @@ struct sched_dl_entity { unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; + unsigned int dl_server_active : 1; unsigned int dl_defer : 1; unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; diff --git a/include/soc/arc/aux.h b/include/soc/arc/arc_aux.h index 9c2eff6140b61..9c2eff6140b61 100644 --- a/include/soc/arc/aux.h +++ b/include/soc/arc/arc_aux.h diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h index d1a93c73f0062..a78dacd149f12 100644 --- a/include/soc/arc/mcip.h +++ b/include/soc/arc/mcip.h @@ -8,7 +8,7 @@ #ifndef __SOC_ARC_MCIP_H #define __SOC_ARC_MCIP_H -#include <soc/arc/aux.h> +#include <soc/arc/arc_aux.h> #define ARC_REG_MCIP_BCR 0x0d0 #define ARC_REG_MCIP_IDU_BCR 0x0D5 diff --git a/include/soc/arc/timers.h b/include/soc/arc/timers.h index ae99d3e855f1d..51a74166296c6 100644 --- a/include/soc/arc/timers.h +++ b/include/soc/arc/timers.h @@ -6,7 +6,7 @@ #ifndef __SOC_ARC_TIMERS_H #define __SOC_ARC_TIMERS_H -#include <soc/arc/aux.h> +#include <soc/arc/arc_aux.h> /* Timer related Aux registers */ #define ARC_REG_TIMER0_LIMIT 0x23 /* timer 0 limit */ diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 94e8185c4795f..3dc7a1551ac35 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -271,12 +271,6 @@ struct cs35l56_base { struct gpio_desc *reset_gpio; }; -/* Temporary to avoid a build break with the HDA driver */ -static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base) -{ - return 0; -} - static inline bool cs35l56_is_otp_register(unsigned int reg) { return (reg >> 16) == 3; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index adaae86309322..077f84684c18a 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1036,8 +1036,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx out_put_free: i = data.nr; while (i--) { - io_buffer_unmap(src_ctx, data.nodes[i]); - kfree(data.nodes[i]); + if (data.nodes[i]) { + io_buffer_unmap(src_ctx, data.nodes[i]); + kfree(data.nodes[i]); + } } out_unlock: io_rsrc_data_free(ctx, &data); diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 9762bdddf1deb..410028633621c 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -53,3 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o + +CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_queue_stack_maps.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_lpm_trie.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ringbuf.o = $(CC_FLAGS_FTRACE) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index e7a59e6462a93..e5a5f023cedd5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6439,6 +6439,101 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, return off; } +struct bpf_raw_tp_null_args { + const char *func; + u64 mask; +}; + +static const struct bpf_raw_tp_null_args raw_tp_null_args[] = { + /* sched */ + { "sched_pi_setprio", 0x10 }, + /* ... from sched_numa_pair_template event class */ + { "sched_stick_numa", 0x100 }, + { "sched_swap_numa", 0x100 }, + /* afs */ + { "afs_make_fs_call", 0x10 }, + { "afs_make_fs_calli", 0x10 }, + { "afs_make_fs_call1", 0x10 }, + { "afs_make_fs_call2", 0x10 }, + { "afs_protocol_error", 0x1 }, + { "afs_flock_ev", 0x10 }, + /* cachefiles */ + { "cachefiles_lookup", 0x1 | 0x200 }, + { "cachefiles_unlink", 0x1 }, + { "cachefiles_rename", 0x1 }, + { "cachefiles_prep_read", 0x1 }, + { "cachefiles_mark_active", 0x1 }, + { "cachefiles_mark_failed", 0x1 }, + { "cachefiles_mark_inactive", 0x1 }, + { "cachefiles_vfs_error", 0x1 }, + { "cachefiles_io_error", 0x1 }, + { "cachefiles_ondemand_open", 0x1 }, + { "cachefiles_ondemand_copen", 0x1 }, + { "cachefiles_ondemand_close", 0x1 }, + { "cachefiles_ondemand_read", 0x1 }, + { "cachefiles_ondemand_cread", 0x1 }, + { "cachefiles_ondemand_fd_write", 0x1 }, + { "cachefiles_ondemand_fd_release", 0x1 }, + /* ext4, from ext4__mballoc event class */ + { "ext4_mballoc_discard", 0x10 }, + { "ext4_mballoc_free", 0x10 }, + /* fib */ + { "fib_table_lookup", 0x100 }, + /* filelock */ + /* ... from filelock_lock event class */ + { "posix_lock_inode", 0x10 }, + { "fcntl_setlk", 0x10 }, + { "locks_remove_posix", 0x10 }, + { "flock_lock_inode", 0x10 }, + /* ... from filelock_lease event class */ + { "break_lease_noblock", 0x10 }, + { "break_lease_block", 0x10 }, + { "break_lease_unblock", 0x10 }, + { "generic_delete_lease", 0x10 }, + { "time_out_leases", 0x10 }, + /* host1x */ + { "host1x_cdma_push_gather", 0x10000 }, + /* huge_memory */ + { "mm_khugepaged_scan_pmd", 0x10 }, + { "mm_collapse_huge_page_isolate", 0x1 }, + { "mm_khugepaged_scan_file", 0x10 }, + { "mm_khugepaged_collapse_file", 0x10 }, + /* kmem */ + { "mm_page_alloc", 0x1 }, + { "mm_page_pcpu_drain", 0x1 }, + /* .. from mm_page event class */ + { "mm_page_alloc_zone_locked", 0x1 }, + /* netfs */ + { "netfs_failure", 0x10 }, + /* power */ + { "device_pm_callback_start", 0x10 }, + /* qdisc */ + { "qdisc_dequeue", 0x1000 }, + /* rxrpc */ + { "rxrpc_recvdata", 0x1 }, + { "rxrpc_resend", 0x10 }, + /* sunrpc */ + { "xs_stream_read_data", 0x1 }, + /* ... from xprt_cong_event event class */ + { "xprt_reserve_cong", 0x10 }, + { "xprt_release_cong", 0x10 }, + { "xprt_get_cong", 0x10 }, + { "xprt_put_cong", 0x10 }, + /* tcp */ + { "tcp_send_reset", 0x11 }, + /* tegra_apb_dma */ + { "tegra_dma_tx_status", 0x100 }, + /* timer_migration */ + { "tmigr_update_events", 0x1 }, + /* writeback, from writeback_folio_template event class */ + { "writeback_dirty_folio", 0x10 }, + { "folio_wait_writeback", 0x10 }, + /* rdma */ + { "mr_integ_alloc", 0x2000 }, + /* bpf_testmod */ + { "bpf_testmod_test_read", 0x0 }, +}; + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -6449,6 +6544,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; + bool ptr_err_raw_tp = false; const char *tag_value; u32 nr_args, arg; int i, ret; @@ -6543,6 +6639,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, return false; } + if (size != sizeof(u64)) { + bpf_log(log, "func '%s' size %d must be 8\n", + tname, size); + return false; + } + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; @@ -6588,12 +6690,42 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (prog_args_trusted(prog)) info->reg_type |= PTR_TRUSTED; - /* Raw tracepoint arguments always get marked as maybe NULL */ - if (bpf_prog_is_raw_tp(prog)) - info->reg_type |= PTR_MAYBE_NULL; - else if (btf_param_match_suffix(btf, &args[arg], "__nullable")) + if (btf_param_match_suffix(btf, &args[arg], "__nullable")) info->reg_type |= PTR_MAYBE_NULL; + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { + struct btf *btf = prog->aux->attach_btf; + const struct btf_type *t; + const char *tname; + + /* BTF lookups cannot fail, return false on error */ + t = btf_type_by_id(btf, prog->aux->attach_btf_id); + if (!t) + return false; + tname = btf_name_by_offset(btf, t->name_off); + if (!tname) + return false; + /* Checked by bpf_check_attach_target */ + tname += sizeof("btf_trace_") - 1; + for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) { + /* Is this a func with potential NULL args? */ + if (strcmp(tname, raw_tp_null_args[i].func)) + continue; + if (raw_tp_null_args[i].mask & (0x1 << (arg * 4))) + info->reg_type |= PTR_MAYBE_NULL; + /* Is the current arg IS_ERR? */ + if (raw_tp_null_args[i].mask & (0x2 << (arg * 4))) + ptr_err_raw_tp = true; + break; + } + /* If we don't know NULL-ness specification and the tracepoint + * is coming from a loadable module, be conservative and mark + * argument as PTR_MAYBE_NULL. + */ + if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf)) + info->reg_type |= PTR_MAYBE_NULL; + } + if (tgt_prog) { enum bpf_prog_type tgt_type; @@ -6638,6 +6770,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", tname, arg, info->btf_id, btf_type_str(t), __btf_name_by_offset(btf, t->name_off)); + + /* Perform all checks on the validity of type for this argument, but if + * we know it can be IS_ERR at runtime, scrub pointer type and mark as + * scalar. + */ + if (ptr_err_raw_tp) { + bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg); + info->reg_type = SCALAR_VALUE; + } return true; } EXPORT_SYMBOL_GPL(btf_ctx_access); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index a2327c4fdc8bc..da729cbbaeb90 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -539,6 +539,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) { + int err; + /* Branch offsets can't overflow when program is shrinking, no need * to call bpf_adj_branches(..., true) here */ @@ -546,7 +548,9 @@ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) sizeof(struct bpf_insn) * (prog->len - off - cnt)); prog->len -= cnt; - return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); + err = bpf_adj_branches(prog, off, off + cnt, off, false); + WARN_ON_ONCE(err); + return err; } static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) @@ -2936,7 +2940,7 @@ void __weak bpf_jit_compile(struct bpf_prog *prog) { } -bool __weak bpf_helper_changes_pkt_data(void *func) +bool __weak bpf_helper_changes_pkt_data(enum bpf_func_id func_id) { return false; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 01fbef9576e00..f7f892a52a374 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -420,25 +420,6 @@ static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) return rec; } -static bool mask_raw_tp_reg_cond(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) { - return reg->type == (PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL) && - bpf_prog_is_raw_tp(env->prog) && !reg->ref_obj_id; -} - -static bool mask_raw_tp_reg(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) -{ - if (!mask_raw_tp_reg_cond(env, reg)) - return false; - reg->type &= ~PTR_MAYBE_NULL; - return true; -} - -static void unmask_raw_tp_reg(struct bpf_reg_state *reg, bool result) -{ - if (result) - reg->type |= PTR_MAYBE_NULL; -} - static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog) { struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux; @@ -2597,16 +2578,36 @@ static int cmp_subprogs(const void *a, const void *b) ((struct bpf_subprog_info *)b)->start; } +/* Find subprogram that contains instruction at 'off' */ +static struct bpf_subprog_info *find_containing_subprog(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *vals = env->subprog_info; + int l, r, m; + + if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0) + return NULL; + + l = 0; + r = env->subprog_cnt - 1; + while (l < r) { + m = l + (r - l + 1) / 2; + if (vals[m].start <= off) + l = m; + else + r = m - 1; + } + return &vals[l]; +} + +/* Find subprogram that starts exactly at 'off' */ static int find_subprog(struct bpf_verifier_env *env, int off) { struct bpf_subprog_info *p; - p = bsearch(&off, env->subprog_info, env->subprog_cnt, - sizeof(env->subprog_info[0]), cmp_subprogs); - if (!p) + p = find_containing_subprog(env, off); + if (!p || p->start != off) return -ENOENT; return p - env->subprog_info; - } static int add_subprog(struct bpf_verifier_env *env, int off) @@ -6781,7 +6782,6 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, const char *field_name = NULL; enum bpf_type_flag flag = 0; u32 btf_id = 0; - bool mask; int ret; if (!env->allow_ptr_leaks) { @@ -6853,21 +6853,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; - /* For raw_tp progs, we allow dereference of PTR_MAYBE_NULL - * trusted PTR_TO_BTF_ID, these are the ones that are possibly - * arguments to the raw_tp. Since internal checks in for trusted - * reg in check_ptr_to_btf_access would consider PTR_MAYBE_NULL - * modifier as problematic, mask it out temporarily for the - * check. Don't apply this to pointers with ref_obj_id > 0, as - * those won't be raw_tp args. - * - * We may end up applying this relaxation to other trusted - * PTR_TO_BTF_ID with maybe null flag, since we cannot - * distinguish PTR_MAYBE_NULL tagged for arguments vs normal - * tagging, but that should expand allowed behavior, and not - * cause regression for existing behavior. - */ - mask = mask_raw_tp_reg(env, reg); + if (ret != PTR_TO_BTF_ID) { /* just mark; */ @@ -6928,13 +6914,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, clear_trusted_flags(&flag); } - if (atype == BPF_READ && value_regno >= 0) { + if (atype == BPF_READ && value_regno >= 0) mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); - /* We've assigned a new type to regno, so don't undo masking. */ - if (regno == value_regno) - mask = false; - } - unmask_raw_tp_reg(reg, mask); return 0; } @@ -7309,7 +7290,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); } else if (base_type(reg->type) == PTR_TO_BTF_ID && - (mask_raw_tp_reg_cond(env, reg) || !type_may_be_null(reg->type))) { + !type_may_be_null(reg->type)) { err = check_ptr_to_btf_access(env, regs, regno, off, size, t, value_regno); } else if (reg->type == CONST_PTR_TO_MAP) { @@ -9012,7 +8993,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, enum bpf_reg_type type = reg->type; u32 *arg_btf_id = NULL; int err = 0; - bool mask; if (arg_type == ARG_DONTCARE) return 0; @@ -9053,11 +9033,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK) arg_btf_id = fn->arg_btf_id[arg]; - mask = mask_raw_tp_reg(env, reg); err = check_reg_type(env, regno, arg_type, arg_btf_id, meta); + if (err) + return err; - err = err ?: check_func_arg_reg_off(env, reg, regno, arg_type); - unmask_raw_tp_reg(reg, mask); + err = check_func_arg_reg_off(env, reg, regno, arg_type); if (err) return err; @@ -9852,17 +9832,14 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, return ret; } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { struct bpf_call_arg_meta meta; - bool mask; int err; if (register_is_null(reg) && type_may_be_null(arg->arg_type)) continue; memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */ - mask = mask_raw_tp_reg(env, reg); err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta); err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type); - unmask_raw_tp_reg(reg, mask); if (err) return err; } else { @@ -10022,6 +9999,8 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, verbose(env, "Func#%d ('%s') is global and assumed valid.\n", subprog, sub_name); + if (env->subprog_info[subprog].changes_pkt_data) + clear_all_pkt_pointers(env); /* mark global subprog for verifying after main prog */ subprog_aux(env, subprog)->called = true; clear_caller_saved_regs(env, caller->regs); @@ -10708,7 +10687,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } /* With LD_ABS/IND some JITs save/restore skb from r1. */ - changes_data = bpf_helper_changes_pkt_data(fn->func); + changes_data = bpf_helper_changes_pkt_data(func_id); if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", func_id_name(func_id), func_id); @@ -12183,7 +12162,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1, ref_id, type_size; bool is_ret_buf_sz = false; - bool mask = false; int kf_arg_type; t = btf_type_skip_modifiers(btf, args[i].type, NULL); @@ -12242,15 +12220,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } - mask = mask_raw_tp_reg(env, reg); if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) && (register_is_null(reg) || type_may_be_null(reg->type)) && !is_kfunc_arg_nullable(meta->btf, &args[i])) { verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i); - unmask_raw_tp_reg(reg, mask); return -EACCES; } - unmask_raw_tp_reg(reg, mask); if (reg->ref_obj_id) { if (is_kfunc_release(meta) && meta->ref_obj_id) { @@ -12308,24 +12283,16 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta)) break; - /* Allow passing maybe NULL raw_tp arguments to - * kfuncs for compatibility. Don't apply this to - * arguments with ref_obj_id > 0. - */ - mask = mask_raw_tp_reg(env, reg); if (!is_trusted_reg(reg)) { if (!is_kfunc_rcu(meta)) { verbose(env, "R%d must be referenced or trusted\n", regno); - unmask_raw_tp_reg(reg, mask); return -EINVAL; } if (!is_rcu_reg(reg)) { verbose(env, "R%d must be a rcu pointer\n", regno); - unmask_raw_tp_reg(reg, mask); return -EINVAL; } } - unmask_raw_tp_reg(reg, mask); fallthrough; case KF_ARG_PTR_TO_CTX: case KF_ARG_PTR_TO_DYNPTR: @@ -12348,9 +12315,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (is_kfunc_release(meta) && reg->ref_obj_id) arg_type |= OBJ_RELEASE; - mask = mask_raw_tp_reg(env, reg); ret = check_func_arg_reg_off(env, reg, regno, arg_type); - unmask_raw_tp_reg(reg, mask); if (ret < 0) return ret; @@ -12527,7 +12492,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ ref_tname = btf_name_by_offset(btf, ref_t->name_off); fallthrough; case KF_ARG_PTR_TO_BTF_ID: - mask = mask_raw_tp_reg(env, reg); /* Only base_type is checked, further checks are done here */ if ((base_type(reg->type) != PTR_TO_BTF_ID || (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) && @@ -12536,11 +12500,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ verbose(env, "expected %s or socket\n", reg_type_str(env, base_type(reg->type) | (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS))); - unmask_raw_tp_reg(reg, mask); return -EINVAL; } ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i); - unmask_raw_tp_reg(reg, mask); if (ret < 0) return ret; break; @@ -13513,7 +13475,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env, */ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn, - struct bpf_reg_state *ptr_reg, + const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg) { struct bpf_verifier_state *vstate = env->cur_state; @@ -13527,7 +13489,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, struct bpf_sanitize_info info = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; - bool mask; int ret; dst_reg = ®s[dst]; @@ -13554,14 +13515,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; } - mask = mask_raw_tp_reg(env, ptr_reg); if (ptr_reg->type & PTR_MAYBE_NULL) { verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", dst, reg_type_str(env, ptr_reg->type)); - unmask_raw_tp_reg(ptr_reg, mask); return -EACCES; } - unmask_raw_tp_reg(ptr_reg, mask); switch (base_type(ptr_reg->type)) { case PTR_TO_CTX: @@ -16226,6 +16184,29 @@ enforce_retval: return 0; } +static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *subprog; + + subprog = find_containing_subprog(env, off); + subprog->changes_pkt_data = true; +} + +/* 't' is an index of a call-site. + * 'w' is a callee entry point. + * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. + * Rely on DFS traversal order and absence of recursive calls to guarantee that + * callee's change_pkt_data marks would be correct at that moment. + */ +static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) +{ + struct bpf_subprog_info *caller, *callee; + + caller = find_containing_subprog(env, t); + callee = find_containing_subprog(env, w); + caller->changes_pkt_data |= callee->changes_pkt_data; +} + /* non-recursive DFS pseudo code * 1 procedure DFS-iterative(G,v): * 2 label v as discovered @@ -16359,6 +16340,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, bool visit_callee) { int ret, insn_sz; + int w; insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); @@ -16370,8 +16352,10 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, mark_jmp_point(env, t + insn_sz); if (visit_callee) { + w = t + insns[t].imm + 1; mark_prune_point(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); + merge_callee_effects(env, t, w); + ret = push_insn(t, w, BRANCH, env); } return ret; } @@ -16688,6 +16672,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) mark_prune_point(env, t); mark_jmp_point(env, t); } + if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm)) + mark_subprog_changes_pkt_data(env, t); if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; @@ -16822,6 +16808,7 @@ walk_cfg: } } ret = 0; /* cfg looks good */ + env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; err_free: kvfree(insn_state); @@ -20075,7 +20062,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) * for this case. */ case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED: - case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL: if (type == BPF_READ) { if (BPF_MODE(insn->code) == BPF_MEM) insn->code = BPF_LDX | BPF_PROBE_MEM | @@ -20311,6 +20297,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->num_exentries = num_exentries; func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; + func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data; if (!i) func[i]->aux->exception_boundary = env->seen_exception; func[i] = bpf_int_jit_compile(func[i]); @@ -22141,6 +22128,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, } if (tgt_prog) { struct bpf_prog_aux *aux = tgt_prog->aux; + bool tgt_changes_pkt_data; if (bpf_prog_is_dev_bound(prog->aux) && !bpf_prog_dev_bound_match(prog, tgt_prog)) { @@ -22175,6 +22163,14 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, "Extension programs should be JITed\n"); return -EINVAL; } + tgt_changes_pkt_data = aux->func + ? aux->func[subprog]->aux->changes_pkt_data + : aux->changes_pkt_data; + if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) { + bpf_log(log, + "Extension program changes packet data, while original does not\n"); + return -EINVAL; + } } if (!tgt_prog->jited) { bpf_log(log, "Can attach to only JITed progs\n"); @@ -22640,10 +22636,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; - ret = check_attach_btf_id(env); - if (ret) - goto skip_full_check; - ret = resolve_pseudo_ldimm64(env); if (ret < 0) goto skip_full_check; @@ -22658,6 +22650,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; + ret = check_attach_btf_id(env); + if (ret) + goto skip_full_check; + ret = mark_fastcall_patterns(env); if (ret < 0) goto skip_full_check; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c6d8232ad9eea..3e5a6bf587f91 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1341,7 +1341,7 @@ bool sched_can_stop_tick(struct rq *rq) if (scx_enabled() && !scx_can_stop_tick(rq)) return false; - if (rq->cfs.nr_running > 1) + if (rq->cfs.h_nr_running > 1) return false; /* diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index db47f33cb7d2f..d94f2ed6d1f46 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1647,6 +1647,7 @@ void dl_server_start(struct sched_dl_entity *dl_se) if (!dl_se->dl_runtime) return; + dl_se->dl_server_active = 1; enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) resched_curr(dl_se->rq); @@ -1661,6 +1662,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se) hrtimer_try_to_cancel(&dl_se->dl_timer); dl_se->dl_defer_armed = 0; dl_se->dl_throttled = 0; + dl_se->dl_server_active = 0; } void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq, @@ -2421,8 +2423,10 @@ again: if (dl_server(dl_se)) { p = dl_se->server_pick_task(dl_se); if (!p) { - dl_se->dl_yielded = 1; - update_curr_dl_se(rq, dl_se, 0); + if (dl_server_active(dl_se)) { + dl_se->dl_yielded = 1; + update_curr_dl_se(rq, dl_se, 0); + } goto again; } rq->dl_server = dl_se; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index a48b2a701ec27..a1be00a988bf6 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -845,6 +845,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running); + SEQ_printf(m, " .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed); SEQ_printf(m, " .%-30s: %d\n", "idle_nr_running", cfs_rq->idle_nr_running); SEQ_printf(m, " .%-30s: %d\n", "idle_h_nr_running", diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aa0238ee48572..3e9ca38512dee 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1159,8 +1159,6 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec) trace_sched_stat_runtime(p, delta_exec); account_group_exec_runtime(p, delta_exec); cgroup_account_cputime(p, delta_exec); - if (p->dl_server) - dl_server_update(p->dl_server, delta_exec); } static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr) @@ -1237,11 +1235,16 @@ static void update_curr(struct cfs_rq *cfs_rq) update_curr_task(p, delta_exec); /* - * Any fair task that runs outside of fair_server should - * account against fair_server such that it can account for - * this time and possibly avoid running this period. + * If the fair_server is active, we need to account for the + * fair_server time whether or not the task is running on + * behalf of fair_server or not: + * - If the task is running on behalf of fair_server, we need + * to limit its time based on the assigned runtime. + * - Fair task that runs outside of fair_server should account + * against fair_server such that it can account for this time + * and possibly avoid running this period. */ - if (p->dl_server != &rq->fair_server) + if (dl_server_active(&rq->fair_server)) dl_server_update(&rq->fair_server, delta_exec); } @@ -5471,9 +5474,33 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); -static inline void finish_delayed_dequeue_entity(struct sched_entity *se) +static void set_delayed(struct sched_entity *se) +{ + se->sched_delayed = 1; + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + cfs_rq->h_nr_delayed++; + if (cfs_rq_throttled(cfs_rq)) + break; + } +} + +static void clear_delayed(struct sched_entity *se) { se->sched_delayed = 0; + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + cfs_rq->h_nr_delayed--; + if (cfs_rq_throttled(cfs_rq)) + break; + } +} + +static inline void finish_delayed_dequeue_entity(struct sched_entity *se) +{ + clear_delayed(se); if (sched_feat(DELAY_ZERO) && se->vlag > 0) se->vlag = 0; } @@ -5484,6 +5511,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) bool sleep = flags & DEQUEUE_SLEEP; update_curr(cfs_rq); + clear_buddies(cfs_rq, se); if (flags & DEQUEUE_DELAYED) { SCHED_WARN_ON(!se->sched_delayed); @@ -5500,10 +5528,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (sched_feat(DELAY_DEQUEUE) && delay && !entity_eligible(cfs_rq, se)) { - if (cfs_rq->next == se) - cfs_rq->next = NULL; update_load_avg(cfs_rq, se, 0); - se->sched_delayed = 1; + set_delayed(se); return false; } } @@ -5526,8 +5552,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) update_stats_dequeue_fair(cfs_rq, se, flags); - clear_buddies(cfs_rq, se); - update_entity_lag(cfs_rq, se); if (sched_feat(PLACE_REL_DEADLINE) && !sleep) { se->deadline -= se->vruntime; @@ -5917,7 +5941,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) struct rq *rq = rq_of(cfs_rq); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; - long task_delta, idle_task_delta, dequeue = 1; + long task_delta, idle_task_delta, delayed_delta, dequeue = 1; long rq_h_nr_running = rq->cfs.h_nr_running; raw_spin_lock(&cfs_b->lock); @@ -5950,6 +5974,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) task_delta = cfs_rq->h_nr_running; idle_task_delta = cfs_rq->idle_h_nr_running; + delayed_delta = cfs_rq->h_nr_delayed; for_each_sched_entity(se) { struct cfs_rq *qcfs_rq = cfs_rq_of(se); int flags; @@ -5973,6 +5998,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) qcfs_rq->h_nr_running -= task_delta; qcfs_rq->idle_h_nr_running -= idle_task_delta; + qcfs_rq->h_nr_delayed -= delayed_delta; if (qcfs_rq->load.weight) { /* Avoid re-evaluating load for this entity: */ @@ -5995,6 +6021,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) qcfs_rq->h_nr_running -= task_delta; qcfs_rq->idle_h_nr_running -= idle_task_delta; + qcfs_rq->h_nr_delayed -= delayed_delta; } /* At this point se is NULL and we are at root level*/ @@ -6020,7 +6047,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) struct rq *rq = rq_of(cfs_rq); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; - long task_delta, idle_task_delta; + long task_delta, idle_task_delta, delayed_delta; long rq_h_nr_running = rq->cfs.h_nr_running; se = cfs_rq->tg->se[cpu_of(rq)]; @@ -6056,6 +6083,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) task_delta = cfs_rq->h_nr_running; idle_task_delta = cfs_rq->idle_h_nr_running; + delayed_delta = cfs_rq->h_nr_delayed; for_each_sched_entity(se) { struct cfs_rq *qcfs_rq = cfs_rq_of(se); @@ -6073,6 +6101,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) qcfs_rq->h_nr_running += task_delta; qcfs_rq->idle_h_nr_running += idle_task_delta; + qcfs_rq->h_nr_delayed += delayed_delta; /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(qcfs_rq)) @@ -6090,6 +6119,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) qcfs_rq->h_nr_running += task_delta; qcfs_rq->idle_h_nr_running += idle_task_delta; + qcfs_rq->h_nr_delayed += delayed_delta; /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(qcfs_rq)) @@ -6943,7 +6973,7 @@ requeue_delayed_entity(struct sched_entity *se) } update_load_avg(cfs_rq, se, 0); - se->sched_delayed = 0; + clear_delayed(se); } /* @@ -6957,6 +6987,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; int idle_h_nr_running = task_has_idle_policy(p); + int h_nr_delayed = 0; int task_new = !(flags & ENQUEUE_WAKEUP); int rq_h_nr_running = rq->cfs.h_nr_running; u64 slice = 0; @@ -6983,6 +7014,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (p->in_iowait) cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); + if (task_new) + h_nr_delayed = !!se->sched_delayed; + for_each_sched_entity(se) { if (se->on_rq) { if (se->sched_delayed) @@ -7005,6 +7039,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; + cfs_rq->h_nr_delayed += h_nr_delayed; if (cfs_rq_is_idle(cfs_rq)) idle_h_nr_running = 1; @@ -7028,6 +7063,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; + cfs_rq->h_nr_delayed += h_nr_delayed; if (cfs_rq_is_idle(cfs_rq)) idle_h_nr_running = 1; @@ -7090,6 +7126,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) struct task_struct *p = NULL; int idle_h_nr_running = 0; int h_nr_running = 0; + int h_nr_delayed = 0; struct cfs_rq *cfs_rq; u64 slice = 0; @@ -7097,6 +7134,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) p = task_of(se); h_nr_running = 1; idle_h_nr_running = task_has_idle_policy(p); + if (!task_sleep && !task_delayed) + h_nr_delayed = !!se->sched_delayed; } else { cfs_rq = group_cfs_rq(se); slice = cfs_rq_min_slice(cfs_rq); @@ -7114,6 +7153,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) cfs_rq->h_nr_running -= h_nr_running; cfs_rq->idle_h_nr_running -= idle_h_nr_running; + cfs_rq->h_nr_delayed -= h_nr_delayed; if (cfs_rq_is_idle(cfs_rq)) idle_h_nr_running = h_nr_running; @@ -7152,6 +7192,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) cfs_rq->h_nr_running -= h_nr_running; cfs_rq->idle_h_nr_running -= idle_h_nr_running; + cfs_rq->h_nr_delayed -= h_nr_delayed; if (cfs_rq_is_idle(cfs_rq)) idle_h_nr_running = h_nr_running; @@ -8780,7 +8821,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int if (unlikely(throttled_hierarchy(cfs_rq_of(pse)))) return; - if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) { + if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) { set_next_buddy(pse); } diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index fc07382361a88..fee75cc2c47b6 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -321,7 +321,7 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq) { if (___update_load_sum(now, &cfs_rq->avg, scale_load_down(cfs_rq->load.weight), - cfs_rq->h_nr_running, + cfs_rq->h_nr_running - cfs_rq->h_nr_delayed, cfs_rq->curr != NULL)) { ___update_load_avg(&cfs_rq->avg, 1); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 76f5f53a645fc..c5d67a43fe524 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -398,6 +398,11 @@ extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq extern int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init); +static inline bool dl_server_active(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_server_active; +} + #ifdef CONFIG_CGROUP_SCHED extern struct list_head task_groups; @@ -649,6 +654,7 @@ struct cfs_rq { unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */ unsigned int idle_nr_running; /* SCHED_IDLE */ unsigned int idle_h_nr_running; /* SCHED_IDLE */ + unsigned int h_nr_delayed; s64 avg_vruntime; u64 avg_load; @@ -898,8 +904,11 @@ struct dl_rq { static inline void se_update_runnable(struct sched_entity *se) { - if (!entity_is_task(se)) - se->runnable_weight = se->my_q->h_nr_running; + if (!entity_is_task(se)) { + struct cfs_rq *cfs_rq = se->my_q; + + se->runnable_weight = cfs_rq->h_nr_running - cfs_rq->h_nr_delayed; + } } static inline long se_runnable(struct sched_entity *se) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 949a3870946c3..1b8db5aee9d38 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2250,6 +2250,9 @@ void perf_event_detach_bpf_prog(struct perf_event *event) goto unlock; old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); + if (!old_array) + goto put; + ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array); if (ret < 0) { bpf_prog_array_delete_safe(old_array, event->prog); @@ -2258,6 +2261,14 @@ void perf_event_detach_bpf_prog(struct perf_event *event) bpf_prog_array_free_sleepable(old_array); } +put: + /* + * It could be that the bpf_prog is not sleepable (and will be freed + * via normal RCU), but is called from a point that supports sleepable + * programs and uses tasks-trace-RCU. + */ + synchronize_rcu_tasks_trace(); + bpf_prog_put(event->prog); event->prog = NULL; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index fed382b7881b8..4875e7f5de3db 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1402,9 +1402,13 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, #ifdef CONFIG_BPF_EVENTS if (bpf_prog_array_valid(call)) { + const struct bpf_prog_array *array; u32 ret; - ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run); + rcu_read_lock_trace(); + array = rcu_dereference_check(call->prog_array, rcu_read_lock_trace_held()); + ret = bpf_prog_run_array_uprobe(array, regs, bpf_prog_run); + rcu_read_unlock_trace(); if (!ret) return; } diff --git a/mm/slub.c b/mm/slub.c index 19980419b176c..c2151c9fee228 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2189,9 +2189,24 @@ bool memcg_slab_post_charge(void *p, gfp_t flags) folio = virt_to_folio(p); if (!folio_test_slab(folio)) { - return folio_memcg_kmem(folio) || - (__memcg_kmem_charge_page(folio_page(folio, 0), flags, - folio_order(folio)) == 0); + int size; + + if (folio_memcg_kmem(folio)) + return true; + + if (__memcg_kmem_charge_page(folio_page(folio, 0), flags, + folio_order(folio))) + return false; + + /* + * This folio has already been accounted in the global stats but + * not in the memcg stats. So, subtract from the global and use + * the interface which adds to both global and memcg stats. + */ + size = folio_size(folio); + node_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, -size); + lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B, size); + return true; } slab = folio_slab(folio); diff --git a/net/core/filter.c b/net/core/filter.c index 6625b3f563a4a..21131ec25f24a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7899,42 +7899,37 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = { #endif /* CONFIG_INET */ -bool bpf_helper_changes_pkt_data(void *func) -{ - if (func == bpf_skb_vlan_push || - func == bpf_skb_vlan_pop || - func == bpf_skb_store_bytes || - func == bpf_skb_change_proto || - func == bpf_skb_change_head || - func == sk_skb_change_head || - func == bpf_skb_change_tail || - func == sk_skb_change_tail || - func == bpf_skb_adjust_room || - func == sk_skb_adjust_room || - func == bpf_skb_pull_data || - func == sk_skb_pull_data || - func == bpf_clone_redirect || - func == bpf_l3_csum_replace || - func == bpf_l4_csum_replace || - func == bpf_xdp_adjust_head || - func == bpf_xdp_adjust_meta || - func == bpf_msg_pull_data || - func == bpf_msg_push_data || - func == bpf_msg_pop_data || - func == bpf_xdp_adjust_tail || -#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) - func == bpf_lwt_seg6_store_bytes || - func == bpf_lwt_seg6_adjust_srh || - func == bpf_lwt_seg6_action || -#endif -#ifdef CONFIG_INET - func == bpf_sock_ops_store_hdr_opt || -#endif - func == bpf_lwt_in_push_encap || - func == bpf_lwt_xmit_push_encap) +bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_clone_redirect: + case BPF_FUNC_l3_csum_replace: + case BPF_FUNC_l4_csum_replace: + case BPF_FUNC_lwt_push_encap: + case BPF_FUNC_lwt_seg6_action: + case BPF_FUNC_lwt_seg6_adjust_srh: + case BPF_FUNC_lwt_seg6_store_bytes: + case BPF_FUNC_msg_pop_data: + case BPF_FUNC_msg_pull_data: + case BPF_FUNC_msg_push_data: + case BPF_FUNC_skb_adjust_room: + case BPF_FUNC_skb_change_head: + case BPF_FUNC_skb_change_proto: + case BPF_FUNC_skb_change_tail: + case BPF_FUNC_skb_pull_data: + case BPF_FUNC_skb_store_bytes: + case BPF_FUNC_skb_vlan_pop: + case BPF_FUNC_skb_vlan_push: + case BPF_FUNC_store_hdr_opt: + case BPF_FUNC_xdp_adjust_head: + case BPF_FUNC_xdp_adjust_meta: + case BPF_FUNC_xdp_adjust_tail: + /* tail-called program could call any of the above */ + case BPF_FUNC_tail_call: return true; - - return false; + default: + return false; + } } const struct bpf_func_proto bpf_event_output_data_proto __weak; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 78347d7d25ef3..f1b9b3958792c 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -159,6 +159,7 @@ static void sock_map_del_link(struct sock *sk, verdict_stop = true; list_del(&link->list); sk_psock_free_link(link); + break; } } spin_unlock_bh(&psock->link_lock); @@ -411,12 +412,11 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key) static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock **psk) { - struct sock *sk; + struct sock *sk = NULL; int err = 0; spin_lock_bh(&stab->lock); - sk = *psk; - if (!sk_test || sk_test == sk) + if (!sk_test || sk_test == *psk) sk = xchg(psk, NULL); if (likely(sk)) diff --git a/rust/Makefile b/rust/Makefile index 9da9042fd6279..a40a3936126d6 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -280,9 +280,22 @@ endif # architecture instead of generating `usize`. bindgen_c_flags_final = $(bindgen_c_flags_lto) -fno-builtin -D__BINDGEN__ +# Each `bindgen` release may upgrade the list of Rust target versions. By +# default, the highest stable release in their list is used. Thus we need to set +# a `--rust-target` to avoid future `bindgen` releases emitting code that +# `rustc` may not understand. On top of that, `bindgen` does not support passing +# an unknown Rust target version. +# +# Therefore, the Rust target for `bindgen` can be only as high as the minimum +# Rust version the kernel supports and only as high as the greatest stable Rust +# target supported by the minimum `bindgen` version the kernel supports (that +# is, if we do not test the actual `rustc`/`bindgen` versions running). +# +# Starting with `bindgen` 0.71.0, we will be able to set any future Rust version +# instead, i.e. we will be able to set here our minimum supported Rust version. quiet_cmd_bindgen = BINDGEN $@ cmd_bindgen = \ - $(BINDGEN) $< $(bindgen_target_flags) \ + $(BINDGEN) $< $(bindgen_target_flags) --rust-target 1.68 \ --use-core --with-derive-default --ctypes-prefix ffi --no-layout-tests \ --no-debug '.*' --enable-function-attribute-detection \ -o $@ -- $(bindgen_c_flags_final) -DMODULE \ diff --git a/scripts/kernel-doc b/scripts/kernel-doc index f66070176ba31..4ee843d3600e2 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -267,7 +267,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; my $doc_inline_end = '^\s*\*/\s*$'; my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; -my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*\w+\)\s*;'; +my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*;'; my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; diff --git a/sound/core/control_led.c b/sound/core/control_led.c index 65a1ebe877768..e33dfcf863cf1 100644 --- a/sound/core/control_led.c +++ b/sound/core/control_led.c @@ -668,10 +668,16 @@ static void snd_ctl_led_sysfs_add(struct snd_card *card) goto cerr; led->cards[card->number] = led_card; snprintf(link_name, sizeof(link_name), "led-%s", led->name); - WARN(sysfs_create_link(&card->ctl_dev->kobj, &led_card->dev.kobj, link_name), - "can't create symlink to controlC%i device\n", card->number); - WARN(sysfs_create_link(&led_card->dev.kobj, &card->card_dev.kobj, "card"), - "can't create symlink to card%i\n", card->number); + if (sysfs_create_link(&card->ctl_dev->kobj, &led_card->dev.kobj, + link_name)) + dev_err(card->dev, + "%s: can't create symlink to controlC%i device\n", + __func__, card->number); + if (sysfs_create_link(&led_card->dev.kobj, &card->card_dev.kobj, + "card")) + dev_err(card->dev, + "%s: can't create symlink to card%i\n", + __func__, card->number); continue; cerr: diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index d96266c8cb38e..4ef7878e8fd4b 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -151,10 +151,6 @@ static int cs35l56_hda_runtime_resume(struct device *dev) } } - ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); - if (ret) - goto err; - return 0; err: @@ -1059,9 +1055,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config, ARRAY_SIZE(cs35l56_hda_dai_config)); - ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); - if (ret) - goto dsp_err; /* * By default only enable one ASP1TXn, where n=amplifier index, @@ -1087,7 +1080,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) pm_err: pm_runtime_disable(cs35l56->base.dev); -dsp_err: cs_dsp_remove(&cs35l56->cs_dsp); err: gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 0); diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index e4673a71551a3..d40197fb5fbd5 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1134,7 +1134,6 @@ struct ca0132_spec { struct hda_codec *codec; struct delayed_work unsol_hp_work; - int quirk; #ifdef ENABLE_TUNING_CONTROLS long cur_ctl_vals[TUNING_CTLS_COUNT]; @@ -1166,7 +1165,6 @@ struct ca0132_spec { * CA0132 quirks table */ enum { - QUIRK_NONE, QUIRK_ALIENWARE, QUIRK_ALIENWARE_M17XR4, QUIRK_SBZ, @@ -1176,10 +1174,11 @@ enum { QUIRK_R3D, QUIRK_AE5, QUIRK_AE7, + QUIRK_NONE = HDA_FIXUP_ID_NOT_SET, }; #ifdef CONFIG_PCI -#define ca0132_quirk(spec) ((spec)->quirk) +#define ca0132_quirk(spec) ((spec)->codec->fixup_id) #define ca0132_use_pci_mmio(spec) ((spec)->use_pci_mmio) #define ca0132_use_alt_functions(spec) ((spec)->use_alt_functions) #define ca0132_use_alt_controls(spec) ((spec)->use_alt_controls) @@ -1293,7 +1292,7 @@ static const struct hda_pintbl ae7_pincfgs[] = { {} }; -static const struct snd_pci_quirk ca0132_quirks[] = { +static const struct hda_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1028, 0x057b, "Alienware M17x R4", QUIRK_ALIENWARE_M17XR4), SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE), @@ -1316,6 +1315,19 @@ static const struct snd_pci_quirk ca0132_quirks[] = { {} }; +static const struct hda_model_fixup ca0132_quirk_models[] = { + { .id = QUIRK_ALIENWARE, .name = "alienware" }, + { .id = QUIRK_ALIENWARE_M17XR4, .name = "alienware-m17xr4" }, + { .id = QUIRK_SBZ, .name = "sbz" }, + { .id = QUIRK_ZXR, .name = "zxr" }, + { .id = QUIRK_ZXR_DBPRO, .name = "zxr-dbpro" }, + { .id = QUIRK_R3DI, .name = "r3di" }, + { .id = QUIRK_R3D, .name = "r3d" }, + { .id = QUIRK_AE5, .name = "ae5" }, + { .id = QUIRK_AE7, .name = "ae7" }, + {} +}; + /* Output selection quirk info structures. */ #define MAX_QUIRK_MMIO_GPIO_SET_VALS 3 #define MAX_QUIRK_SCP_SET_VALS 2 @@ -9957,17 +9969,15 @@ static int ca0132_prepare_verbs(struct hda_codec *codec) */ static void sbz_detect_quirk(struct hda_codec *codec) { - struct ca0132_spec *spec = codec->spec; - switch (codec->core.subsystem_id) { case 0x11020033: - spec->quirk = QUIRK_ZXR; + codec->fixup_id = QUIRK_ZXR; break; case 0x1102003f: - spec->quirk = QUIRK_ZXR_DBPRO; + codec->fixup_id = QUIRK_ZXR_DBPRO; break; default: - spec->quirk = QUIRK_SBZ; + codec->fixup_id = QUIRK_SBZ; break; } } @@ -9976,7 +9986,6 @@ static int patch_ca0132(struct hda_codec *codec) { struct ca0132_spec *spec; int err; - const struct snd_pci_quirk *quirk; codec_dbg(codec, "patch_ca0132\n"); @@ -9987,11 +9996,7 @@ static int patch_ca0132(struct hda_codec *codec) spec->codec = codec; /* Detect codec quirk */ - quirk = snd_pci_quirk_lookup(codec->bus->pci, ca0132_quirks); - if (quirk) - spec->quirk = quirk->value; - else - spec->quirk = QUIRK_NONE; + snd_hda_pick_fixup(codec, ca0132_quirk_models, ca0132_quirks, NULL); if (ca0132_quirk(spec) == QUIRK_SBZ) sbz_detect_quirk(codec); @@ -10068,7 +10073,7 @@ static int patch_ca0132(struct hda_codec *codec) spec->mem_base = pci_iomap(codec->bus->pci, 2, 0xC20); if (spec->mem_base == NULL) { codec_warn(codec, "pci_iomap failed! Setting quirk to QUIRK_NONE."); - spec->quirk = QUIRK_NONE; + codec->fixup_id = QUIRK_NONE; } } #endif diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 17392b21d5bff..61ba5dc35b8b8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7714,6 +7714,7 @@ enum { ALC274_FIXUP_HP_MIC, ALC274_FIXUP_HP_HEADSET_MIC, ALC274_FIXUP_HP_ENVY_GPIO, + ALC274_FIXUP_ASUS_ZEN_AIO_27, ALC256_FIXUP_ASUS_HPE, ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, ALC287_FIXUP_HP_GPIO_LED, @@ -9516,6 +9517,26 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc274_fixup_hp_envy_gpio, }, + [ALC274_FIXUP_ASUS_ZEN_AIO_27] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x10 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xc420 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x40 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x8800 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x49 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0249 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x4a }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x202b }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x62 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xa007 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x6b }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x5060 }, + {} + }, + .chained = true, + .chain_id = ALC2XX_FIXUP_HEADSET_MIC, + }, [ALC256_FIXUP_ASUS_HPE] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -10142,6 +10163,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1025, 0x1534, "Acer Predator PH315-54", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x159c, "Acer Nitro 5 AN515-58", ALC2XX_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X), @@ -10630,6 +10652,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), + SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), @@ -11177,6 +11200,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, {.id = ALC236_FIXUP_LENOVO_INV_DMIC, .name = "alc236-fixup-lenovo-inv-mic"}, + {.id = ALC2XX_FIXUP_HEADSET_MIC, .name = "alc2xx-fixup-headset-mic"}, {} }; #define ALC225_STANDARD_PINS \ diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index e38c5885dadfb..ecf57a6cb7c37 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -578,14 +578,19 @@ static int acp6x_probe(struct platform_device *pdev) handle = ACPI_HANDLE(pdev->dev.parent); ret = acpi_evaluate_integer(handle, "_WOV", NULL, &dmic_status); - if (!ACPI_FAILURE(ret)) + if (!ACPI_FAILURE(ret)) { wov_en = dmic_status; + if (!wov_en) + return -ENODEV; + } else { + /* Incase of ACPI method read failure then jump to check_dmi_entry */ + goto check_dmi_entry; + } - if (is_dmic_enable && wov_en) + if (is_dmic_enable) platform_set_drvdata(pdev, &acp6x_card); - else - return 0; +check_dmi_entry: /* check for any DMI overrides */ dmi_id = dmi_first_match(yc_acp_quirk_table); if (dmi_id) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index be2ca5eb6c938..fb8cd2284fe85 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -370,7 +370,7 @@ static void sngl_calib_start(struct tasdevice_priv *tas_priv, int i, tasdevice_dev_read(tas_priv, i, p[j].reg, (int *)&p[j].val[0]); } else { - switch (p[j].reg) { + switch (tas2781_cali_start_reg[j].reg) { case 0: { if (!reg[0]) continue; diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 8e88830e8e577..678540b782805 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -29,8 +29,8 @@ config SND_SOC_FSL_SAI config SND_SOC_FSL_MQS tristate "Medium Quality Sound (MQS) module support" depends on SND_SOC_FSL_SAI + depends on IMX_SCMI_MISC_DRV || !IMX_SCMI_MISC_DRV select REGMAP_MMIO - select IMX_SCMI_MISC_DRV if IMX_SCMI_MISC_EXT !=n help Say Y if you want to add Medium Quality Sound (MQS) support for the Freescale CPUs. diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c index b6ff04f7138a2..ee946e0d3f496 100644 --- a/sound/soc/fsl/fsl_spdif.c +++ b/sound/soc/fsl/fsl_spdif.c @@ -1204,7 +1204,7 @@ static struct snd_kcontrol_new fsl_spdif_ctrls[] = { }, /* DPLL lock info get controller */ { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = RX_SAMPLE_RATE_KCONTROL, .access = SNDRV_CTL_ELEM_ACCESS_READ | SNDRV_CTL_ELEM_ACCESS_VOLATILE, diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index 1e0bfd59d5118..9c184ab734688 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -171,7 +171,7 @@ static int fsl_xcvr_capds_put(struct snd_kcontrol *kcontrol, } static struct snd_kcontrol_new fsl_xcvr_earc_capds_kctl = { - .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = "Capabilities Data Structure", .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .info = fsl_xcvr_type_capds_bytes_info, diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c index 5280c1b20d85e..1f5c4e8ff1b90 100644 --- a/sound/soc/generic/audio-graph-card2.c +++ b/sound/soc/generic/audio-graph-card2.c @@ -771,7 +771,7 @@ static void graph_link_init(struct simple_util_priv *priv, of_node_get(port_codec); if (graph_lnk_is_multi(port_codec)) { ep_codec = graph_get_next_multi_ep(&port_codec); - of_node_put(port_cpu); + of_node_put(port_codec); port_codec = ep_to_port(ep_codec); } else { ep_codec = of_graph_get_next_port_endpoint(port_codec, NULL); diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 810be7c949a5c..f3369e569abc4 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1067,8 +1067,12 @@ static int sof_card_dai_links_create(struct snd_soc_card *card) return ret; } - /* One per DAI link, worst case is a DAI link for every endpoint */ - sof_dais = kcalloc(num_ends, sizeof(*sof_dais), GFP_KERNEL); + /* + * One per DAI link, worst case is a DAI link for every endpoint, also + * add one additional to act as a terminator such that code can iterate + * until it hits an uninitialised DAI. + */ + sof_dais = kcalloc(num_ends + 1, sizeof(*sof_dais), GFP_KERNEL); if (!sof_dais) return -ENOMEM; diff --git a/sound/usb/format.c b/sound/usb/format.c index 0cbf1d4fbe6ed..6049d957694ca 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -60,6 +60,8 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, pcm_formats |= SNDRV_PCM_FMTBIT_SPECIAL; /* flag potentially raw DSD capable altsettings */ fp->dsd_raw = true; + /* clear special format bit to avoid "unsupported format" msg below */ + format &= ~UAC2_FORMAT_TYPE_I_RAW_DATA; } format <<= 1; @@ -71,8 +73,11 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, sample_width = as->bBitResolution; sample_bytes = as->bSubslotSize; - if (format & UAC3_FORMAT_TYPE_I_RAW_DATA) + if (format & UAC3_FORMAT_TYPE_I_RAW_DATA) { pcm_formats |= SNDRV_PCM_FMTBIT_SPECIAL; + /* clear special format bit to avoid "unsupported format" msg below */ + format &= ~UAC3_FORMAT_TYPE_I_RAW_DATA; + } format <<= 1; break; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 00101875d9a8d..8ba0aff8be2ec 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2179,6 +2179,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_MIC_RES_384), DEVICE_FLG(0x046d, 0x09a4, /* Logitech QuickCam E 3500 */ QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), + DEVICE_FLG(0x0499, 0x1506, /* Yamaha THR5 */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */ @@ -2323,6 +2325,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */ QUIRK_FLAG_SET_IFACE_FIRST), + DEVICE_FLG(0x262a, 0x9302, /* ddHiFi TC44C */ + QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index df3230fdac395..66ab2e0bae5fd 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -81,32 +81,31 @@ do_syscall: stp x27, x28, [sp, #96] // Set SVCR if we're doing SME - cbz x1, 1f + cbz x1, load_gpr adrp x2, svcr_in ldr x2, [x2, :lo12:svcr_in] msr S3_3_C4_C2_2, x2 -1: // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, load_gpr mov w12, #0 ldr x2, =za_in -2: _ldr_za 12, 2 +1: _ldr_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, load_gpr adrp x2, zt_in add x2, x2, :lo12:zt_in _ldr_zt 2 -1: +load_gpr: // Load GPRs x8-x28, and save our SP/FP for later comparison ldr x2, =gpr_in add x2, x2, #64 @@ -125,9 +124,9 @@ do_syscall: str x30, [x2], #8 // LR // Load FPRs if we're not doing neither SVE nor streaming SVE - cbnz x0, 1f + cbnz x0, check_sve_in ldr x2, =svcr_in - tbnz x2, #SVCR_SM_SHIFT, 1f + tbnz x2, #SVCR_SM_SHIFT, check_sve_in ldr x2, =fpr_in ldp q0, q1, [x2] @@ -148,8 +147,8 @@ do_syscall: ldp q30, q31, [x2, #16 * 30] b 2f -1: +check_sve_in: // Load the SVE registers if we're doing SVE/SME ldr x2, =z_in @@ -256,32 +255,31 @@ do_syscall: stp q30, q31, [x2, #16 * 30] // Save SVCR if we're doing SME - cbz x1, 1f + cbz x1, check_sve_out mrs x2, S3_3_C4_C2_2 adrp x3, svcr_out str x2, [x3, :lo12:svcr_out] -1: // Save ZA if it's enabled - uses x12 as scratch due to SME STR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, check_sve_out mov w12, #0 ldr x2, =za_out -2: _str_za 12, 2 +1: _str_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, check_sve_out adrp x2, zt_out add x2, x2, :lo12:zt_out _str_zt 2 -1: +check_sve_out: // Save the SVE state if we have some cbz x0, 1f diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c new file mode 100644 index 0000000000000..7526de3790814 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "changes_pkt_data_freplace.skel.h" +#include "changes_pkt_data.skel.h" +#include <test_progs.h> + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load) +{ + struct changes_pkt_data_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct changes_pkt_data *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = changes_pkt_data__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = changes_pkt_data__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "changes_pkt_data__load")) + goto out; + freplace = changes_pkt_data_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = changes_pkt_data_freplace__load(freplace); + print_verifier_log(log); + if (expect_load) { + ASSERT_OK(err, "changes_pkt_data_freplace__load"); + } else { + ASSERT_ERR(err, "changes_pkt_data_freplace__load"); + ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); + } + +out: + changes_pkt_data_freplace__destroy(freplace); + changes_pkt_data__destroy(main); +} + +/* There are two global subprograms in both changes_pkt_data.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + */ +void test_changes_pkt_data_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool changes; + } mains[] = { + { "main_with_subprogs", "changes_pkt_data", true }, + { "main_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }; + struct { + const char *func; + bool changes; + } replacements[] = { + { "changes_pkt_data", true }, + { "does_not_change_pkt_data", false } + }; + char buf[64]; + + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[i].to_be_replaced, replacements[j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, + mains[i].changes || !replacements[j].changes); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c index 6fa19449297e9..43676a9922dce 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c @@ -3,11 +3,14 @@ #include <test_progs.h> #include "raw_tp_null.skel.h" +#include "raw_tp_null_fail.skel.h" void test_raw_tp_null(void) { struct raw_tp_null *skel; + RUN_TESTS(raw_tp_null_fail); + skel = raw_tp_null__open_and_load(); if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index fdff0652d7ef5..248754296d972 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -934,8 +934,10 @@ static void test_sockmap_same_sock(void) err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); - if (err) + if (err) { + close(tcp); goto out; + } for (i = 0; i < 2; i++) { err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); @@ -954,14 +956,14 @@ static void test_sockmap_same_sock(void) ASSERT_OK(err, "bpf_map_update_elem(tcp)"); } + close(tcp); err = bpf_map_delete_elem(map, &zero); - ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + ASSERT_ERR(err, "bpf_map_delete_elem(entry)"); close(stream[0]); close(stream[1]); out: close(dgram); - close(tcp); close(udp); test_sockmap_pass_prog__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c new file mode 100644 index 0000000000000..43cada48b28ad --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +SEC("?tc") +int main_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c new file mode 100644 index 0000000000000..f9a622705f1b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("?freplace") +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +SEC("?freplace") +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c index 457f34c151e32..5927054b6dd96 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) if (task->pid != tid) return 0; - i = i + skb->mark + 1; - /* The compiler may move the NULL check before this deref, which causes - * the load to fail as deref of scalar. Prevent that by using a barrier. + /* If dead code elimination kicks in, the increment +=2 will be + * removed. For raw_tp programs attaching to tracepoints in kernel + * modules, we mark input arguments as PTR_MAYBE_NULL, so branch + * prediction should never kick in. */ - barrier(); - /* If dead code elimination kicks in, the increment below will - * be removed. For raw_tp programs, we mark input arguments as - * PTR_MAYBE_NULL, so branch prediction should never kick in. - */ - if (!skb) - i += 2; + asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;" + : [i]"+r"(i) + : [ctx]"r"(skb) + : "memory"); return 0; } diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c new file mode 100644 index 0000000000000..38d669957bf1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +/* Ensure module parameter has PTR_MAYBE_NULL */ +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} + +/* Check NULL marking */ +SEC("tp_btf/sched_pi_setprio") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index d1a57f7d09bd8..fe6249d99b315 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -11,6 +11,8 @@ int subprog_tc(struct __sk_buff *skb) __sink(skb); __sink(ret); + /* let verifier know that 'subprog_tc' can change pointers to skb->data */ + bpf_skb_change_proto(skb, 0, 0); return ret; } diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index 5aaf2b065f86c..bba3e37f749b8 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -7,11 +7,7 @@ #include "bpf_misc.h" SEC("tp_btf/bpf_testmod_test_nullable_bare") -/* This used to be a failure test, but raw_tp nullable arguments can now - * directly be dereferenced, whether they have nullable annotation or not, - * and don't need to be explicitly checked. - */ -__success +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) { return nullable_ctx->len; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index a570e48b917ac..28b939572cdad 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void btf_ctx_access_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\ + r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\ r0 = 0; \ exit; \ " ::: __clobber_all); @@ -23,7 +23,43 @@ __success __retval(0) __naked void ctx_access_u32_pointer_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u32") +__failure __msg("size 4 must be 8") +__naked void ctx_access_u32_pointer_reject_32(void) +{ + asm volatile (" \ + r2 = *(u32 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u16") +__failure __msg("size 2 must be 8") +__naked void ctx_access_u32_pointer_reject_16(void) +{ + asm volatile (" \ + r2 = *(u16 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u8") +__failure __msg("size 1 must be 8") +__naked void ctx_access_u32_pointer_reject_8(void) +{ + asm volatile (" \ + r2 = *(u8 *)(r1 + 0); /* load 1st argument with narrow load */\ r0 = 0; \ exit; \ " ::: __clobber_all); diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c index ec79cbcfde91e..87e51a215558f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_d_path.c +++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void d_path_accept(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ @@ -31,7 +31,7 @@ __failure __msg("helper call is not allowed in probe") __naked void d_path_reject(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index d3e70e38e4420..0d5e56dffabb8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -50,6 +50,13 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + SEC("cgroup/skb") __description("skb->sk: no NULL check") __failure __msg("invalid mem access 'sock_common_or_null'") @@ -1037,4 +1044,53 @@ __naked void sock_create_read_src_port(void) : __clobber_all); } +__noinline +long skb_pull_data2(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline +long skb_pull_data1(struct __sk_buff *sk, __u32 len) +{ + return skb_pull_data2(sk, len); +} + +/* global function calls bpf_skb_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + skb_pull_data1(sk, 0); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + +__noinline +int tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; |