summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu1
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst1
-rw-r--r--Documentation/admin-guide/hw-vuln/vmscape.rst110
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt11
-rw-r--r--Documentation/netlink/specs/mptcp_pm.yaml2
-rw-r--r--Documentation/networking/can.rst2
-rw-r--r--Documentation/networking/mptcp.rst8
-rw-r--r--MAINTAINERS21
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c2
-rw-r--r--arch/s390/kernel/kexec_elf.c2
-rw-r--r--arch/s390/kernel/kexec_image.c2
-rw-r--r--arch/s390/kernel/machine_kexec_file.c6
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c4
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c4
-rw-r--r--arch/s390/kernel/perf_pai_ext.c2
-rw-r--r--arch/s390/mm/pgtable.c2
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/entry-common.h7
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/kernel/cpu/bugs.c285
-rw-r--r--arch/x86/kernel/cpu/common.c86
-rw-r--r--arch/x86/kvm/x86.c9
-rw-r--r--block/fops.c13
-rw-r--r--drivers/base/cpu.c3
-rw-r--r--drivers/cpufreq/amd-pstate.c19
-rw-r--r--drivers/cpufreq/intel_pstate.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c64
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c3
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c34
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c74
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c115
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c78
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c6
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.c23
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c15
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.c35
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.h9
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c8
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c10
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c16
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h2
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c2
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c9
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c42
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c3
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c42
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h2
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules3
-rw-r--r--drivers/gpu/nova-core/Kconfig1
-rw-r--r--drivers/mtd/devices/Kconfig4
-rw-r--r--drivers/mtd/nand/raw/atmel/nand-controller.c16
-rw-r--r--drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c4
-rw-r--r--drivers/mtd/nand/raw/stm32_fmc2_nand.c46
-rw-r--r--drivers/mtd/nand/spi/winbond.c37
-rw-r--r--drivers/net/can/rcar/rcar_can.c8
-rw-r--r--drivers/net/can/xilinx_can.c16
-rw-r--r--drivers/net/dsa/b53/b53_common.c17
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c34
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c5
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c3
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.c20
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c4
-rw-r--r--drivers/net/macsec.c1
-rw-r--r--drivers/net/phy/Kconfig2
-rw-r--r--drivers/net/phy/phy.c12
-rw-r--r--drivers/net/phy/phy_device.c5
-rw-r--r--drivers/net/phy/phylink.c28
-rw-r--r--drivers/net/wireless/ath/ath12k/mac.c122
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c26
-rw-r--r--drivers/net/wireless/virtual/virt_wifi.c4
-rw-r--r--drivers/pci/controller/pci-mvebu.c21
-rw-r--r--drivers/regulator/sy7636a-regulator.c7
-rw-r--r--fs/btrfs/extent_io.c40
-rw-r--r--fs/btrfs/inode.c12
-rw-r--r--fs/btrfs/qgroup.c6
-rw-r--r--fs/btrfs/super.c9
-rw-r--r--fs/btrfs/volumes.c5
-rw-r--r--fs/coredump.c4
-rw-r--r--fs/exec.c2
-rw-r--r--fs/fhandle.c8
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/fuse/dir.c3
-rw-r--r--fs/fuse/file.c5
-rw-r--r--fs/fuse/fuse_i.h14
-rw-r--r--fs/fuse/inode.c16
-rw-r--r--fs/fuse/passthrough.c5
-rw-r--r--fs/fuse/virtio_fs.c2
-rw-r--r--fs/kernfs/file.c58
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/file.c40
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c21
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/io.c13
-rw-r--r--fs/nfs/localio.c21
-rw-r--r--fs/nfs/nfs42proc.c35
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c7
-rw-r--r--fs/nfs/nfstrace.h1
-rw-r--r--fs/nfs/write.c53
-rw-r--r--fs/ocfs2/extent_map.c10
-rw-r--r--fs/proc/generic.c3
-rw-r--r--fs/smb/client/cifsglob.h13
-rw-r--r--fs/smb/client/file.c18
-rw-r--r--fs/smb/client/inode.c86
-rw-r--r--fs/smb/client/smb2glob.h3
-rw-r--r--fs/smb/client/smb2inode.c204
-rw-r--r--fs/smb/client/smb2ops.c32
-rw-r--r--fs/smb/client/smb2proto.h3
-rw-r--r--fs/smb/client/trace.h9
-rw-r--r--include/linux/compiler-clang.h29
-rw-r--r--include/linux/cpu.h1
-rw-r--r--include/linux/energy_model.h10
-rw-r--r--include/linux/ethtool.h4
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/linux/kasan.h6
-rw-r--r--include/net/netfilter/nf_tables.h1
-rw-r--r--include/net/netfilter/nf_tables_core.h10
-rw-r--r--include/net/netns/nftables.h1
-rw-r--r--init/main.c2
-rw-r--r--io_uring/rw.c3
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/core.c21
-rw-r--r--kernel/bpf/cpumap.c4
-rw-r--r--kernel/bpf/crypto.c2
-rw-r--r--kernel/bpf/helpers.c16
-rw-r--r--kernel/bpf/rqspinlock.c2
-rw-r--r--kernel/bpf/verifier.c6
-rw-r--r--kernel/dma/debug.c48
-rw-r--r--kernel/dma/debug.h20
-rw-r--r--kernel/dma/mapping.c4
-rw-r--r--kernel/power/energy_model.c29
-rw-r--r--kernel/power/hibernate.c1
-rw-r--r--kernel/trace/fgraph.c3
-rw-r--r--kernel/trace/trace.c10
-rw-r--r--kernel/trace/trace_events_user.c2
-rw-r--r--kernel/trace/trace_osnoise.c3
-rw-r--r--mm/damon/core.c4
-rw-r--r--mm/damon/lru_sort.c5
-rw-r--r--mm/damon/reclaim.c5
-rw-r--r--mm/damon/sysfs.c14
-rw-r--r--mm/hugetlb.c9
-rw-r--r--mm/kasan/shadow.c31
-rw-r--r--mm/khugepaged.c4
-rw-r--r--mm/memory-failure.c20
-rw-r--r--mm/memory_hotplug.c10
-rw-r--r--mm/mremap.c9
-rw-r--r--mm/percpu.c20
-rw-r--r--mm/vmalloc.c8
-rw-r--r--net/bridge/br.c7
-rw-r--r--net/can/j1939/bus.c5
-rw-r--r--net/can/j1939/j1939-priv.h1
-rw-r--r--net/can/j1939/main.c3
-rw-r--r--net/can/j1939/socket.c52
-rw-r--r--net/core/dev_ioctl.c22
-rw-r--r--net/hsr/hsr_device.c28
-rw-r--r--net/hsr/hsr_main.c4
-rw-r--r--net/hsr/hsr_main.h3
-rw-r--r--net/ipv4/ip_tunnel_core.c6
-rw-r--r--net/ipv4/tcp_bpf.c5
-rw-r--r--net/mptcp/sockopt.c11
-rw-r--r--net/netfilter/nf_tables_api.c66
-rw-r--r--net/netfilter/nft_lookup.c46
-rw-r--r--net/netfilter/nft_set_bitmap.c3
-rw-r--r--net/netfilter/nft_set_pipapo.c20
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c4
-rw-r--r--net/netfilter/nft_set_rbtree.c6
-rw-r--r--net/netlink/genetlink.c3
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/xprtsock.c6
-rw-r--r--net/wireless/nl80211.c13
-rw-r--r--net/xdp/xsk.c113
-rw-r--r--net/xdp/xsk_queue.h12
-rw-r--r--rust/kernel/device.rs5
-rw-r--r--samples/ftrace/ftrace-direct-modify.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/free_timer.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h4
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c46
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c5
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c8
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c17
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c18
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rw-r--r--tools/testing/selftests/net/can/config3
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh2
219 files changed, 2504 insertions, 1130 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index ab8cd337f43a..8aed6d94c4cd 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -586,6 +586,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/srbds
/sys/devices/system/cpu/vulnerabilities/tsa
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ /sys/devices/system/cpu/vulnerabilities/vmscape
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 89ca636081b7..55d747511f83 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -26,3 +26,4 @@ are configurable at compile, boot or run time.
rsb
old_microcode
indirect-target-selection
+ vmscape
diff --git a/Documentation/admin-guide/hw-vuln/vmscape.rst b/Documentation/admin-guide/hw-vuln/vmscape.rst
new file mode 100644
index 000000000000..d9b9a2b6c114
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/vmscape.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+VMSCAPE
+=======
+
+VMSCAPE is a vulnerability that may allow a guest to influence the branch
+prediction in host userspace. It particularly affects hypervisors like QEMU.
+
+Even if a hypervisor may not have any sensitive data like disk encryption keys,
+guest-userspace may be able to attack the guest-kernel using the hypervisor as
+a confused deputy.
+
+Affected processors
+-------------------
+
+The following CPU families are affected by VMSCAPE:
+
+**Intel processors:**
+ - Skylake generation (Parts without Enhanced-IBRS)
+ - Cascade Lake generation - (Parts affected by ITS guest/host separation)
+ - Alder Lake and newer (Parts affected by BHI)
+
+Note that, BHI affected parts that use BHB clearing software mitigation e.g.
+Icelake are not vulnerable to VMSCAPE.
+
+**AMD processors:**
+ - Zen series (families 0x17, 0x19, 0x1a)
+
+** Hygon processors:**
+ - Family 0x18
+
+Mitigation
+----------
+
+Conditional IBPB
+----------------
+
+Kernel tracks when a CPU has run a potentially malicious guest and issues an
+IBPB before the first exit to userspace after VM-exit. If userspace did not run
+between VM-exit and the next VM-entry, no IBPB is issued.
+
+Note that the existing userspace mitigation against Spectre-v2 is effective in
+protecting the userspace. They are insufficient to protect the userspace VMMs
+from a malicious guest. This is because Spectre-v2 mitigations are applied at
+context switch time, while the userspace VMM can run after a VM-exit without a
+context switch.
+
+Vulnerability enumeration and mitigation is not applied inside a guest. This is
+because nested hypervisors should already be deploying IBPB to isolate
+themselves from nested guests.
+
+SMT considerations
+------------------
+
+When Simultaneous Multi-Threading (SMT) is enabled, hypervisors can be
+vulnerable to cross-thread attacks. For complete protection against VMSCAPE
+attacks in SMT environments, STIBP should be enabled.
+
+The kernel will issue a warning if SMT is enabled without adequate STIBP
+protection. Warning is not issued when:
+
+- SMT is disabled
+- STIBP is enabled system-wide
+- Intel eIBRS is enabled (which implies STIBP protection)
+
+System information and options
+------------------------------
+
+The sysfs file showing VMSCAPE mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/vmscape
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+ The processor is not vulnerable to VMSCAPE attacks.
+
+ * 'Vulnerable':
+
+ The processor is vulnerable and no mitigation has been applied.
+
+ * 'Mitigation: IBPB before exit to userspace':
+
+ Conditional IBPB mitigation is enabled. The kernel tracks when a CPU has
+ run a potentially malicious guest and issues an IBPB before the first
+ exit to userspace after VM-exit.
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+ IBPB is issued on every VM-exit. This occurs when other mitigations like
+ RETBLEED or SRSO are already issuing IBPB on VM-exit.
+
+Mitigation control on the kernel command line
+----------------------------------------------
+
+The mitigation can be controlled via the ``vmscape=`` command line parameter:
+
+ * ``vmscape=off``:
+
+ Disable the VMSCAPE mitigation.
+
+ * ``vmscape=ibpb``:
+
+ Enable conditional IBPB mitigation (default when CONFIG_MITIGATION_VMSCAPE=y).
+
+ * ``vmscape=force``:
+
+ Force vulnerability detection and mitigation even on processors that are
+ not known to be affected.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..5a7a83c411e9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3829,6 +3829,7 @@
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
tsx_async_abort=off [X86]
+ vmscape=off [X86]
Exceptions:
This does not have any effect on
@@ -8041,6 +8042,16 @@
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
Format: <command>
+ vmscape= [X86] Controls mitigation for VMscape attacks.
+ VMscape attacks can leak information from a userspace
+ hypervisor to a guest via speculative side-channels.
+
+ off - disable the mitigation
+ ibpb - use Indirect Branch Prediction Barrier
+ (IBPB) mitigation (default)
+ force - force vulnerability detection even on
+ unaffected processors
+
vsyscall= [X86-64,EARLY]
Controls the behavior of vsyscalls (i.e. calls to
fixed addresses of 0xffffffffff600x00 from legacy
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index 02f1ddcfbf1c..d15335684ec3 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -256,7 +256,7 @@ attribute-sets:
type: u32
-
name: if-idx
- type: u32
+ type: s32
-
name: reset-reason
type: u32
diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index bc1b585355f7..7650c4b5be5f 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -742,7 +742,7 @@ The broadcast manager sends responses to user space in the same form:
struct timeval ival1, ival2; /* count and subsequent interval */
canid_t can_id; /* unique can_id for task */
__u32 nframes; /* number of can_frames following */
- struct can_frame frames[0];
+ struct can_frame frames[];
};
The aligned payload 'frames' uses the same basic CAN frame structure defined
diff --git a/Documentation/networking/mptcp.rst b/Documentation/networking/mptcp.rst
index 17f2bab61164..2e31038d6462 100644
--- a/Documentation/networking/mptcp.rst
+++ b/Documentation/networking/mptcp.rst
@@ -60,10 +60,10 @@ address announcements. Typically, it is the client side that initiates subflows,
and the server side that announces additional addresses via the ``ADD_ADDR`` and
``REMOVE_ADDR`` options.
-Path managers are controlled by the ``net.mptcp.pm_type`` sysctl knob -- see
-mptcp-sysctl.rst. There are two types: the in-kernel one (type ``0``) where the
-same rules are applied for all the connections (see: ``ip mptcp``) ; and the
-userspace one (type ``1``), controlled by a userspace daemon (i.e. `mptcpd
+Path managers are controlled by the ``net.mptcp.path_manager`` sysctl knob --
+see mptcp-sysctl.rst. There are two types: the in-kernel one (``kernel``) where
+the same rules are applied for all the connections (see: ``ip mptcp``) ; and the
+userspace one (``userspace``), controlled by a userspace daemon (i.e. `mptcpd
<https://mptcpd.mptcp.dev/>`_) where different rules can be applied for each
connection. The path managers can be controlled via a Netlink API; see
netlink_spec/mptcp_pm.rst.
diff --git a/MAINTAINERS b/MAINTAINERS
index cd7ff55b5d32..659a3a251f00 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4683,7 +4683,6 @@ F: security/bpf/
BPF [SELFTESTS] (Test Runners & Infrastructure)
M: Andrii Nakryiko <andrii@kernel.org>
M: Eduard Zingerman <eddyz87@gmail.com>
-R: Mykola Lysenko <mykolal@fb.com>
L: bpf@vger.kernel.org
S: Maintained
F: tools/testing/selftests/bpf/
@@ -5259,7 +5258,6 @@ F: drivers/gpio/gpio-bt8xx.c
BTRFS FILE SYSTEM
M: Chris Mason <clm@fb.com>
-M: Josef Bacik <josef@toxicpanda.com>
M: David Sterba <dsterba@suse.com>
L: linux-btrfs@vger.kernel.org
S: Maintained
@@ -7240,15 +7238,15 @@ F: include/linux/swiotlb.h
F: kernel/dma/
DMA MAPPING HELPERS DEVICE DRIVER API [RUST]
-M: Abdiel Janulgue <abdiel.janulgue@gmail.com>
M: Danilo Krummrich <dakr@kernel.org>
+R: Abdiel Janulgue <abdiel.janulgue@gmail.com>
R: Daniel Almeida <daniel.almeida@collabora.com>
R: Robin Murphy <robin.murphy@arm.com>
R: Andreas Hindborg <a.hindborg@kernel.org>
L: rust-for-linux@vger.kernel.org
S: Supported
W: https://rust-for-linux.com
-T: git https://github.com/Rust-for-Linux/linux.git alloc-next
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git
F: rust/helpers/dma.c
F: rust/kernel/dma.rs
F: samples/rust/rust_dma.rs
@@ -8080,7 +8078,6 @@ F: Documentation/devicetree/bindings/gpu/
F: Documentation/gpu/
F: drivers/gpu/drm/
F: drivers/gpu/vga/
-F: rust/kernel/drm/
F: include/drm/drm
F: include/linux/vga*
F: include/uapi/drm/
@@ -8092,11 +8089,21 @@ X: drivers/gpu/drm/i915/
X: drivers/gpu/drm/kmb/
X: drivers/gpu/drm/mediatek/
X: drivers/gpu/drm/msm/
-X: drivers/gpu/drm/nouveau/
+X: drivers/gpu/drm/nova/
X: drivers/gpu/drm/radeon/
X: drivers/gpu/drm/tegra/
X: drivers/gpu/drm/xe/
+DRM DRIVERS AND COMMON INFRASTRUCTURE [RUST]
+M: Danilo Krummrich <dakr@kernel.org>
+M: Alice Ryhl <aliceryhl@google.com>
+S: Supported
+W: https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html
+T: git https://gitlab.freedesktop.org/drm/rust/kernel.git
+F: drivers/gpu/drm/nova/
+F: drivers/gpu/nova-core/
+F: rust/kernel/drm/
+
DRM DRIVERS FOR ALLWINNER A10
M: Maxime Ripard <mripard@kernel.org>
M: Chen-Yu Tsai <wens@csie.org>
@@ -16128,6 +16135,7 @@ M: Andrew Morton <akpm@linux-foundation.org>
M: Mike Rapoport <rppt@kernel.org>
L: linux-mm@kvack.org
S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git
F: include/linux/numa_memblks.h
F: mm/numa.c
F: mm/numa_emulation.c
@@ -17480,6 +17488,7 @@ NETFILTER
M: Pablo Neira Ayuso <pablo@netfilter.org>
M: Jozsef Kadlecsik <kadlec@netfilter.org>
M: Florian Westphal <fw@strlen.de>
+R: Phil Sutter <phil@nwl.cc>
L: netfilter-devel@vger.kernel.org
L: coreteam@netfilter.org
S: Maintained
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index af1ca875c52c..410060ebd86d 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -94,7 +94,7 @@ int load_other_segments(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline)
{
- struct kexec_buf kbuf;
+ struct kexec_buf kbuf = {};
void *dtb = NULL;
unsigned long initrd_load_addr = 0, dtb_len,
orig_segments = image->nr_segments;
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 4d364de43799..143e34a4eca5 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -16,7 +16,7 @@
static int kexec_file_add_kernel_elf(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
const Elf_Ehdr *ehdr;
const Elf_Phdr *phdr;
Elf_Addr entry;
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index a32ce8bea745..9a439175723c 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -16,7 +16,7 @@
static int kexec_file_add_kernel_image(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
buf.image = image;
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index c2bac14dd668..a36d7311c668 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -129,7 +129,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
static int kexec_file_add_purgatory(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
int ret;
buf.image = image;
@@ -152,7 +152,7 @@ static int kexec_file_add_purgatory(struct kimage *image,
static int kexec_file_add_initrd(struct kimage *image,
struct s390_load_data *data)
{
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
int ret;
buf.image = image;
@@ -184,7 +184,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
{
__u32 *lc_ipl_parmblock_ptr;
unsigned int len, ncerts;
- struct kexec_buf buf;
+ struct kexec_buf buf = {};
unsigned long addr;
void *ptr, *end;
int ret;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 4d09954ebf49..04457d88e589 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -760,8 +760,6 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
break;
case PERF_TYPE_HARDWARE:
- if (is_sampling_event(event)) /* No sampling support */
- return -ENOENT;
ev = attr->config;
if (!attr->exclude_user && attr->exclude_kernel) {
/*
@@ -859,6 +857,8 @@ static int cpumf_pmu_event_init(struct perf_event *event)
unsigned int type = event->attr.type;
int err = -ENOENT;
+ if (is_sampling_event(event)) /* No sampling support */
+ return err;
if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
err = __hw_perf_event_init(event, type);
else if (event->pmu->type == type)
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index f373a1009c45..9455f213dc20 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -285,10 +285,10 @@ static int paicrypt_event_init(struct perf_event *event)
/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
return -ENOENT;
- /* PAI crypto event must be in valid range */
+ /* PAI crypto event must be in valid range, try others if not */
if (a->config < PAI_CRYPTO_BASE ||
a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
- return -EINVAL;
+ return -ENOENT;
/* Allow only CRYPTO_ALL for sampling */
if (a->sample_period && a->config != PAI_CRYPTO_BASE)
return -EINVAL;
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index d827473e7f87..7b32935273ce 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -265,7 +265,7 @@ static int paiext_event_valid(struct perf_event *event)
event->hw.config_base = offsetof(struct paiext_cb, acc);
return 0;
}
- return -EINVAL;
+ return -ENOENT;
}
/* Might be called on different CPU than the one the event is intended for. */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 60688be4e876..50eb57c976bc 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -335,7 +335,6 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
int nodat;
struct mm_struct *mm = vma->vm_mm;
- preempt_disable();
pgste = ptep_xchg_start(mm, addr, ptep);
nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
old = ptep_flush_lazy(mm, addr, ptep, nodat);
@@ -360,7 +359,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
} else {
set_pte(ptep, pte);
}
- preempt_enable();
}
static inline void pmdp_idte_local(struct mm_struct *mm,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 58d890fe2100..52c8910ba2ef 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2701,6 +2701,15 @@ config MITIGATION_TSA
security vulnerability on AMD CPUs which can lead to forwarding of
invalid info to subsequent instructions and thus can affect their
timing and thereby cause a leakage.
+
+config MITIGATION_VMSCAPE
+ bool "Mitigate VMSCAPE"
+ depends on KVM
+ default y
+ help
+ Enable mitigation for VMSCAPE attacks. VMSCAPE is a hardware security
+ vulnerability on Intel and AMD CPUs that may allow a guest to do
+ Spectre v2 style attacks on userspace hypervisor.
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 06fc0479a23f..751ca35386b0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -495,6 +495,7 @@
#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
/*
* BUG word(s)
@@ -551,4 +552,5 @@
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
+#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index d535a97c7284..ce3eb6d5fdf9 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -93,6 +93,13 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
* 8 (ia32) bits.
*/
choose_random_kstack_offset(rdtsc());
+
+ /* Avoid unnecessary reads of 'x86_ibpb_exit_to_user' */
+ if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER) &&
+ this_cpu_read(x86_ibpb_exit_to_user)) {
+ indirect_branch_prediction_barrier();
+ this_cpu_write(x86_ibpb_exit_to_user, false);
+ }
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 10f261678749..e29f82466f43 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -530,6 +530,8 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
: "memory");
}
+DECLARE_PER_CPU(bool, x86_ibpb_exit_to_user);
+
static inline void indirect_branch_prediction_barrier(void)
{
asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index af838b8d845c..36dcfc5105be 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -96,6 +96,9 @@ static void __init its_update_mitigation(void);
static void __init its_apply_mitigation(void);
static void __init tsa_select_mitigation(void);
static void __init tsa_apply_mitigation(void);
+static void __init vmscape_select_mitigation(void);
+static void __init vmscape_update_mitigation(void);
+static void __init vmscape_apply_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -105,6 +108,14 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
+/*
+ * Set when the CPU has run a potentially malicious guest. An IBPB will
+ * be needed to before running userspace. That IBPB will flush the branch
+ * predictor content.
+ */
+DEFINE_PER_CPU(bool, x86_ibpb_exit_to_user);
+EXPORT_PER_CPU_SYMBOL_GPL(x86_ibpb_exit_to_user);
+
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
static u64 __ro_after_init x86_arch_cap_msr;
@@ -262,6 +273,7 @@ void __init cpu_select_mitigations(void)
its_select_mitigation();
bhi_select_mitigation();
tsa_select_mitigation();
+ vmscape_select_mitigation();
/*
* After mitigations are selected, some may need to update their
@@ -293,6 +305,7 @@ void __init cpu_select_mitigations(void)
bhi_update_mitigation();
/* srso_update_mitigation() depends on retbleed_update_mitigation(). */
srso_update_mitigation();
+ vmscape_update_mitigation();
spectre_v1_apply_mitigation();
spectre_v2_apply_mitigation();
@@ -310,6 +323,7 @@ void __init cpu_select_mitigations(void)
its_apply_mitigation();
bhi_apply_mitigation();
tsa_apply_mitigation();
+ vmscape_apply_mitigation();
}
/*
@@ -2538,88 +2552,6 @@ static void update_mds_branch_idle(void)
}
}
-#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
-#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
-#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
-
-void cpu_bugs_smt_update(void)
-{
- mutex_lock(&spec_ctrl_mutex);
-
- if (sched_smt_active() && unprivileged_ebpf_enabled() &&
- spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
- pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
-
- switch (spectre_v2_user_stibp) {
- case SPECTRE_V2_USER_NONE:
- break;
- case SPECTRE_V2_USER_STRICT:
- case SPECTRE_V2_USER_STRICT_PREFERRED:
- update_stibp_strict();
- break;
- case SPECTRE_V2_USER_PRCTL:
- case SPECTRE_V2_USER_SECCOMP:
- update_indir_branch_cond();
- break;
- }
-
- switch (mds_mitigation) {
- case MDS_MITIGATION_FULL:
- case MDS_MITIGATION_AUTO:
- case MDS_MITIGATION_VMWERV:
- if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
- pr_warn_once(MDS_MSG_SMT);
- update_mds_branch_idle();
- break;
- case MDS_MITIGATION_OFF:
- break;
- }
-
- switch (taa_mitigation) {
- case TAA_MITIGATION_VERW:
- case TAA_MITIGATION_AUTO:
- case TAA_MITIGATION_UCODE_NEEDED:
- if (sched_smt_active())
- pr_warn_once(TAA_MSG_SMT);
- break;
- case TAA_MITIGATION_TSX_DISABLED:
- case TAA_MITIGATION_OFF:
- break;
- }
-
- switch (mmio_mitigation) {
- case MMIO_MITIGATION_VERW:
- case MMIO_MITIGATION_AUTO:
- case MMIO_MITIGATION_UCODE_NEEDED:
- if (sched_smt_active())
- pr_warn_once(MMIO_MSG_SMT);
- break;
- case MMIO_MITIGATION_OFF:
- break;
- }
-
- switch (tsa_mitigation) {
- case TSA_MITIGATION_USER_KERNEL:
- case TSA_MITIGATION_VM:
- case TSA_MITIGATION_AUTO:
- case TSA_MITIGATION_FULL:
- /*
- * TSA-SQ can potentially lead to info leakage between
- * SMT threads.
- */
- if (sched_smt_active())
- static_branch_enable(&cpu_buf_idle_clear);
- else
- static_branch_disable(&cpu_buf_idle_clear);
- break;
- case TSA_MITIGATION_NONE:
- case TSA_MITIGATION_UCODE_NEEDED:
- break;
- }
-
- mutex_unlock(&spec_ctrl_mutex);
-}
-
#undef pr_fmt
#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
@@ -3331,8 +3263,184 @@ static void __init srso_apply_mitigation(void)
}
#undef pr_fmt
+#define pr_fmt(fmt) "VMSCAPE: " fmt
+
+enum vmscape_mitigations {
+ VMSCAPE_MITIGATION_NONE,
+ VMSCAPE_MITIGATION_AUTO,
+ VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER,
+ VMSCAPE_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+static const char * const vmscape_strings[] = {
+ [VMSCAPE_MITIGATION_NONE] = "Vulnerable",
+ /* [VMSCAPE_MITIGATION_AUTO] */
+ [VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER] = "Mitigation: IBPB before exit to userspace",
+ [VMSCAPE_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT",
+};
+
+static enum vmscape_mitigations vmscape_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_VMSCAPE) ? VMSCAPE_MITIGATION_AUTO : VMSCAPE_MITIGATION_NONE;
+
+static int __init vmscape_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+ } else if (!strcmp(str, "ibpb")) {
+ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+ } else if (!strcmp(str, "force")) {
+ setup_force_cpu_bug(X86_BUG_VMSCAPE);
+ vmscape_mitigation = VMSCAPE_MITIGATION_AUTO;
+ } else {
+ pr_err("Ignoring unknown vmscape=%s option.\n", str);
+ }
+
+ return 0;
+}
+early_param("vmscape", vmscape_parse_cmdline);
+
+static void __init vmscape_select_mitigation(void)
+{
+ if (cpu_mitigations_off() ||
+ !boot_cpu_has_bug(X86_BUG_VMSCAPE) ||
+ !boot_cpu_has(X86_FEATURE_IBPB)) {
+ vmscape_mitigation = VMSCAPE_MITIGATION_NONE;
+ return;
+ }
+
+ if (vmscape_mitigation == VMSCAPE_MITIGATION_AUTO)
+ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER;
+}
+
+static void __init vmscape_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_VMSCAPE))
+ return;
+
+ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB ||
+ srso_mitigation == SRSO_MITIGATION_IBPB_ON_VMEXIT)
+ vmscape_mitigation = VMSCAPE_MITIGATION_IBPB_ON_VMEXIT;
+
+ pr_info("%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
+static void __init vmscape_apply_mitigation(void)
+{
+ if (vmscape_mitigation == VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER)
+ setup_force_cpu_cap(X86_FEATURE_IBPB_EXIT_TO_USER);
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) fmt
+#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
+#define VMSCAPE_MSG_SMT "VMSCAPE: SMT on, STIBP is required for full protection. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/vmscape.html for more details.\n"
+
+void cpu_bugs_smt_update(void)
+{
+ mutex_lock(&spec_ctrl_mutex);
+
+ if (sched_smt_active() && unprivileged_ebpf_enabled() &&
+ spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
+ pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG);
+
+ switch (spectre_v2_user_stibp) {
+ case SPECTRE_V2_USER_NONE:
+ break;
+ case SPECTRE_V2_USER_STRICT:
+ case SPECTRE_V2_USER_STRICT_PREFERRED:
+ update_stibp_strict();
+ break;
+ case SPECTRE_V2_USER_PRCTL:
+ case SPECTRE_V2_USER_SECCOMP:
+ update_indir_branch_cond();
+ break;
+ }
+
+ switch (mds_mitigation) {
+ case MDS_MITIGATION_FULL:
+ case MDS_MITIGATION_AUTO:
+ case MDS_MITIGATION_VMWERV:
+ if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
+ pr_warn_once(MDS_MSG_SMT);
+ update_mds_branch_idle();
+ break;
+ case MDS_MITIGATION_OFF:
+ break;
+ }
+
+ switch (taa_mitigation) {
+ case TAA_MITIGATION_VERW:
+ case TAA_MITIGATION_AUTO:
+ case TAA_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(TAA_MSG_SMT);
+ break;
+ case TAA_MITIGATION_TSX_DISABLED:
+ case TAA_MITIGATION_OFF:
+ break;
+ }
+
+ switch (mmio_mitigation) {
+ case MMIO_MITIGATION_VERW:
+ case MMIO_MITIGATION_AUTO:
+ case MMIO_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(MMIO_MSG_SMT);
+ break;
+ case MMIO_MITIGATION_OFF:
+ break;
+ }
+
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ case TSA_MITIGATION_VM:
+ case TSA_MITIGATION_AUTO:
+ case TSA_MITIGATION_FULL:
+ /*
+ * TSA-SQ can potentially lead to info leakage between
+ * SMT threads.
+ */
+ if (sched_smt_active())
+ static_branch_enable(&cpu_buf_idle_clear);
+ else
+ static_branch_disable(&cpu_buf_idle_clear);
+ break;
+ case TSA_MITIGATION_NONE:
+ case TSA_MITIGATION_UCODE_NEEDED:
+ break;
+ }
+
+ switch (vmscape_mitigation) {
+ case VMSCAPE_MITIGATION_NONE:
+ case VMSCAPE_MITIGATION_AUTO:
+ break;
+ case VMSCAPE_MITIGATION_IBPB_ON_VMEXIT:
+ case VMSCAPE_MITIGATION_IBPB_EXIT_TO_USER:
+ /*
+ * Hypervisors can be attacked across-threads, warn for SMT when
+ * STIBP is not already enabled system-wide.
+ *
+ * Intel eIBRS (!AUTOIBRS) implies STIBP on.
+ */
+ if (!sched_smt_active() ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+ !boot_cpu_has(X86_FEATURE_AUTOIBRS)))
+ break;
+ pr_warn_once(VMSCAPE_MSG_SMT);
+ break;
+ }
+
+ mutex_unlock(&spec_ctrl_mutex);
+}
+
#ifdef CONFIG_SYSFS
#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
@@ -3578,6 +3686,11 @@ static ssize_t tsa_show_state(char *buf)
return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
}
+static ssize_t vmscape_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", vmscape_strings[vmscape_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -3644,6 +3757,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_TSA:
return tsa_show_state(buf);
+ case X86_BUG_VMSCAPE:
+ return vmscape_show_state(buf);
+
default:
break;
}
@@ -3735,6 +3851,11 @@ ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *bu
{
return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
}
+
+ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_VMSCAPE);
+}
#endif
void __warn_thunk(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 34a054181c4d..f98ec9c7fc07 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1236,55 +1236,71 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define ITS_NATIVE_ONLY BIT(9)
/* CPU is affected by Transient Scheduler Attacks */
#define TSA BIT(10)
+/* CPU is affected by VMSCAPE */
+#define VMSCAPE BIT(11)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
- VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO),
- VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED),
+ VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SANDYBRIDGE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL_L, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_HASWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL_D, X86_STEP_MAX, MMIO | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED | VMSCAPE),
VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS),
- VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | VMSCAPE),
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED),
VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
- VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS),
- VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS),
- VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS),
- VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS | VMSCAPE),
+ VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_P, X86_STEP_MAX, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_RAPTORLAKE_S, X86_STEP_MAX, RFDS | VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_METEORLAKE_L, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_H, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ARROWLAKE_U, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_LUNARLAKE_M, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_SAPPHIRERAPIDS_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_GRANITERAPIDS_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_EMERALDRAPIDS_X, X86_STEP_MAX, VMSCAPE),
+ VULNBL_INTEL_STEPS(INTEL_ATOM_GRACEMONT, X86_STEP_MAX, RFDS | VMSCAPE),
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MAX, MMIO | RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_TREMONT_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_D, X86_STEP_MAX, RFDS),
VULNBL_INTEL_STEPS(INTEL_ATOM_GOLDMONT_PLUS, X86_STEP_MAX, RFDS),
+ VULNBL_INTEL_STEPS(INTEL_ATOM_CRESTMONT_X, X86_STEP_MAX, VMSCAPE),
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
- VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
- VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
- VULNBL_AMD(0x19, SRSO | TSA),
- VULNBL_AMD(0x1a, SRSO),
+ VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+ VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE),
+ VULNBL_AMD(0x1a, SRSO | VMSCAPE),
{}
};
@@ -1543,6 +1559,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}
}
+ /*
+ * Set the bug only on bare-metal. A nested hypervisor should already be
+ * deploying IBPB to isolate itself from nested guests.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, VMSCAPE) &&
+ !boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ setup_force_cpu_bug(X86_BUG_VMSCAPE);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 604490b1cb19..706b6fd56d3c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11011,6 +11011,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
wrmsrq(MSR_IA32_XFD_ERR, 0);
/*
+ * Mark this CPU as needing a branch predictor flush before running
+ * userspace. Must be done before enabling preemption to ensure it gets
+ * set for the CPU that actually ran the guest, and not the CPU that it
+ * may migrate to.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_IBPB_EXIT_TO_USER))
+ this_cpu_write(x86_ibpb_exit_to_user, true);
+
+ /*
* Consume any pending interrupts, including the possible source of
* VM-Exit on SVM and any ticks that occur between VM-Exit and now.
* An instruction is required after local_irq_enable() to fully unblock
diff --git a/block/fops.c b/block/fops.c
index 82451ac8ff25..ddbc69c0922b 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/uio.h>
@@ -54,7 +55,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct bio bio;
ssize_t ret;
- WARN_ON_ONCE(iocb->ki_flags & IOCB_HAS_METADATA);
if (nr_pages <= DIO_INLINE_BIO_VECS)
vecs = inline_vecs;
else {
@@ -131,7 +131,7 @@ static void blkdev_bio_end_io(struct bio *bio)
if (bio->bi_status && !dio->bio.bi_status)
dio->bio.bi_status = bio->bi_status;
- if (!is_sync && (dio->iocb->ki_flags & IOCB_HAS_METADATA))
+ if (bio_integrity(bio))
bio_integrity_unmap_user(bio);
if (atomic_dec_and_test(&dio->ref)) {
@@ -233,7 +233,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
}
bio->bi_opf |= REQ_NOWAIT;
}
- if (!is_sync && (iocb->ki_flags & IOCB_HAS_METADATA)) {
+ if (iocb->ki_flags & IOCB_HAS_METADATA) {
ret = bio_integrity_map_iter(bio, iocb->private);
if (unlikely(ret))
goto fail;
@@ -301,7 +301,7 @@ static void blkdev_bio_end_io_async(struct bio *bio)
ret = blk_status_to_errno(bio->bi_status);
}
- if (iocb->ki_flags & IOCB_HAS_METADATA)
+ if (bio_integrity(bio))
bio_integrity_unmap_user(bio);
iocb->ki_complete(iocb, ret);
@@ -422,7 +422,8 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
- if (likely(nr_pages <= BIO_MAX_VECS)) {
+ if (likely(nr_pages <= BIO_MAX_VECS &&
+ !(iocb->ki_flags & IOCB_HAS_METADATA))) {
if (is_sync_kiocb(iocb))
return __blkdev_direct_IO_simple(iocb, iter, bdev,
nr_pages);
@@ -687,6 +688,8 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (bdev_can_atomic_write(bdev))
filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
+ if (blk_get_integrity(bdev->bd_disk))
+ filp->f_mode |= FMODE_HAS_METADATA;
ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
if (ret)
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index efc575a00edd..008da0354fba 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -603,6 +603,7 @@ CPU_SHOW_VULN_FALLBACK(ghostwrite);
CPU_SHOW_VULN_FALLBACK(old_microcode);
CPU_SHOW_VULN_FALLBACK(indirect_target_selection);
CPU_SHOW_VULN_FALLBACK(tsa);
+CPU_SHOW_VULN_FALLBACK(vmscape);
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -622,6 +623,7 @@ static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL);
static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL);
static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL);
static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL);
+static DEVICE_ATTR(vmscape, 0444, cpu_show_vmscape, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -642,6 +644,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_old_microcode.attr,
&dev_attr_indirect_target_selection.attr,
&dev_attr_tsa.attr,
+ &dev_attr_vmscape.attr,
NULL
};
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index bbc27ef9edf7..b4c79fde1979 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1554,13 +1554,15 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
pr_debug("CPU %d exiting\n", policy->cpu);
}
-static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
+static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy, bool policy_change)
{
struct amd_cpudata *cpudata = policy->driver_data;
union perf_cached perf;
u8 epp;
- if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq)
+ if (policy_change ||
+ policy->min != cpudata->min_limit_freq ||
+ policy->max != cpudata->max_limit_freq)
amd_pstate_update_min_max_limit(policy);
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
@@ -1584,7 +1586,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
cpudata->policy = policy->policy;
- ret = amd_pstate_epp_update_limit(policy);
+ ret = amd_pstate_epp_update_limit(policy, true);
if (ret)
return ret;
@@ -1626,13 +1628,14 @@ static int amd_pstate_suspend(struct cpufreq_policy *policy)
* min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
* the limits, epp and desired perf will get reset to the cached values in cpudata struct
*/
- ret = amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
+ ret = amd_pstate_update_perf(policy, perf.bios_min_perf,
+ FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached),
+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
+ FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached),
+ false);
if (ret)
return ret;
- /* invalidate to ensure it's rewritten during resume */
- cpudata->cppc_req_cached = 0;
-
/* set this flag to avoid setting core offline*/
cpudata->suspended = true;
@@ -1658,7 +1661,7 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
int ret;
/* enable amd pstate from suspend state*/
- ret = amd_pstate_epp_update_limit(policy);
+ ret = amd_pstate_epp_update_limit(policy, false);
if (ret)
return ret;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f366d35c5840..0d5d283a5429 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1034,8 +1034,8 @@ static bool hybrid_register_perf_domain(unsigned int cpu)
if (!cpu_dev)
return false;
- if (em_dev_register_perf_domain(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
- cpumask_of(cpu), false))
+ if (em_dev_register_pd_no_update(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
+ cpumask_of(cpu), false))
return false;
cpudata->pd_registered = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 260165bbe373..b16cce7c22c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit)
+ kfd_mem_limit.max_system_mem_limit) {
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+ if (!no_system_mem_limit) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
- if ((kfd_mem_limit.system_mem_used + system_mem_needed >
- kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
- (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
- kfd_mem_limit.max_ttm_mem_limit) ||
- (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
- vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
+ if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) {
ret = -ENOMEM;
goto release;
}
+ /*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with
+ * carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip
+ * VRAM check since ttm_mem_limit check already cover this allocation
+ */
+
+ if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
+ uint64_t vram_available =
+ vram_size - reserved_for_pt - reserved_for_ras -
+ atomic64_read(&adev->vram_pin_size);
+ if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
+
/* Update memory accounting by decreasing available system
* memory, TTM memory and GPU memory as computed above
*/
@@ -1626,11 +1642,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
uint64_t vram_available, system_mem_available, ttm_mem_available;
spin_lock(&kfd_mem_limit.mem_limit_lock);
- vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
- - adev->kfd.vram_used_aligned[xcp_id]
- - atomic64_read(&adev->vram_pin_size)
- - reserved_for_pt
- - reserved_for_ras;
+ if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id];
+ else
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt
+ - reserved_for_ras;
if (adev->apu_prefer_gtt) {
system_mem_available = no_system_mem_limit ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6379bb25bf5c..486c3646710c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -421,8 +421,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
ring->me = 0;
-
- ring->adev->rings[ring->idx] = NULL;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
index a887df520414..4258d3e0b706 100644
--- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -29,6 +29,8 @@
#include "amdgpu.h"
#include "isp_v4_1_1.h"
+MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin");
+
#define ISP_PERFORMANCE_STATE_LOW 0
#define ISP_PERFORMANCE_STATE_HIGH 1
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 6cc05d36e359..64b240b51f1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -149,12 +149,12 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
int ret;
int retry_loop;
- for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ for (retry_loop = 0; retry_loop < 20; retry_loop++) {
/* Wait for bootloader to signify that is
ready having bit 31 of C2PMSG_35 set to 1 */
ret = psp_wait_for(
psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
- 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+ 0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE);
if (ret == 0)
return 0;
@@ -397,18 +397,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
msleep(500);
- offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
-
- ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK,
- 0);
-
- if (ret) {
- DRM_INFO("psp mode 1 reset failed!\n");
- return -EINVAL;
- }
-
- DRM_INFO("psp mode1 reset succeed \n");
-
return 0;
}
@@ -665,7 +653,8 @@ static const struct psp_funcs psp_v11_0_funcs = {
.ring_get_wptr = psp_v11_0_ring_get_wptr,
.ring_set_wptr = psp_v11_0_ring_set_wptr,
.load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw,
- .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw
+ .read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw,
+ .wait_for_bootloader = psp_v11_0_wait_for_bootloader
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 4b8f4407047f..2811226b0ea5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1888,15 +1888,19 @@ static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
-
- /* The create msg must be in the first IB submitted */
- if (atomic_read(&job->base.entity->fence_seq))
- return -EINVAL;
+ struct dma_fence *fence;
/* if VCN0 is harvested, we can't support AV1 */
if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
return -EINVAL;
+ /* wait for all jobs to finish before switching to instance 0 */
+ fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
[AMDGPU_RING_PRIO_DEFAULT].sched;
drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 1924e075b66f..706f3b2f484f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1808,15 +1808,19 @@ static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
-
- /* The create msg must be in the first IB submitted */
- if (atomic_read(&job->base.entity->fence_seq))
- return -EINVAL;
+ struct dma_fence *fence;
/* if VCN0 is harvested, we can't support AV1 */
if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
return -EINVAL;
+ /* wait for all jobs to finish before switching to instance 0 */
+ fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
+ if (fence) {
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
+
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
[AMDGPU_RING_PRIO_0].sched;
drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
@@ -1907,22 +1911,16 @@ out:
#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
-
#define RADEON_VCN_ENGINE_INFO (0x30000001)
-#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16
-
#define RENCODE_ENCODE_STANDARD_AV1 2
#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003
-#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64
-/* return the offset in ib if id is found, -1 otherwise
- * to speed up the searching we only search upto max_offset
- */
-static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
+/* return the offset in ib if id is found, -1 otherwise */
+static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start)
{
int i;
- for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
+ for (i = start; i < ib->length_dw && ib->ptr[i] >= 8; i += ib->ptr[i] / 4) {
if (ib->ptr[i + 1] == id)
return i;
}
@@ -1937,33 +1935,29 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_vcn_decode_buffer *decode_buffer;
uint64_t addr;
uint32_t val;
- int idx;
+ int idx = 0, sidx;
/* The first instance can decode anything */
if (!ring->me)
return 0;
- /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
- idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
- RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
- if (idx < 0) /* engine info is missing */
- return 0;
-
- val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
- if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
- decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
-
- if (!(decode_buffer->valid_buf_flag & 0x1))
- return 0;
-
- addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
- decode_buffer->msg_buffer_address_lo;
- return vcn_v4_0_dec_msg(p, job, addr);
- } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
- idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
- RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
- if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
- return vcn_v4_0_limit_sched(p, job);
+ while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) {
+ val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
+
+ if (!(decode_buffer->valid_buf_flag & 0x1))
+ return 0;
+
+ addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo;
+ return vcn_v4_0_dec_msg(p, job, addr);
+ } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
+ sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx);
+ if (sidx >= 0 && ib->ptr[sidx + 2] == RENCODE_ENCODE_STANDARD_AV1)
+ return vcn_v4_0_limit_sched(p, job);
+ }
+ idx += ib->ptr[idx] / 4;
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 4ec73f33535e..720b20e842ba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1587,7 +1587,8 @@ static int kfd_dev_create_p2p_links(void)
break;
if (!dev->gpu || !dev->gpu->adev ||
(dev->gpu->kfd->hive_id &&
- dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id))
+ dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id &&
+ amdgpu_xgmi_get_is_sharing_enabled(dev->gpu->adev, new_dev->gpu->adev)))
goto next;
/* check if node(s) is/are peer accessible in one direction or bi-direction */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7808a647a306..4e86370ae705 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2913,6 +2913,17 @@ static int dm_oem_i2c_hw_init(struct amdgpu_device *adev)
return 0;
}
+static void dm_oem_i2c_hw_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_display_manager *dm = &adev->dm;
+
+ if (dm->oem_i2c) {
+ i2c_del_adapter(&dm->oem_i2c->base);
+ kfree(dm->oem_i2c);
+ dm->oem_i2c = NULL;
+ }
+}
+
/**
* dm_hw_init() - Initialize DC device
* @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
@@ -2963,7 +2974,7 @@ static int dm_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- kfree(adev->dm.oem_i2c);
+ dm_oem_i2c_hw_fini(adev);
amdgpu_dm_hpd_fini(adev);
@@ -3127,25 +3138,6 @@ static void dm_destroy_cached_state(struct amdgpu_device *adev)
dm->cached_state = NULL;
}
-static void dm_complete(struct amdgpu_ip_block *ip_block)
-{
- struct amdgpu_device *adev = ip_block->adev;
-
- dm_destroy_cached_state(adev);
-}
-
-static int dm_prepare_suspend(struct amdgpu_ip_block *ip_block)
-{
- struct amdgpu_device *adev = ip_block->adev;
-
- if (amdgpu_in_reset(adev))
- return 0;
-
- WARN_ON(adev->dm.cached_state);
-
- return dm_cache_state(adev);
-}
-
static int dm_suspend(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@@ -3571,10 +3563,8 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
.early_fini = amdgpu_dm_early_fini,
.hw_init = dm_hw_init,
.hw_fini = dm_hw_fini,
- .prepare_suspend = dm_prepare_suspend,
.suspend = dm_suspend,
.resume = dm_resume,
- .complete = dm_complete,
.is_idle = dm_is_idle,
.wait_for_idle = dm_wait_for_idle,
.check_soft_reset = dm_check_soft_reset,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 7187d5aedf0a..77a9d2c7d318 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -809,6 +809,7 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
drm_dp_aux_init(&aconnector->dm_dp_aux.aux);
drm_dp_cec_register_connector(&aconnector->dm_dp_aux.aux,
&aconnector->base);
+ drm_dp_dpcd_set_probe(&aconnector->dm_dp_aux.aux, false);
if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
return;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 59c07756130d..f24e1da68269 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -1145,6 +1145,7 @@ struct dc_debug_options {
bool enable_hblank_borrow;
bool force_subvp_df_throttle;
uint32_t acpi_transition_bitmasks[MAX_PIPES];
+ bool enable_pg_cntl_debug_logs;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index 58c84f555c0f..0ce9489ac6b7 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -133,30 +133,34 @@ enum dsc_clk_source {
};
-static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool enable)
+static void dccg35_set_dsc_clk_rcg(struct dccg *dccg, int inst, bool allow_rcg)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
- if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && enable)
+ if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dsc && allow_rcg)
return;
switch (inst) {
case 0:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
case 1:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
case 2:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
case 3:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
default:
BREAK_TO_DEBUGGER();
return;
}
+
+ /* Wait for clock to ramp */
+ if (!allow_rcg)
+ udelay(10);
}
static void dccg35_set_symclk32_se_rcg(
@@ -385,35 +389,34 @@ static void dccg35_set_dtbclk_p_rcg(struct dccg *dccg, int inst, bool enable)
}
}
-static void dccg35_set_dppclk_rcg(struct dccg *dccg,
- int inst, bool enable)
+static void dccg35_set_dppclk_rcg(struct dccg *dccg, int inst, bool allow_rcg)
{
-
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
-
- if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && enable)
+ if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && allow_rcg)
return;
switch (inst) {
case 0:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
case 1:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
case 2:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
case 3:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable ? 0 : 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, allow_rcg ? 0 : 1);
break;
default:
BREAK_TO_DEBUGGER();
break;
}
- //DC_LOG_DEBUG("%s: inst(%d) DPPCLK rcg_disable: %d\n", __func__, inst, enable ? 0 : 1);
+ /* Wait for clock to ramp */
+ if (!allow_rcg)
+ udelay(10);
}
static void dccg35_set_dpstreamclk_rcg(
@@ -1177,32 +1180,34 @@ static void dccg35_update_dpp_dto(struct dccg *dccg, int dpp_inst,
}
static void dccg35_set_dppclk_root_clock_gating(struct dccg *dccg,
- uint32_t dpp_inst, uint32_t enable)
+ uint32_t dpp_inst, uint32_t disallow_rcg)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
- if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp)
+ if (!dccg->ctx->dc->debug.root_clock_optimization.bits.dpp && !disallow_rcg)
return;
switch (dpp_inst) {
case 0:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, enable);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK0_ROOT_GATE_DISABLE, disallow_rcg);
break;
case 1:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, enable);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK1_ROOT_GATE_DISABLE, disallow_rcg);
break;
case 2:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, enable);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK2_ROOT_GATE_DISABLE, disallow_rcg);
break;
case 3:
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, enable);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DPPCLK3_ROOT_GATE_DISABLE, disallow_rcg);
break;
default:
break;
}
- //DC_LOG_DEBUG("%s: dpp_inst(%d) rcg: %d\n", __func__, dpp_inst, enable);
+ /* Wait for clock to ramp */
+ if (disallow_rcg)
+ udelay(10);
}
static void dccg35_get_pixel_rate_div(
@@ -1782,8 +1787,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst)
//Disable DTO
switch (inst) {
case 0:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK0_ROOT_GATE_DISABLE, 1);
REG_UPDATE_2(DSCCLK0_DTO_PARAM,
DSCCLK0_DTO_PHASE, 0,
@@ -1791,8 +1795,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst)
REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK0_EN, 1);
break;
case 1:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK1_ROOT_GATE_DISABLE, 1);
REG_UPDATE_2(DSCCLK1_DTO_PARAM,
DSCCLK1_DTO_PHASE, 0,
@@ -1800,8 +1803,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst)
REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK1_EN, 1);
break;
case 2:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK2_ROOT_GATE_DISABLE, 1);
REG_UPDATE_2(DSCCLK2_DTO_PARAM,
DSCCLK2_DTO_PHASE, 0,
@@ -1809,8 +1811,7 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst)
REG_UPDATE(DSCCLK_DTO_CTRL, DSCCLK2_EN, 1);
break;
case 3:
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dsc)
- REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1);
+ REG_UPDATE(DCCG_GATE_DISABLE_CNTL6, DSCCLK3_ROOT_GATE_DISABLE, 1);
REG_UPDATE_2(DSCCLK3_DTO_PARAM,
DSCCLK3_DTO_PHASE, 0,
@@ -1821,6 +1822,9 @@ static void dccg35_enable_dscclk(struct dccg *dccg, int inst)
BREAK_TO_DEBUGGER();
return;
}
+
+ /* Wait for clock to ramp */
+ udelay(10);
}
static void dccg35_disable_dscclk(struct dccg *dccg,
@@ -1864,6 +1868,9 @@ static void dccg35_disable_dscclk(struct dccg *dccg,
default:
return;
}
+
+ /* Wait for clock ramp */
+ udelay(10);
}
static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
@@ -2349,10 +2356,7 @@ static void dccg35_disable_symclk_se_cb(
void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating)
{
-
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) {
- dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating);
- }
+ dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating);
}
static const struct dccg_funcs dccg35_funcs_new = {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index 3207addbd4eb..b7c2d3095b25 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -955,7 +955,7 @@ enum dc_status dcn20_enable_stream_timing(
return DC_ERROR_UNEXPECTED;
}
- fsleep(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz));
+ udelay(stream->timing.v_total * (stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz));
params.vertical_total_min = stream->adjust.v_total_min;
params.vertical_total_max = stream->adjust.v_total_max;
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index a267f574b619..764eff6a4ec6 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -113,6 +113,14 @@ static void enable_memory_low_power(struct dc *dc)
}
#endif
+static void print_pg_status(struct dc *dc, const char *debug_func, const char *debug_log)
+{
+ if (dc->debug.enable_pg_cntl_debug_logs && dc->res_pool->pg_cntl) {
+ if (dc->res_pool->pg_cntl->funcs->print_pg_status)
+ dc->res_pool->pg_cntl->funcs->print_pg_status(dc->res_pool->pg_cntl, debug_func, debug_log);
+ }
+}
+
void dcn35_set_dmu_fgcg(struct dce_hwseq *hws, bool enable)
{
REG_UPDATE_3(DMU_CLK_CNTL,
@@ -137,6 +145,8 @@ void dcn35_init_hw(struct dc *dc)
uint32_t user_level = MAX_BACKLIGHT_LEVEL;
int i;
+ print_pg_status(dc, __func__, ": start");
+
if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks)
dc->clk_mgr->funcs->init_clocks(dc->clk_mgr);
@@ -200,10 +210,7 @@ void dcn35_init_hw(struct dc *dc)
/* we want to turn off all dp displays before doing detection */
dc->link_srv->blank_all_dp_displays(dc);
-/*
- if (hws->funcs.enable_power_gating_plane)
- hws->funcs.enable_power_gating_plane(dc->hwseq, true);
-*/
+
if (res_pool->hubbub && res_pool->hubbub->funcs->dchubbub_init)
res_pool->hubbub->funcs->dchubbub_init(dc->res_pool->hubbub);
/* If taking control over from VBIOS, we may want to optimize our first
@@ -236,6 +243,8 @@ void dcn35_init_hw(struct dc *dc)
}
hws->funcs.init_pipes(dc, dc->current_state);
+ print_pg_status(dc, __func__, ": after init_pipes");
+
if (dc->res_pool->hubbub->funcs->allow_self_refresh_control &&
!dc->res_pool->hubbub->ctx->dc->debug.disable_stutter)
dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
@@ -312,6 +321,7 @@ void dcn35_init_hw(struct dc *dc)
if (dc->res_pool->pg_cntl->funcs->init_pg_status)
dc->res_pool->pg_cntl->funcs->init_pg_status(dc->res_pool->pg_cntl);
}
+ print_pg_status(dc, __func__, ": after init_pg_status");
}
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
@@ -500,97 +510,6 @@ void dcn35_physymclk_root_clock_control(struct dce_hwseq *hws, unsigned int phy_
}
}
-void dcn35_dsc_pg_control(
- struct dce_hwseq *hws,
- unsigned int dsc_inst,
- bool power_on)
-{
- uint32_t power_gate = power_on ? 0 : 1;
- uint32_t pwr_status = power_on ? 0 : 2;
- uint32_t org_ip_request_cntl = 0;
-
- if (hws->ctx->dc->debug.disable_dsc_power_gate)
- return;
- if (hws->ctx->dc->debug.ignore_pg)
- return;
- REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
- if (org_ip_request_cntl == 0)
- REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
-
- switch (dsc_inst) {
- case 0: /* DSC0 */
- REG_UPDATE(DOMAIN16_PG_CONFIG,
- DOMAIN_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN16_PG_STATUS,
- DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 1: /* DSC1 */
- REG_UPDATE(DOMAIN17_PG_CONFIG,
- DOMAIN_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN17_PG_STATUS,
- DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 2: /* DSC2 */
- REG_UPDATE(DOMAIN18_PG_CONFIG,
- DOMAIN_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN18_PG_STATUS,
- DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- case 3: /* DSC3 */
- REG_UPDATE(DOMAIN19_PG_CONFIG,
- DOMAIN_POWER_GATE, power_gate);
-
- REG_WAIT(DOMAIN19_PG_STATUS,
- DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
- break;
- default:
- BREAK_TO_DEBUGGER();
- break;
- }
-
- if (org_ip_request_cntl == 0)
- REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0);
-}
-
-void dcn35_enable_power_gating_plane(struct dce_hwseq *hws, bool enable)
-{
- bool force_on = true; /* disable power gating */
- uint32_t org_ip_request_cntl = 0;
-
- if (hws->ctx->dc->debug.disable_hubp_power_gate)
- return;
- if (hws->ctx->dc->debug.ignore_pg)
- return;
- REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
- if (org_ip_request_cntl == 0)
- REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);
- /* DCHUBP0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- /* DPP0/1/2/3/4/5 */
- REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-
- force_on = true; /* disable power gating */
- if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate)
- force_on = false;
-
- /* DCS0/1/2/3/4 */
- REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
- REG_UPDATE(DOMAIN19_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on);
-
-
-}
-
/* In headless boot cases, DIG may be turned
* on which causes HW/SW discrepancies.
* To avoid this, power down hardware on boot
@@ -1453,6 +1372,8 @@ void dcn35_prepare_bandwidth(
}
dcn20_prepare_bandwidth(dc, context);
+
+ print_pg_status(dc, __func__, ": after rcg and power up");
}
void dcn35_optimize_bandwidth(
@@ -1461,6 +1382,8 @@ void dcn35_optimize_bandwidth(
{
struct pg_block_update pg_update_state;
+ print_pg_status(dc, __func__, ": before rcg and power up");
+
dcn20_optimize_bandwidth(dc, context);
if (dc->hwss.calc_blocks_to_gate) {
@@ -1472,6 +1395,8 @@ void dcn35_optimize_bandwidth(
if (dc->hwss.root_clock_control)
dc->hwss.root_clock_control(dc, &pg_update_state, false);
}
+
+ print_pg_status(dc, __func__, ": after rcg and power up");
}
void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index a3ccf805bd16..aefb7c473741 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -115,7 +115,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
.apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations,
- .update_dsc_pg = dcn32_update_dsc_pg,
.calc_blocks_to_gate = dcn35_calc_blocks_to_gate,
.calc_blocks_to_ungate = dcn35_calc_blocks_to_ungate,
.hw_block_power_up = dcn35_hw_block_power_up,
@@ -150,7 +149,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = {
.plane_atomic_disable = dcn35_plane_atomic_disable,
//.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/
//.hubp_pg_control = dcn35_hubp_pg_control,
- .enable_power_gating_plane = dcn35_enable_power_gating_plane,
.dpp_root_clock_control = dcn35_dpp_root_clock_control,
.dpstream_root_clock_control = dcn35_dpstream_root_clock_control,
.physymclk_root_clock_control = dcn35_physymclk_root_clock_control,
@@ -165,7 +163,6 @@ static const struct hwseq_private_funcs dcn35_private_funcs = {
.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
.resync_fifo_dccg_dio = dcn314_resync_fifo_dccg_dio,
.is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy,
- .dsc_pg_control = dcn35_dsc_pg_control,
.dsc_pg_status = dcn32_dsc_pg_status,
.enable_plane = dcn35_enable_plane,
.wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
index 58f2be2a326b..a580a55695c3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -114,7 +114,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = {
.exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,
.update_visual_confirm_color = dcn10_update_visual_confirm_color,
.apply_idle_power_optimizations = dcn35_apply_idle_power_optimizations,
- .update_dsc_pg = dcn32_update_dsc_pg,
.calc_blocks_to_gate = dcn351_calc_blocks_to_gate,
.calc_blocks_to_ungate = dcn351_calc_blocks_to_ungate,
.hw_block_power_up = dcn351_hw_block_power_up,
@@ -145,7 +144,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = {
.plane_atomic_disable = dcn35_plane_atomic_disable,
//.plane_atomic_disable = dcn20_plane_atomic_disable,/*todo*/
//.hubp_pg_control = dcn35_hubp_pg_control,
- .enable_power_gating_plane = dcn35_enable_power_gating_plane,
.dpp_root_clock_control = dcn35_dpp_root_clock_control,
.dpstream_root_clock_control = dcn35_dpstream_root_clock_control,
.physymclk_root_clock_control = dcn35_physymclk_root_clock_control,
@@ -159,7 +157,6 @@ static const struct hwseq_private_funcs dcn351_private_funcs = {
.setup_hpo_hw_control = dcn35_setup_hpo_hw_control,
.calculate_dccg_k1_k2_values = dcn32_calculate_dccg_k1_k2_values,
.is_dp_dig_pixel_rate_div_policy = dcn35_is_dp_dig_pixel_rate_div_policy,
- .dsc_pg_control = dcn35_dsc_pg_control,
.dsc_pg_status = dcn32_dsc_pg_status,
.enable_plane = dcn35_enable_plane,
.wait_for_pipe_update_if_needed = dcn10_wait_for_pipe_update_if_needed,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
index 44f86cc2d1d6..227e3f8d7e5f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h
@@ -49,6 +49,7 @@ struct pg_cntl_funcs {
void (*mem_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
void (*dio_pg_control)(struct pg_cntl *pg_cntl, bool power_on);
void (*init_pg_status)(struct pg_cntl *pg_cntl);
+ void (*print_pg_status)(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log);
};
#endif //__DC_PG_CNTL_H__
diff --git a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c
index af21c0a27f86..72bd43f9bbe2 100644
--- a/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c
@@ -79,16 +79,12 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo
uint32_t power_gate = power_on ? 0 : 1;
uint32_t pwr_status = power_on ? 0 : 2;
uint32_t org_ip_request_cntl = 0;
- bool block_enabled;
-
- /*need to enable dscclk regardless DSC_PG*/
- if (pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc && power_on)
- pg_cntl->ctx->dc->res_pool->dccg->funcs->enable_dsc(
- pg_cntl->ctx->dc->res_pool->dccg, dsc_inst);
+ bool block_enabled = false;
+ bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg ||
+ pg_cntl->ctx->dc->debug.disable_dsc_power_gate ||
+ pg_cntl->ctx->dc->idle_optimizations_allowed;
- if (pg_cntl->ctx->dc->debug.ignore_pg ||
- pg_cntl->ctx->dc->debug.disable_dsc_power_gate ||
- pg_cntl->ctx->dc->idle_optimizations_allowed)
+ if (skip_pg && !power_on)
return;
block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, dsc_inst);
@@ -111,7 +107,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo
REG_WAIT(DOMAIN16_PG_STATUS,
DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
+ 1, 10000);
break;
case 1: /* DSC1 */
REG_UPDATE(DOMAIN17_PG_CONFIG,
@@ -119,7 +115,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo
REG_WAIT(DOMAIN17_PG_STATUS,
DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
+ 1, 10000);
break;
case 2: /* DSC2 */
REG_UPDATE(DOMAIN18_PG_CONFIG,
@@ -127,7 +123,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo
REG_WAIT(DOMAIN18_PG_STATUS,
DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
+ 1, 10000);
break;
case 3: /* DSC3 */
REG_UPDATE(DOMAIN19_PG_CONFIG,
@@ -135,7 +131,7 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo
REG_WAIT(DOMAIN19_PG_STATUS,
DOMAIN_PGFSM_PWR_STATUS, pwr_status,
- 1, 1000);
+ 1, 10000);
break;
default:
BREAK_TO_DEBUGGER();
@@ -144,12 +140,6 @@ void pg_cntl35_dsc_pg_control(struct pg_cntl *pg_cntl, unsigned int dsc_inst, bo
if (dsc_inst < MAX_PIPES)
pg_cntl->pg_pipe_res_enable[PG_DSC][dsc_inst] = power_on;
-
- if (pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc && !power_on) {
- /*this is to disable dscclk*/
- pg_cntl->ctx->dc->res_pool->dccg->funcs->disable_dsc(
- pg_cntl->ctx->dc->res_pool->dccg, dsc_inst);
- }
}
static bool pg_cntl35_hubp_dpp_pg_status(struct pg_cntl *pg_cntl, unsigned int hubp_dpp_inst)
@@ -189,11 +179,12 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp
uint32_t pwr_status = power_on ? 0 : 2;
uint32_t org_ip_request_cntl;
bool block_enabled;
+ bool skip_pg = pg_cntl->ctx->dc->debug.ignore_pg ||
+ pg_cntl->ctx->dc->debug.disable_hubp_power_gate ||
+ pg_cntl->ctx->dc->debug.disable_dpp_power_gate ||
+ pg_cntl->ctx->dc->idle_optimizations_allowed;
- if (pg_cntl->ctx->dc->debug.ignore_pg ||
- pg_cntl->ctx->dc->debug.disable_hubp_power_gate ||
- pg_cntl->ctx->dc->debug.disable_dpp_power_gate ||
- pg_cntl->ctx->dc->idle_optimizations_allowed)
+ if (skip_pg && !power_on)
return;
block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, hubp_dpp_inst);
@@ -213,22 +204,22 @@ void pg_cntl35_hubp_dpp_pg_control(struct pg_cntl *pg_cntl, unsigned int hubp_dp
case 0:
/* DPP0 & HUBP0 */
REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
+ REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
break;
case 1:
/* DPP1 & HUBP1 */
REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
+ REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
break;
case 2:
/* DPP2 & HUBP2 */
REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
+ REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
break;
case 3:
/* DPP3 & HUBP3 */
REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_GATE, power_gate);
- REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 1000);
+ REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, pwr_status, 1, 10000);
break;
default:
BREAK_TO_DEBUGGER();
@@ -501,6 +492,36 @@ void pg_cntl35_init_pg_status(struct pg_cntl *pg_cntl)
pg_cntl->pg_res_enable[PG_DWB] = block_enabled;
}
+static void pg_cntl35_print_pg_status(struct pg_cntl *pg_cntl, const char *debug_func, const char *debug_log)
+{
+ int i = 0;
+ bool block_enabled = false;
+
+ DC_LOG_DEBUG("%s: %s", debug_func, debug_log);
+
+ DC_LOG_DEBUG("PG_CNTL status:\n");
+
+ block_enabled = pg_cntl35_io_clk_status(pg_cntl);
+ DC_LOG_DEBUG("ONO0=%d (DCCG, DIO, DCIO)\n", block_enabled ? 1 : 0);
+
+ block_enabled = pg_cntl35_mem_status(pg_cntl);
+ DC_LOG_DEBUG("ONO1=%d (DCHUBBUB, DCHVM, DCHUBBUBMEM)\n", block_enabled ? 1 : 0);
+
+ block_enabled = pg_cntl35_plane_otg_status(pg_cntl);
+ DC_LOG_DEBUG("ONO2=%d (MPC, OPP, OPTC, DWB)\n", block_enabled ? 1 : 0);
+
+ block_enabled = pg_cntl35_hpo_pg_status(pg_cntl);
+ DC_LOG_DEBUG("ONO3=%d (HPO)\n", block_enabled ? 1 : 0);
+
+ for (i = 0; i < pg_cntl->ctx->dc->res_pool->pipe_count; i++) {
+ block_enabled = pg_cntl35_hubp_dpp_pg_status(pg_cntl, i);
+ DC_LOG_DEBUG("ONO%d=%d (DCHUBP%d, DPP%d)\n", 4 + i * 2, block_enabled ? 1 : 0, i, i);
+
+ block_enabled = pg_cntl35_dsc_pg_status(pg_cntl, i);
+ DC_LOG_DEBUG("ONO%d=%d (DSC%d)\n", 5 + i * 2, block_enabled ? 1 : 0, i);
+ }
+}
+
static const struct pg_cntl_funcs pg_cntl35_funcs = {
.init_pg_status = pg_cntl35_init_pg_status,
.dsc_pg_control = pg_cntl35_dsc_pg_control,
@@ -511,7 +532,8 @@ static const struct pg_cntl_funcs pg_cntl35_funcs = {
.mpcc_pg_control = pg_cntl35_mpcc_pg_control,
.opp_pg_control = pg_cntl35_opp_pg_control,
.optc_pg_control = pg_cntl35_optc_pg_control,
- .dwb_pg_control = pg_cntl35_dwb_pg_control
+ .dwb_pg_control = pg_cntl35_dwb_pg_control,
+ .print_pg_status = pg_cntl35_print_pg_status
};
struct pg_cntl *pg_cntl35_create(
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 273054c22325..c92f3e736228 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -1172,7 +1172,7 @@ static void icl_mbus_init(struct intel_display *display)
if (DISPLAY_VER(display) == 12)
abox_regs |= BIT(0);
- for_each_set_bit(i, &abox_regs, sizeof(abox_regs))
+ for_each_set_bit(i, &abox_regs, BITS_PER_TYPE(abox_regs))
intel_de_rmw(display, MBUS_ABOX_CTL(i), mask, val);
}
@@ -1629,11 +1629,11 @@ static void tgl_bw_buddy_init(struct intel_display *display)
if (table[config].page_mask == 0) {
drm_dbg_kms(display->drm,
"Unknown memory configuration; disabling address buddy logic.\n");
- for_each_set_bit(i, &abox_mask, sizeof(abox_mask))
+ for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask))
intel_de_write(display, BW_BUDDY_CTL(i),
BW_BUDDY_DISABLE);
} else {
- for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) {
+ for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) {
intel_de_write(display, BW_BUDDY_PAGE_MASK(i),
table[config].page_mask);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index f8a817689e16..eb5537f0ac90 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -387,19 +387,21 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev)
of_id = of_match_node(mtk_drm_of_ids, node);
if (!of_id)
- goto next_put_node;
+ continue;
pdev = of_find_device_by_node(node);
if (!pdev)
- goto next_put_node;
+ continue;
drm_dev = device_find_child(&pdev->dev, NULL, mtk_drm_match);
+ put_device(&pdev->dev);
if (!drm_dev)
- goto next_put_device_pdev_dev;
+ continue;
temp_drm_priv = dev_get_drvdata(drm_dev);
+ put_device(drm_dev);
if (!temp_drm_priv)
- goto next_put_device_drm_dev;
+ continue;
if (temp_drm_priv->data->main_len)
all_drm_priv[CRTC_MAIN] = temp_drm_priv;
@@ -411,17 +413,10 @@ static bool mtk_drm_get_all_drm_priv(struct device *dev)
if (temp_drm_priv->mtk_drm_bound)
cnt++;
-next_put_device_drm_dev:
- put_device(drm_dev);
-
-next_put_device_pdev_dev:
- put_device(&pdev->dev);
-
-next_put_node:
- of_node_put(node);
-
- if (cnt == MAX_CRTC)
+ if (cnt == MAX_CRTC) {
+ of_node_put(node);
break;
+ }
}
if (drm_priv->data->mmsys_dev_num == cnt) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 9f345a008717..869d4335c0f4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -240,21 +240,6 @@ nouveau_fence_emit(struct nouveau_fence *fence)
return ret;
}
-void
-nouveau_fence_cancel(struct nouveau_fence *fence)
-{
- struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
- unsigned long flags;
-
- spin_lock_irqsave(&fctx->lock, flags);
- if (!dma_fence_is_signaled_locked(&fence->base)) {
- dma_fence_set_error(&fence->base, -ECANCELED);
- if (nouveau_fence_signal(fence))
- nvif_event_block(&fctx->event);
- }
- spin_unlock_irqrestore(&fctx->lock, flags);
-}
-
bool
nouveau_fence_done(struct nouveau_fence *fence)
{
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 9957a919bd38..183dd43ecfff 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -29,7 +29,6 @@ void nouveau_fence_unref(struct nouveau_fence **);
int nouveau_fence_emit(struct nouveau_fence *);
bool nouveau_fence_done(struct nouveau_fence *);
-void nouveau_fence_cancel(struct nouveau_fence *fence);
int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 0cc0bc9f9952..e60f7892f5ce 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -11,7 +11,6 @@
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_sched.h"
-#include "nouveau_chan.h"
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000
@@ -122,9 +121,11 @@ nouveau_job_done(struct nouveau_job *job)
{
struct nouveau_sched *sched = job->sched;
- spin_lock(&sched->job_list.lock);
+ spin_lock(&sched->job.list.lock);
list_del(&job->entry);
- spin_unlock(&sched->job_list.lock);
+ spin_unlock(&sched->job.list.lock);
+
+ wake_up(&sched->job.wq);
}
void
@@ -305,9 +306,9 @@ nouveau_job_submit(struct nouveau_job *job)
}
/* Submit was successful; add the job to the schedulers job list. */
- spin_lock(&sched->job_list.lock);
- list_add(&job->entry, &sched->job_list.head);
- spin_unlock(&sched->job_list.lock);
+ spin_lock(&sched->job.list.lock);
+ list_add(&job->entry, &sched->job.list.head);
+ spin_unlock(&sched->job.list.lock);
drm_sched_job_arm(&job->base);
job->done_fence = dma_fence_get(&job->base.s_fence->finished);
@@ -392,23 +393,10 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job)
nouveau_job_fini(job);
}
-static void
-nouveau_sched_cancel_job(struct drm_sched_job *sched_job)
-{
- struct nouveau_fence *fence;
- struct nouveau_job *job;
-
- job = to_nouveau_job(sched_job);
- fence = to_nouveau_fence(job->done_fence);
-
- nouveau_fence_cancel(fence);
-}
-
static const struct drm_sched_backend_ops nouveau_sched_ops = {
.run_job = nouveau_sched_run_job,
.timedout_job = nouveau_sched_timedout_job,
.free_job = nouveau_sched_free_job,
- .cancel_job = nouveau_sched_cancel_job,
};
static int
@@ -458,8 +446,9 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
goto fail_sched;
mutex_init(&sched->mutex);
- spin_lock_init(&sched->job_list.lock);
- INIT_LIST_HEAD(&sched->job_list.head);
+ spin_lock_init(&sched->job.list.lock);
+ INIT_LIST_HEAD(&sched->job.list.head);
+ init_waitqueue_head(&sched->job.wq);
return 0;
@@ -493,12 +482,16 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
return 0;
}
+
static void
nouveau_sched_fini(struct nouveau_sched *sched)
{
struct drm_gpu_scheduler *drm_sched = &sched->base;
struct drm_sched_entity *entity = &sched->entity;
+ rmb(); /* for list_empty to work without lock */
+ wait_event(sched->job.wq, list_empty(&sched->job.list.head));
+
drm_sched_entity_fini(entity);
drm_sched_fini(drm_sched);
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index b98c3f0bef30..20cd1da8db73 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -103,9 +103,12 @@ struct nouveau_sched {
struct mutex mutex;
struct {
- struct list_head head;
- spinlock_t lock;
- } job_list;
+ struct {
+ struct list_head head;
+ spinlock_t lock;
+ } list;
+ struct wait_queue_head wq;
+ } job;
};
int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index ddfc46bc1b3e..48f105239f42 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1019,8 +1019,8 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
u64 end = addr + range;
again:
- spin_lock(&sched->job_list.lock);
- list_for_each_entry(__job, &sched->job_list.head, entry) {
+ spin_lock(&sched->job.list.lock);
+ list_for_each_entry(__job, &sched->job.list.head, entry) {
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
list_for_each_op(op, &bind_job->ops) {
@@ -1030,7 +1030,7 @@ again:
if (!(end <= op_addr || addr >= op_end)) {
nouveau_uvmm_bind_job_get(bind_job);
- spin_unlock(&sched->job_list.lock);
+ spin_unlock(&sched->job.list.lock);
wait_for_completion(&bind_job->complete);
nouveau_uvmm_bind_job_put(bind_job);
goto again;
@@ -1038,7 +1038,7 @@ again:
}
}
}
- spin_unlock(&sched->job_list.lock);
+ spin_unlock(&sched->job.list.lock);
}
static int
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 1116f2d2826e..4d8e9b34702a 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1094,7 +1094,7 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
struct drm_panthor_queue_create *queue_args;
int ret;
- if (!args->queues.count)
+ if (!args->queues.count || args->queues.count > MAX_CS_PER_CSG)
return -EINVAL;
ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index bb469096d072..7b40cc8be1c9 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -236,7 +236,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
}
xe_bo_lock(external, false);
- err = xe_bo_pin_external(external);
+ err = xe_bo_pin_external(external, false);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo pin err=%pe\n",
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index c53f67ce4b0a..121f17c112ec 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -89,15 +89,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
return;
}
- /*
- * If on different devices, the exporter is kept in system if
- * possible, saving a migration step as the transfer is just
- * likely as fast from system memory.
- */
- if (params->mem_mask & XE_BO_FLAG_SYSTEM)
- KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
- else
- KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
if (params->force_different_devices)
KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 9954bb458ce1..bae7ff2e5927 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -186,6 +186,8 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_TT,
+ .flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
+ TTM_PL_FLAG_FALLBACK : 0,
};
*c += 1;
}
@@ -2269,6 +2271,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
/**
* xe_bo_pin_external - pin an external BO
* @bo: buffer object to be pinned
+ * @in_place: Pin in current placement, don't attempt to migrate.
*
* Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
* BO. Unique call compared to xe_bo_pin as this function has it own set of
@@ -2276,7 +2279,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_bo_pin_external(struct xe_bo *bo)
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
{
struct xe_device *xe = xe_bo_device(bo);
int err;
@@ -2285,9 +2288,11 @@ int xe_bo_pin_external(struct xe_bo *bo)
xe_assert(xe, xe_bo_is_user(bo));
if (!xe_bo_is_pinned(bo)) {
- err = xe_bo_validate(bo, NULL, false);
- if (err)
- return err;
+ if (!in_place) {
+ err = xe_bo_validate(bo, NULL, false);
+ if (err)
+ return err;
+ }
spin_lock(&xe->pinned.lock);
list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
@@ -2440,6 +2445,9 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
};
int ret;
+ if (xe_bo_is_pinned(bo))
+ return 0;
+
if (vm) {
lockdep_assert_held(&vm->lock);
xe_vm_assert_held(vm);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 02e8cde4c6b2..9ce94d252015 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -198,7 +198,7 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
}
}
-int xe_bo_pin_external(struct xe_bo *bo);
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place);
int xe_bo_pin(struct xe_bo *bo);
void xe_bo_unpin_external(struct xe_bo *bo);
void xe_bo_unpin(struct xe_bo *bo);
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index d4d2c6854790..7ceb0c90f391 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -553,6 +553,12 @@ struct xe_device {
/** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */
struct notifier_block pm_notifier;
+ /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */
+ struct completion pm_block;
+ /** @rebind_resume_list: List of wq items to kick on resume. */
+ struct list_head rebind_resume_list;
+ /** @rebind_resume_lock: Lock to protect the rebind_resume_list */
+ struct mutex rebind_resume_lock;
/** @pmt: Support the PMT driver callback interface */
struct {
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 346f857f3837..af64baf872ef 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -72,7 +72,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
return ret;
}
- ret = xe_bo_pin_external(bo);
+ ret = xe_bo_pin_external(bo, true);
xe_assert(xe, !ret);
return 0;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 44364c042ad7..374c831e691b 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -237,6 +237,15 @@ retry:
goto err_unlock_list;
}
+ /*
+ * It's OK to block interruptible here with the vm lock held, since
+ * on task freezing during suspend / hibernate, the call will
+ * return -ERESTARTSYS and the IOCTL will be rerun.
+ */
+ err = wait_for_completion_interruptible(&xe->pm_block);
+ if (err)
+ goto err_unlock_list;
+
vm_exec.vm = &vm->gpuvm;
vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
if (xe_vm_in_lr_mode(vm)) {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index e279b47ba03b..bb9b6ecad2af 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -24,6 +24,7 @@
#include "xe_pcode.h"
#include "xe_pxp.h"
#include "xe_trace.h"
+#include "xe_vm.h"
#include "xe_wa.h"
/**
@@ -290,6 +291,19 @@ static u32 vram_threshold_value(struct xe_device *xe)
return DEFAULT_VRAM_THRESHOLD;
}
+static void xe_pm_wake_rebind_workers(struct xe_device *xe)
+{
+ struct xe_vm *vm, *next;
+
+ mutex_lock(&xe->rebind_resume_lock);
+ list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
+ preempt.pm_activate_link) {
+ list_del_init(&vm->preempt.pm_activate_link);
+ xe_vm_resume_rebind_worker(vm);
+ }
+ mutex_unlock(&xe->rebind_resume_lock);
+}
+
static int xe_pm_notifier_callback(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -299,30 +313,30 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
+ reinit_completion(&xe->pm_block);
xe_pm_runtime_get(xe);
err = xe_bo_evict_all_user(xe);
- if (err) {
+ if (err)
drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
- xe_pm_runtime_put(xe);
- break;
- }
err = xe_bo_notifier_prepare_all_pinned(xe);
- if (err) {
+ if (err)
drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
- xe_pm_runtime_put(xe);
- }
+ /*
+ * Keep the runtime pm reference until post hibernation / post suspend to
+ * avoid a runtime suspend interfering with evicted objects or backup
+ * allocations.
+ */
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
+ complete_all(&xe->pm_block);
+ xe_pm_wake_rebind_workers(xe);
xe_bo_notifier_unprepare_all_pinned(xe);
xe_pm_runtime_put(xe);
break;
}
- if (err)
- return NOTIFY_BAD;
-
return NOTIFY_DONE;
}
@@ -344,6 +358,14 @@ int xe_pm_init(struct xe_device *xe)
if (err)
return err;
+ err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
+ if (err)
+ goto err_unregister;
+
+ init_completion(&xe->pm_block);
+ complete_all(&xe->pm_block);
+ INIT_LIST_HEAD(&xe->rebind_resume_list);
+
/* For now suspend/resume is only allowed with GuC */
if (!xe_device_uc_enabled(xe))
return 0;
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 41705f5d52e3..8f7b0add2364 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -41,6 +41,8 @@
*
* # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
*
+ * It is the responsibility of the user to clear the mode once firmware flash is complete.
+ *
* Refer :ref:`xe_configfs` for more details on how to use configfs
*
* Survivability mode is indicated by the below admin-only readable sysfs which provides additional
@@ -147,7 +149,6 @@ static void xe_survivability_mode_fini(void *arg)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
- xe_configfs_clear_survivability_mode(pdev);
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
}
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d60c4b115304..dc4f61e56579 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -393,6 +393,9 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
&vm->rebind_list);
+ if (!try_wait_for_completion(&vm->xe->pm_block))
+ return -EAGAIN;
+
ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
if (ret)
return ret;
@@ -479,6 +482,33 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
+static bool vm_suspend_rebind_worker(struct xe_vm *vm)
+{
+ struct xe_device *xe = vm->xe;
+ bool ret = false;
+
+ mutex_lock(&xe->rebind_resume_lock);
+ if (!try_wait_for_completion(&vm->xe->pm_block)) {
+ ret = true;
+ list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
+ }
+ mutex_unlock(&xe->rebind_resume_lock);
+
+ return ret;
+}
+
+/**
+ * xe_vm_resume_rebind_worker() - Resume the rebind worker.
+ * @vm: The vm whose preempt worker to resume.
+ *
+ * Resume a preempt worker that was previously suspended by
+ * vm_suspend_rebind_worker().
+ */
+void xe_vm_resume_rebind_worker(struct xe_vm *vm)
+{
+ queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
+}
+
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
@@ -502,6 +532,11 @@ static void preempt_rebind_work_func(struct work_struct *w)
}
retry:
+ if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
+ up_write(&vm->lock);
+ return;
+ }
+
if (xe_vm_userptr_check_repin(vm)) {
err = xe_vm_userptr_pin(vm);
if (err)
@@ -1714,6 +1749,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
if (flags & XE_VM_FLAG_LR_MODE) {
INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
xe_pm_runtime_get_noresume(xe);
+ INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
}
if (flags & XE_VM_FLAG_FAULT_MODE) {
@@ -1895,8 +1931,12 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_assert(xe, !vm->preempt.num_exec_queues);
xe_vm_close(vm);
- if (xe_vm_in_preempt_fence_mode(vm))
+ if (xe_vm_in_preempt_fence_mode(vm)) {
+ mutex_lock(&xe->rebind_resume_lock);
+ list_del_init(&vm->preempt.pm_activate_link);
+ mutex_unlock(&xe->rebind_resume_lock);
flush_work(&vm->preempt.rebind_work);
+ }
if (xe_vm_in_fault_mode(vm))
xe_svm_close(vm);
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 2ecb417c19a2..82b112795807 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -273,6 +273,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
struct xe_exec_queue *q, u64 addr,
enum xe_cache_level cache_lvl);
+void xe_vm_resume_rebind_worker(struct xe_vm *vm);
+
/**
* xe_vm_resv() - Return's the vm's reservation object
* @vm: The vm
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 8a07feef503b..6058cf739388 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -293,6 +293,11 @@ struct xe_vm {
* BOs
*/
struct work_struct rebind_work;
+ /**
+ * @preempt.pm_activate_link: Link to list of rebind workers to be
+ * kicked on resume.
+ */
+ struct list_head pm_activate_link;
} preempt;
/** @um: unified memory state */
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index e990f20eccfe..710f4423726c 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -30,7 +30,8 @@
16022287689 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
13011645652 GRAPHICS_VERSION(2004)
- GRAPHICS_VERSION(3001)
+ GRAPHICS_VERSION_RANGE(3000, 3001)
+ GRAPHICS_VERSION(3003)
14022293748 GRAPHICS_VERSION_RANGE(2001, 2002)
GRAPHICS_VERSION(2004)
GRAPHICS_VERSION_RANGE(3000, 3001)
diff --git a/drivers/gpu/nova-core/Kconfig b/drivers/gpu/nova-core/Kconfig
index 8726d80d6ba4..20d3e6d0d796 100644
--- a/drivers/gpu/nova-core/Kconfig
+++ b/drivers/gpu/nova-core/Kconfig
@@ -1,5 +1,6 @@
config NOVA_CORE
tristate "Nova Core GPU driver"
+ depends on 64BIT
depends on PCI
depends on RUST
depends on RUST_FW_LOADER_ABSTRACTIONS
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 46cebde79f34..e518dfeee654 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -185,8 +185,8 @@ config MTD_POWERNV_FLASH
config MTD_INTEL_DG
tristate "Intel Discrete Graphics non-volatile memory driver"
- depends on AUXILIARY_BUS
- depends on MTD
+ depends on AUXILIARY_BUS && MTD
+ depends on DRM_I915!=n || DRM_XE!=n || COMPILE_TEST
help
This provides an MTD device to access Intel Discrete Graphics
non-volatile memory.
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 84ab4a83cbd6..db94d14a3807 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -1378,13 +1378,23 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand,
return ret;
/*
+ * Read setup timing depends on the operation done on the NAND:
+ *
+ * NRD_SETUP = max(tAR, tCLR)
+ */
+ timeps = max(conf->timings.sdr.tAR_min, conf->timings.sdr.tCLR_min);
+ ncycles = DIV_ROUND_UP(timeps, mckperiodps);
+ totalcycles += ncycles;
+ ret = atmel_smc_cs_conf_set_setup(smcconf, ATMEL_SMC_NRD_SHIFT, ncycles);
+ if (ret)
+ return ret;
+
+ /*
* The read cycle timing is directly matching tRC, but is also
* dependent on the setup and hold timings we calculated earlier,
* which gives:
*
- * NRD_CYCLE = max(tRC, NRD_PULSE + NRD_HOLD)
- *
- * NRD_SETUP is always 0.
+ * NRD_CYCLE = max(tRC, NRD_SETUP + NRD_PULSE + NRD_HOLD)
*/
ncycles = DIV_ROUND_UP(conf->timings.sdr.tRC_min, mckperiodps);
ncycles = max(totalcycles, ncycles);
diff --git a/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c b/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c
index c23b537948d5..1a285cd8fad6 100644
--- a/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c
+++ b/drivers/mtd/nand/raw/nuvoton-ma35d1-nand-controller.c
@@ -935,10 +935,10 @@ static void ma35_chips_cleanup(struct ma35_nand_info *nand)
static int ma35_nand_chips_init(struct device *dev, struct ma35_nand_info *nand)
{
- struct device_node *np = dev->of_node, *nand_np;
+ struct device_node *np = dev->of_node;
int ret;
- for_each_child_of_node(np, nand_np) {
+ for_each_child_of_node_scoped(np, nand_np) {
ret = ma35_nand_chip_init(dev, nand, nand_np);
if (ret) {
ma35_chips_cleanup(nand);
diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
index a960403081f1..d957327fb4fa 100644
--- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c
+++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
@@ -272,6 +272,7 @@ struct stm32_fmc2_nfc {
struct sg_table dma_data_sg;
struct sg_table dma_ecc_sg;
u8 *ecc_buf;
+ dma_addr_t dma_ecc_addr;
int dma_ecc_len;
u32 tx_dma_max_burst;
u32 rx_dma_max_burst;
@@ -902,17 +903,10 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf,
if (!write_data && !raw) {
/* Configure DMA ECC status */
- p = nfc->ecc_buf;
for_each_sg(nfc->dma_ecc_sg.sgl, sg, eccsteps, s) {
- sg_set_buf(sg, p, nfc->dma_ecc_len);
- p += nfc->dma_ecc_len;
- }
-
- ret = dma_map_sg(nfc->dev, nfc->dma_ecc_sg.sgl,
- eccsteps, dma_data_dir);
- if (!ret) {
- ret = -EIO;
- goto err_unmap_data;
+ sg_dma_address(sg) = nfc->dma_ecc_addr +
+ s * nfc->dma_ecc_len;
+ sg_dma_len(sg) = nfc->dma_ecc_len;
}
desc_ecc = dmaengine_prep_slave_sg(nfc->dma_ecc_ch,
@@ -921,7 +915,7 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf,
DMA_PREP_INTERRUPT);
if (!desc_ecc) {
ret = -ENOMEM;
- goto err_unmap_ecc;
+ goto err_unmap_data;
}
reinit_completion(&nfc->dma_ecc_complete);
@@ -929,7 +923,7 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf,
desc_ecc->callback_param = &nfc->dma_ecc_complete;
ret = dma_submit_error(dmaengine_submit(desc_ecc));
if (ret)
- goto err_unmap_ecc;
+ goto err_unmap_data;
dma_async_issue_pending(nfc->dma_ecc_ch);
}
@@ -949,7 +943,7 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf,
if (!write_data && !raw)
dmaengine_terminate_all(nfc->dma_ecc_ch);
ret = -ETIMEDOUT;
- goto err_unmap_ecc;
+ goto err_unmap_data;
}
/* Wait DMA data transfer completion */
@@ -969,11 +963,6 @@ static int stm32_fmc2_nfc_xfer(struct nand_chip *chip, const u8 *buf,
}
}
-err_unmap_ecc:
- if (!write_data && !raw)
- dma_unmap_sg(nfc->dev, nfc->dma_ecc_sg.sgl,
- eccsteps, dma_data_dir);
-
err_unmap_data:
dma_unmap_sg(nfc->dev, nfc->dma_data_sg.sgl, eccsteps, dma_data_dir);
@@ -996,9 +985,21 @@ static int stm32_fmc2_nfc_seq_write(struct nand_chip *chip, const u8 *buf,
/* Write oob */
if (oob_required) {
- ret = nand_change_write_column_op(chip, mtd->writesize,
- chip->oob_poi, mtd->oobsize,
- false);
+ unsigned int offset_in_page = mtd->writesize;
+ const void *buf = chip->oob_poi;
+ unsigned int len = mtd->oobsize;
+
+ if (!raw) {
+ struct mtd_oob_region oob_free;
+
+ mtd_ooblayout_free(mtd, 0, &oob_free);
+ offset_in_page += oob_free.offset;
+ buf += oob_free.offset;
+ len = oob_free.length;
+ }
+
+ ret = nand_change_write_column_op(chip, offset_in_page,
+ buf, len, false);
if (ret)
return ret;
}
@@ -1610,7 +1611,8 @@ static int stm32_fmc2_nfc_dma_setup(struct stm32_fmc2_nfc *nfc)
return ret;
/* Allocate a buffer to store ECC status registers */
- nfc->ecc_buf = devm_kzalloc(nfc->dev, FMC2_MAX_ECC_BUF_LEN, GFP_KERNEL);
+ nfc->ecc_buf = dmam_alloc_coherent(nfc->dev, FMC2_MAX_ECC_BUF_LEN,
+ &nfc->dma_ecc_addr, GFP_KERNEL);
if (!nfc->ecc_buf)
return -ENOMEM;
diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c
index 87053389a1fc..4870b2d5edb2 100644
--- a/drivers/mtd/nand/spi/winbond.c
+++ b/drivers/mtd/nand/spi/winbond.c
@@ -176,6 +176,36 @@ static const struct mtd_ooblayout_ops w25n02kv_ooblayout = {
.free = w25n02kv_ooblayout_free,
};
+static int w25n01jw_ooblayout_ecc(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *region)
+{
+ if (section > 3)
+ return -ERANGE;
+
+ region->offset = (16 * section) + 12;
+ region->length = 4;
+
+ return 0;
+}
+
+static int w25n01jw_ooblayout_free(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *region)
+{
+ if (section > 3)
+ return -ERANGE;
+
+ region->offset = (16 * section);
+ region->length = 12;
+
+ /* Extract BBM */
+ if (!section) {
+ region->offset += 2;
+ region->length -= 2;
+ }
+
+ return 0;
+}
+
static int w35n01jw_ooblayout_ecc(struct mtd_info *mtd, int section,
struct mtd_oob_region *region)
{
@@ -206,6 +236,11 @@ static int w35n01jw_ooblayout_free(struct mtd_info *mtd, int section,
return 0;
}
+static const struct mtd_ooblayout_ops w25n01jw_ooblayout = {
+ .ecc = w25n01jw_ooblayout_ecc,
+ .free = w25n01jw_ooblayout_free,
+};
+
static const struct mtd_ooblayout_ops w35n01jw_ooblayout = {
.ecc = w35n01jw_ooblayout_ecc,
.free = w35n01jw_ooblayout_free,
@@ -394,7 +429,7 @@ static const struct spinand_info winbond_spinand_table[] = {
&write_cache_variants,
&update_cache_variants),
0,
- SPINAND_ECCINFO(&w25m02gv_ooblayout, NULL),
+ SPINAND_ECCINFO(&w25n01jw_ooblayout, NULL),
SPINAND_CONFIGURE_CHIP(w25n0xjw_hs_cfg)),
SPINAND_INFO("W25N01KV", /* 3.3V */
SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xae, 0x21),
diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c
index 64e664f5adcc..87c134bcd48d 100644
--- a/drivers/net/can/rcar/rcar_can.c
+++ b/drivers/net/can/rcar/rcar_can.c
@@ -861,7 +861,6 @@ static int rcar_can_resume(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct rcar_can_priv *priv = netdev_priv(ndev);
- u16 ctlr;
int err;
if (!netif_running(ndev))
@@ -873,12 +872,7 @@ static int rcar_can_resume(struct device *dev)
return err;
}
- ctlr = readw(&priv->regs->ctlr);
- ctlr &= ~RCAR_CAN_CTLR_SLPM;
- writew(ctlr, &priv->regs->ctlr);
- ctlr &= ~RCAR_CAN_CTLR_CANM;
- writew(ctlr, &priv->regs->ctlr);
- priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ rcar_can_start(ndev);
netif_device_attach(ndev);
netif_start_queue(ndev);
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 81baec8eb1e5..a25a3ca62c12 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -690,14 +690,6 @@ static void xcan_write_frame(struct net_device *ndev, struct sk_buff *skb,
dlc |= XCAN_DLCR_EDL_MASK;
}
- if (!(priv->devtype.flags & XCAN_FLAG_TX_MAILBOXES) &&
- (priv->devtype.flags & XCAN_FLAG_TXFEMP))
- can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max, 0);
- else
- can_put_echo_skb(skb, ndev, 0, 0);
-
- priv->tx_head++;
-
priv->write_reg(priv, XCAN_FRAME_ID_OFFSET(frame_offset), id);
/* If the CAN frame is RTR frame this write triggers transmission
* (not on CAN FD)
@@ -730,6 +722,14 @@ static void xcan_write_frame(struct net_device *ndev, struct sk_buff *skb,
data[1]);
}
}
+
+ if (!(priv->devtype.flags & XCAN_FLAG_TX_MAILBOXES) &&
+ (priv->devtype.flags & XCAN_FLAG_TXFEMP))
+ can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max, 0);
+ else
+ can_put_echo_skb(skb, ndev, 0, 0);
+
+ priv->tx_head++;
}
/**
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 829b1f087e9e..2f846381d5a7 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1273,9 +1273,15 @@ static int b53_setup(struct dsa_switch *ds)
*/
ds->untag_vlan_aware_bridge_pvid = true;
- /* Ageing time is set in seconds */
- ds->ageing_time_min = 1 * 1000;
- ds->ageing_time_max = AGE_TIME_MAX * 1000;
+ if (dev->chip_id == BCM53101_DEVICE_ID) {
+ /* BCM53101 uses 0.5 second increments */
+ ds->ageing_time_min = 1 * 500;
+ ds->ageing_time_max = AGE_TIME_MAX * 500;
+ } else {
+ /* Everything else uses 1 second increments */
+ ds->ageing_time_min = 1 * 1000;
+ ds->ageing_time_max = AGE_TIME_MAX * 1000;
+ }
ret = b53_reset_switch(dev);
if (ret) {
@@ -2559,7 +2565,10 @@ int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
else
reg = B53_AGING_TIME_CONTROL;
- atc = DIV_ROUND_CLOSEST(msecs, 1000);
+ if (dev->chip_id == BCM53101_DEVICE_ID)
+ atc = DIV_ROUND_CLOSEST(msecs, 500);
+ else
+ atc = DIV_ROUND_CLOSEST(msecs, 1000);
if (!is5325(dev) && !is5365(dev))
atc |= AGE_CHANGE;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1383918f8a3f..adf1f2bbcbb1 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2363,7 +2363,8 @@ static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev)
*/
phy_dev = of_phy_find_device(fep->phy_node);
phy_reset_after_clk_enable(phy_dev);
- put_device(&phy_dev->mdio.dev);
+ if (phy_dev)
+ put_device(&phy_dev->mdio.dev);
}
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 76d872b91a38..cc02a85ad42b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1561,6 +1561,7 @@ I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
struct i40e_aq_set_mac_config {
__le16 max_frame_size;
u8 params;
+#define I40E_AQ_SET_MAC_CONFIG_CRC_EN BIT(2)
u8 tx_timer_priority; /* bitmap */
__le16 tx_timer_value;
__le16 fc_refresh_threshold;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 270e7e8cf9cf..59f5c1e810eb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1190,6 +1190,40 @@ int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
}
/**
+ * i40e_aq_set_mac_config - Configure MAC settings
+ * @hw: pointer to the hw struct
+ * @max_frame_size: Maximum Frame Size to be supported by the port
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set MAC configuration (0x0603). Note that max_frame_size must be greater
+ * than zero.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_set_mac_config *cmd;
+ struct libie_aq_desc desc;
+
+ cmd = libie_aq_raw(&desc);
+
+ if (max_frame_size == 0)
+ return -EINVAL;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_mac_config);
+
+ cmd->max_frame_size = cpu_to_le16(max_frame_size);
+ cmd->params = I40E_AQ_SET_MAC_CONFIG_CRC_EN;
+
+#define I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD 0x7FFF
+ cmd->fc_refresh_threshold =
+ cpu_to_le16(I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD);
+
+ return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+}
+
+/**
* i40e_aq_clear_pxe_mode
* @hw: pointer to the hw struct
* @cmd_details: pointer to command details structure or NULL
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b83f823e4917..b14019d44b58 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4156,7 +4156,7 @@ free_queue_irqs:
irq_num = pf->msix_entries[base + vector].vector;
irq_set_affinity_notifier(irq_num, NULL);
irq_update_affinity_hint(irq_num, NULL);
- free_irq(irq_num, &vsi->q_vectors[vector]);
+ free_irq(irq_num, vsi->q_vectors[vector]);
}
return err;
}
@@ -16045,13 +16045,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_dbg(&pf->pdev->dev, "get supported phy types ret = %pe last_status = %s\n",
ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
- /* make sure the MFS hasn't been set lower than the default */
#define MAX_FRAME_SIZE_DEFAULT 0x2600
- val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK,
- rd32(&pf->hw, I40E_PRTGL_SAH));
- if (val < MAX_FRAME_SIZE_DEFAULT)
- dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n",
- pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT);
+
+ err = i40e_aq_set_mac_config(hw, MAX_FRAME_SIZE_DEFAULT, NULL);
+ if (err)
+ dev_warn(&pdev->dev, "set mac config ret = %pe last_status = %s\n",
+ ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
+
+ /* Make sure the MFS is set to the expected value */
+ val = rd32(hw, I40E_PRTGL_SAH);
+ FIELD_MODIFY(I40E_PRTGL_SAH_MFS_MASK, &val, MAX_FRAME_SIZE_DEFAULT);
+ wr32(hw, I40E_PRTGL_SAH, val);
/* Add a filter to drop all Flow control frames from any VSI from being
* transmitted. By doing so we stop a malicious VF from sending out
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index aef5de53ce3b..26bb7bffe361 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -98,6 +98,8 @@ int i40e_aq_set_mac_loopback(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
int i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask,
struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size,
+ struct i40e_asq_cmd_details *cmd_details);
int i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
int i40e_aq_set_link_restart_an(struct i40e_hw *hw,
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 92ef33459aec..7b8f32c5169a 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2081,11 +2081,8 @@ static void igb_diag_test(struct net_device *netdev,
} else {
dev_info(&adapter->pdev->dev, "online testing starting\n");
- /* PHY is powered down when interface is down */
- if (if_running && igb_link_test(adapter, &data[TEST_LINK]))
+ if (igb_link_test(adapter, &data[TEST_LINK]))
eth_test->flags |= ETH_TEST_FL_FAILED;
- else
- data[TEST_LINK] = 0;
/* Online tests aren't run; pass by default */
data[TEST_REG] = 0;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index a9a7a94ae61e..453deb6d14b3 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -4453,8 +4453,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
- rx_ring->queue_index,
- rx_ring->q_vector->napi.napi_id);
+ rx_ring->queue_index, 0);
if (res < 0) {
dev_err(dev, "Failed to register xdp_rxq index %u\n",
rx_ring->queue_index);
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index dadce6009791..e42d0fdefee1 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -654,7 +654,7 @@ static void icssg_prueth_hsr_fdb_add_del(struct prueth_emac *emac,
static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr)
{
- struct net_device *real_dev;
+ struct net_device *real_dev, *port_dev;
struct prueth_emac *emac;
u8 vlan_id, i;
@@ -663,11 +663,15 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr)
if (is_hsr_master(real_dev)) {
for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) {
- emac = netdev_priv(hsr_get_port_ndev(real_dev, i));
- if (!emac)
+ port_dev = hsr_get_port_ndev(real_dev, i);
+ emac = netdev_priv(port_dev);
+ if (!emac) {
+ dev_put(port_dev);
return -EINVAL;
+ }
icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id,
true);
+ dev_put(port_dev);
}
} else {
emac = netdev_priv(real_dev);
@@ -679,7 +683,7 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr)
static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr)
{
- struct net_device *real_dev;
+ struct net_device *real_dev, *port_dev;
struct prueth_emac *emac;
u8 vlan_id, i;
@@ -688,11 +692,15 @@ static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr)
if (is_hsr_master(real_dev)) {
for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) {
- emac = netdev_priv(hsr_get_port_ndev(real_dev, i));
- if (!emac)
+ port_dev = hsr_get_port_ndev(real_dev, i);
+ emac = netdev_priv(port_dev);
+ if (!emac) {
+ dev_put(port_dev);
return -EINVAL;
+ }
icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id,
false);
+ dev_put(port_dev);
}
} else {
emac = netdev_priv(real_dev);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index bcd07a715752..5cb353a97d6d 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -2078,10 +2078,6 @@ static void wx_setup_mrqc(struct wx *wx)
{
u32 rss_field = 0;
- /* VT, and RSS do not coexist at the same time */
- if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags))
- return;
-
/* Disable indicating checksum in descriptor, enables RSS hash */
wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_PCSD, WX_PSR_CTL_PCSD);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 01329fe7451a..0eca96eeed58 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -4286,6 +4286,7 @@ static int macsec_newlink(struct net_device *dev,
if (err < 0)
goto del_dev;
+ netdev_update_features(dev);
netif_stacked_transfer_operstate(real_dev, dev);
linkwatch_fire_event(dev);
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 28acc6392cfc..392749aae54d 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -361,7 +361,7 @@ config NXP_TJA11XX_PHY
tristate "NXP TJA11xx PHYs support"
depends on HWMON
help
- Currently supports the NXP TJA1100 and TJA1101 PHY.
+ Currently supports the NXP TJA1100, TJA1101 and TJA1102 PHYs.
config NCN26000_PHY
tristate "Onsemi 10BASE-T1S Ethernet PHY"
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 13df28445f02..c02da57a4da5 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1065,23 +1065,19 @@ EXPORT_SYMBOL_GPL(phy_inband_caps);
*/
int phy_config_inband(struct phy_device *phydev, unsigned int modes)
{
- int err;
+ lockdep_assert_held(&phydev->lock);
if (!!(modes & LINK_INBAND_DISABLE) +
!!(modes & LINK_INBAND_ENABLE) +
!!(modes & LINK_INBAND_BYPASS) != 1)
return -EINVAL;
- mutex_lock(&phydev->lock);
if (!phydev->drv)
- err = -EIO;
+ return -EIO;
else if (!phydev->drv->config_inband)
- err = -EOPNOTSUPP;
- else
- err = phydev->drv->config_inband(phydev, modes);
- mutex_unlock(&phydev->lock);
+ return -EOPNOTSUPP;
- return err;
+ return phydev->drv->config_inband(phydev, modes);
}
EXPORT_SYMBOL(phy_config_inband);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 7556aa3dd7ee..c82c1997147b 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -287,8 +287,7 @@ static bool phy_uses_state_machine(struct phy_device *phydev)
if (phydev->phy_link_change == phy_link_change)
return phydev->attached_dev && phydev->adjust_link;
- /* phydev->phy_link_change is implicitly phylink_phy_change() */
- return true;
+ return !!phydev->phy_link_change;
}
static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
@@ -1864,6 +1863,8 @@ void phy_detach(struct phy_device *phydev)
phydev->attached_dev = NULL;
phy_link_topo_del_phy(dev, phydev);
}
+
+ phydev->phy_link_change = NULL;
phydev->phylink = NULL;
if (!phydev->is_on_sfp_module)
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index c7cb95aa8007..1988b7d2089a 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -67,6 +67,8 @@ struct phylink {
struct timer_list link_poll;
struct mutex state_mutex;
+ /* Serialize updates to pl->phydev with phylink_resolve() */
+ struct mutex phydev_mutex;
struct phylink_link_state phy_state;
unsigned int phy_ib_mode;
struct work_struct resolve;
@@ -1432,6 +1434,7 @@ static void phylink_get_fixed_state(struct phylink *pl,
static void phylink_mac_initial_config(struct phylink *pl, bool force_restart)
{
struct phylink_link_state link_state;
+ struct phy_device *phy = pl->phydev;
switch (pl->req_link_an_mode) {
case MLO_AN_PHY:
@@ -1455,7 +1458,11 @@ static void phylink_mac_initial_config(struct phylink *pl, bool force_restart)
link_state.link = false;
phylink_apply_manual_flow(pl, &link_state);
+ if (phy)
+ mutex_lock(&phy->lock);
phylink_major_config(pl, force_restart, &link_state);
+ if (phy)
+ mutex_unlock(&phy->lock);
}
static const char *phylink_pause_to_str(int pause)
@@ -1591,8 +1598,13 @@ static void phylink_resolve(struct work_struct *w)
struct phylink_link_state link_state;
bool mac_config = false;
bool retrigger = false;
+ struct phy_device *phy;
bool cur_link_state;
+ mutex_lock(&pl->phydev_mutex);
+ phy = pl->phydev;
+ if (phy)
+ mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
cur_link_state = phylink_link_is_up(pl);
@@ -1626,11 +1638,11 @@ static void phylink_resolve(struct work_struct *w)
/* If we have a phy, the "up" state is the union of both the
* PHY and the MAC
*/
- if (pl->phydev)
+ if (phy)
link_state.link &= pl->phy_state.link;
/* Only update if the PHY link is up */
- if (pl->phydev && pl->phy_state.link) {
+ if (phy && pl->phy_state.link) {
/* If the interface has changed, force a link down
* event if the link isn't already down, and re-resolve.
*/
@@ -1694,6 +1706,9 @@ static void phylink_resolve(struct work_struct *w)
queue_work(system_power_efficient_wq, &pl->resolve);
}
mutex_unlock(&pl->state_mutex);
+ if (phy)
+ mutex_unlock(&phy->lock);
+ mutex_unlock(&pl->phydev_mutex);
}
static void phylink_run_resolve(struct phylink *pl)
@@ -1829,6 +1844,7 @@ struct phylink *phylink_create(struct phylink_config *config,
if (!pl)
return ERR_PTR(-ENOMEM);
+ mutex_init(&pl->phydev_mutex);
mutex_init(&pl->state_mutex);
INIT_WORK(&pl->resolve, phylink_resolve);
@@ -2089,6 +2105,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
dev_name(&phy->mdio.dev), phy->drv->name, irq_str);
kfree(irq_str);
+ mutex_lock(&pl->phydev_mutex);
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
pl->phydev = phy;
@@ -2134,6 +2151,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
+ mutex_unlock(&pl->phydev_mutex);
phylink_dbg(pl,
"phy: %s setting supported %*pb advertising %*pb\n",
@@ -2312,6 +2330,7 @@ void phylink_disconnect_phy(struct phylink *pl)
ASSERT_RTNL();
+ mutex_lock(&pl->phydev_mutex);
phy = pl->phydev;
if (phy) {
mutex_lock(&phy->lock);
@@ -2321,8 +2340,11 @@ void phylink_disconnect_phy(struct phylink *pl)
pl->mac_tx_clk_stop = false;
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
- flush_work(&pl->resolve);
+ }
+ mutex_unlock(&pl->phydev_mutex);
+ if (phy) {
+ flush_work(&pl->resolve);
phy_disconnect(phy);
}
}
diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c
index bd1ec3b2c084..3a3965b79942 100644
--- a/drivers/net/wireless/ath/ath12k/mac.c
+++ b/drivers/net/wireless/ath/ath12k/mac.c
@@ -4078,12 +4078,68 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif,
return ret;
}
+static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif)
+{
+ struct ath12k *ar = arvif->ar;
+ struct ieee80211_vif *vif = arvif->ahvif->vif;
+ struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf;
+ enum wmi_sta_powersave_param param;
+ struct ieee80211_bss_conf *info;
+ enum wmi_sta_ps_mode psmode;
+ int ret;
+ int timeout;
+ bool enable_ps;
+
+ lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy);
+
+ if (vif->type != NL80211_IFTYPE_STATION)
+ return;
+
+ enable_ps = arvif->ahvif->ps;
+ if (enable_ps) {
+ psmode = WMI_STA_PS_MODE_ENABLED;
+ param = WMI_STA_PS_PARAM_INACTIVITY_TIME;
+
+ timeout = conf->dynamic_ps_timeout;
+ if (timeout == 0) {
+ info = ath12k_mac_get_link_bss_conf(arvif);
+ if (!info) {
+ ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n",
+ vif->addr, arvif->link_id);
+ return;
+ }
+
+ /* firmware doesn't like 0 */
+ timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000;
+ }
+
+ ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param,
+ timeout);
+ if (ret) {
+ ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n",
+ arvif->vdev_id, ret);
+ return;
+ }
+ } else {
+ psmode = WMI_STA_PS_MODE_DISABLED;
+ }
+
+ ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n",
+ arvif->vdev_id, psmode ? "enable" : "disable");
+
+ ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode);
+ if (ret)
+ ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n",
+ psmode, arvif->vdev_id, ret);
+}
+
static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
u64 changed)
{
struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif);
unsigned long links = ahvif->links_map;
+ struct ieee80211_vif_cfg *vif_cfg;
struct ieee80211_bss_conf *info;
struct ath12k_link_vif *arvif;
struct ieee80211_sta *sta;
@@ -4147,61 +4203,24 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw,
}
}
}
-}
-
-static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif)
-{
- struct ath12k *ar = arvif->ar;
- struct ieee80211_vif *vif = arvif->ahvif->vif;
- struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf;
- enum wmi_sta_powersave_param param;
- struct ieee80211_bss_conf *info;
- enum wmi_sta_ps_mode psmode;
- int ret;
- int timeout;
- bool enable_ps;
- lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy);
+ if (changed & BSS_CHANGED_PS) {
+ links = ahvif->links_map;
+ vif_cfg = &vif->cfg;
- if (vif->type != NL80211_IFTYPE_STATION)
- return;
+ for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) {
+ arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]);
+ if (!arvif || !arvif->ar)
+ continue;
- enable_ps = arvif->ahvif->ps;
- if (enable_ps) {
- psmode = WMI_STA_PS_MODE_ENABLED;
- param = WMI_STA_PS_PARAM_INACTIVITY_TIME;
+ ar = arvif->ar;
- timeout = conf->dynamic_ps_timeout;
- if (timeout == 0) {
- info = ath12k_mac_get_link_bss_conf(arvif);
- if (!info) {
- ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n",
- vif->addr, arvif->link_id);
- return;
+ if (ar->ab->hw_params->supports_sta_ps) {
+ ahvif->ps = vif_cfg->ps;
+ ath12k_mac_vif_setup_ps(arvif);
}
-
- /* firmware doesn't like 0 */
- timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000;
}
-
- ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param,
- timeout);
- if (ret) {
- ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n",
- arvif->vdev_id, ret);
- return;
- }
- } else {
- psmode = WMI_STA_PS_MODE_DISABLED;
}
-
- ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n",
- arvif->vdev_id, psmode ? "enable" : "disable");
-
- ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode);
- if (ret)
- ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n",
- psmode, arvif->vdev_id, ret);
}
static bool ath12k_mac_supports_tpc(struct ath12k *ar, struct ath12k_vif *ahvif,
@@ -4223,7 +4242,6 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar,
{
struct ath12k_vif *ahvif = arvif->ahvif;
struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif);
- struct ieee80211_vif_cfg *vif_cfg = &vif->cfg;
struct cfg80211_chan_def def;
u32 param_id, param_value;
enum nl80211_band band;
@@ -4510,12 +4528,6 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar,
}
ath12k_mac_fils_discovery(arvif, info);
-
- if (changed & BSS_CHANGED_PS &&
- ar->ab->hw_params->supports_sta_ps) {
- ahvif->ps = vif_cfg->ps;
- ath12k_mac_vif_setup_ps(arvif);
- }
}
static struct ath12k_vif_cache *ath12k_ahvif_get_link_cache(struct ath12k_vif *ahvif,
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c
index 742ffeb48bce..29dadedefdd2 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.c
+++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -843,7 +843,7 @@ int ath12k_wmi_mgmt_send(struct ath12k_link_vif *arvif, u32 buf_id,
cmd->tx_params_valid = 0;
frame_tlv = (struct wmi_tlv *)(skb->data + sizeof(*cmd));
- frame_tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_BYTE, buf_len);
+ frame_tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_BYTE, buf_len_aligned);
memcpy(frame_tlv->value, frame->data, buf_len);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index f9e2095d6490..7e56e4ff7642 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -124,13 +124,13 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x0082, 0x1304, iwl6005_mac_cfg)},/* low 5GHz active */
{IWL_PCI_DEVICE(0x0082, 0x1305, iwl6005_mac_cfg)},/* high 5GHz active */
-/* 6x30 Series */
- {IWL_PCI_DEVICE(0x008A, 0x5305, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x008A, 0x5307, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x008A, 0x5325, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x008A, 0x5327, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x008B, 0x5315, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x008B, 0x5317, iwl1000_mac_cfg)},
+/* 1030/6x30 Series */
+ {IWL_PCI_DEVICE(0x008A, 0x5305, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x008A, 0x5307, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x008A, 0x5325, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x008A, 0x5327, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x008B, 0x5315, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x008B, 0x5317, iwl6030_mac_cfg)},
{IWL_PCI_DEVICE(0x0090, 0x5211, iwl6030_mac_cfg)},
{IWL_PCI_DEVICE(0x0090, 0x5215, iwl6030_mac_cfg)},
{IWL_PCI_DEVICE(0x0090, 0x5216, iwl6030_mac_cfg)},
@@ -181,12 +181,12 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x08AE, 0x1027, iwl1000_mac_cfg)},
/* 130 Series WiFi */
- {IWL_PCI_DEVICE(0x0896, 0x5005, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x0896, 0x5007, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x0897, 0x5015, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x0897, 0x5017, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x0896, 0x5025, iwl1000_mac_cfg)},
- {IWL_PCI_DEVICE(0x0896, 0x5027, iwl1000_mac_cfg)},
+ {IWL_PCI_DEVICE(0x0896, 0x5005, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x0896, 0x5007, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x0897, 0x5015, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x0897, 0x5017, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x0896, 0x5025, iwl6030_mac_cfg)},
+ {IWL_PCI_DEVICE(0x0896, 0x5027, iwl6030_mac_cfg)},
/* 2x00 Series */
{IWL_PCI_DEVICE(0x0890, 0x4022, iwl2000_mac_cfg)},
diff --git a/drivers/net/wireless/virtual/virt_wifi.c b/drivers/net/wireless/virtual/virt_wifi.c
index 1fffeff2190c..4eae89376feb 100644
--- a/drivers/net/wireless/virtual/virt_wifi.c
+++ b/drivers/net/wireless/virtual/virt_wifi.c
@@ -277,7 +277,9 @@ static void virt_wifi_connect_complete(struct work_struct *work)
priv->is_connected = true;
/* Schedules an event that acquires the rtnl lock. */
- cfg80211_connect_result(priv->upperdev, requested_bss, NULL, 0, NULL, 0,
+ cfg80211_connect_result(priv->upperdev,
+ priv->is_connected ? fake_router_bssid : NULL,
+ NULL, 0, NULL, 0,
status, GFP_KERNEL);
netif_carrier_on(priv->upperdev);
}
diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index 755651f33811..a72aa57591c0 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -1168,12 +1168,6 @@ static void __iomem *mvebu_pcie_map_registers(struct platform_device *pdev,
return devm_ioremap_resource(&pdev->dev, &port->regs);
}
-#define DT_FLAGS_TO_TYPE(flags) (((flags) >> 24) & 0x03)
-#define DT_TYPE_IO 0x1
-#define DT_TYPE_MEM32 0x2
-#define DT_CPUADDR_TO_TARGET(cpuaddr) (((cpuaddr) >> 56) & 0xFF)
-#define DT_CPUADDR_TO_ATTR(cpuaddr) (((cpuaddr) >> 48) & 0xFF)
-
static int mvebu_get_tgt_attr(struct device_node *np, int devfn,
unsigned long type,
unsigned int *tgt,
@@ -1189,19 +1183,12 @@ static int mvebu_get_tgt_attr(struct device_node *np, int devfn,
return -EINVAL;
for_each_of_range(&parser, &range) {
- unsigned long rtype;
u32 slot = upper_32_bits(range.bus_addr);
- if (DT_FLAGS_TO_TYPE(range.flags) == DT_TYPE_IO)
- rtype = IORESOURCE_IO;
- else if (DT_FLAGS_TO_TYPE(range.flags) == DT_TYPE_MEM32)
- rtype = IORESOURCE_MEM;
- else
- continue;
-
- if (slot == PCI_SLOT(devfn) && type == rtype) {
- *tgt = DT_CPUADDR_TO_TARGET(range.cpu_addr);
- *attr = DT_CPUADDR_TO_ATTR(range.cpu_addr);
+ if (slot == PCI_SLOT(devfn) &&
+ type == (range.flags & IORESOURCE_TYPE_BITS)) {
+ *tgt = (range.parent_bus_addr >> 56) & 0xFF;
+ *attr = (range.parent_bus_addr >> 48) & 0xFF;
return 0;
}
}
diff --git a/drivers/regulator/sy7636a-regulator.c b/drivers/regulator/sy7636a-regulator.c
index d1e7ba1fb3e1..27e3d939b7bb 100644
--- a/drivers/regulator/sy7636a-regulator.c
+++ b/drivers/regulator/sy7636a-regulator.c
@@ -83,9 +83,11 @@ static int sy7636a_regulator_probe(struct platform_device *pdev)
if (!regmap)
return -EPROBE_DEFER;
- gdp = devm_gpiod_get(pdev->dev.parent, "epd-pwr-good", GPIOD_IN);
+ device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
+
+ gdp = devm_gpiod_get(&pdev->dev, "epd-pwr-good", GPIOD_IN);
if (IS_ERR(gdp)) {
- dev_err(pdev->dev.parent, "Power good GPIO fault %ld\n", PTR_ERR(gdp));
+ dev_err(&pdev->dev, "Power good GPIO fault %ld\n", PTR_ERR(gdp));
return PTR_ERR(gdp);
}
@@ -105,7 +107,6 @@ static int sy7636a_regulator_probe(struct platform_device *pdev)
}
config.dev = &pdev->dev;
- config.dev->of_node = pdev->dev.parent->of_node;
config.regmap = regmap;
rdev = devm_regulator_register(&pdev->dev, &desc, &config);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c953297aa89a..b21cb72835cc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -111,6 +111,24 @@ struct btrfs_bio_ctrl {
*/
unsigned long submit_bitmap;
struct readahead_control *ractl;
+
+ /*
+ * The start offset of the last used extent map by a read operation.
+ *
+ * This is for proper compressed read merge.
+ * U64_MAX means we are starting the read and have made no progress yet.
+ *
+ * The current btrfs_bio_is_contig() only uses disk_bytenr as
+ * the condition to check if the read can be merged with previous
+ * bio, which is not correct. E.g. two file extents pointing to the
+ * same extent but with different offset.
+ *
+ * So here we need to do extra checks to only merge reads that are
+ * covered by the same extent map.
+ * Just extent_map::start will be enough, as they are unique
+ * inside the same inode.
+ */
+ u64 last_em_start;
};
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
@@ -909,7 +927,7 @@ static void btrfs_readahead_expand(struct readahead_control *ractl,
* return 0 on success, otherwise return error
*/
static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
- struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start)
+ struct btrfs_bio_ctrl *bio_ctrl)
{
struct inode *inode = folio->mapping->host;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
@@ -1019,12 +1037,11 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
* non-optimal behavior (submitting 2 bios for the same extent).
*/
if (compress_type != BTRFS_COMPRESS_NONE &&
- prev_em_start && *prev_em_start != (u64)-1 &&
- *prev_em_start != em->start)
+ bio_ctrl->last_em_start != U64_MAX &&
+ bio_ctrl->last_em_start != em->start)
force_bio_submit = true;
- if (prev_em_start)
- *prev_em_start = em->start;
+ bio_ctrl->last_em_start = em->start;
btrfs_free_extent_map(em);
em = NULL;
@@ -1238,12 +1255,15 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
const u64 start = folio_pos(folio);
const u64 end = start + folio_size(folio) - 1;
struct extent_state *cached_state = NULL;
- struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ };
+ struct btrfs_bio_ctrl bio_ctrl = {
+ .opf = REQ_OP_READ,
+ .last_em_start = U64_MAX,
+ };
struct extent_map *em_cached = NULL;
int ret;
lock_extents_for_read(inode, start, end, &cached_state);
- ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL);
+ ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl);
btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
btrfs_free_extent_map(em_cached);
@@ -2583,7 +2603,8 @@ void btrfs_readahead(struct readahead_control *rac)
{
struct btrfs_bio_ctrl bio_ctrl = {
.opf = REQ_OP_READ | REQ_RAHEAD,
- .ractl = rac
+ .ractl = rac,
+ .last_em_start = U64_MAX,
};
struct folio *folio;
struct btrfs_inode *inode = BTRFS_I(rac->mapping->host);
@@ -2591,12 +2612,11 @@ void btrfs_readahead(struct readahead_control *rac)
const u64 end = start + readahead_length(rac) - 1;
struct extent_state *cached_state = NULL;
struct extent_map *em_cached = NULL;
- u64 prev_em_start = (u64)-1;
lock_extents_for_read(inode, start, end, &cached_state);
while ((folio = readahead_folio(rac)) != NULL)
- btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start);
+ btrfs_do_readpage(folio, &em_cached, &bio_ctrl);
btrfs_unlock_extent(&inode->io_tree, start, end, &cached_state);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dd82dcc7b2b7..e7218e78bff4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5696,7 +5696,17 @@ static void btrfs_del_inode_from_root(struct btrfs_inode *inode)
bool empty = false;
xa_lock(&root->inodes);
- entry = __xa_erase(&root->inodes, btrfs_ino(inode));
+ /*
+ * This btrfs_inode is being freed and has already been unhashed at this
+ * point. It's possible that another btrfs_inode has already been
+ * allocated for the same inode and inserted itself into the root, so
+ * don't delete it in that case.
+ *
+ * Note that this shouldn't need to allocate memory, so the gfp flags
+ * don't really matter.
+ */
+ entry = __xa_cmpxchg(&root->inodes, btrfs_ino(inode), inode, NULL,
+ GFP_ATOMIC);
if (entry == inode)
empty = xa_empty(&root->inodes);
xa_unlock(&root->inodes);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index ccaa9a3cf1ce..da102da169fd 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1455,6 +1455,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root,
struct btrfs_qgroup *qgroup;
LIST_HEAD(qgroup_list);
u64 num_bytes = src->excl;
+ u64 num_bytes_cmpr = src->excl_cmpr;
int ret = 0;
qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -1466,11 +1467,12 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root,
struct btrfs_qgroup_list *glist;
qgroup->rfer += sign * num_bytes;
- qgroup->rfer_cmpr += sign * num_bytes;
+ qgroup->rfer_cmpr += sign * num_bytes_cmpr;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+ WARN_ON(sign < 0 && qgroup->excl_cmpr < num_bytes_cmpr);
qgroup->excl += sign * num_bytes;
- qgroup->excl_cmpr += sign * num_bytes;
+ qgroup->excl_cmpr += sign * num_bytes_cmpr;
if (sign > 0)
qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a262b494a89f..df1f6cc3fe21 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -299,9 +299,12 @@ static int btrfs_parse_compress(struct btrfs_fs_context *ctx,
btrfs_set_opt(ctx->mount_opt, COMPRESS);
btrfs_clear_opt(ctx->mount_opt, NODATACOW);
btrfs_clear_opt(ctx->mount_opt, NODATASUM);
- } else if (btrfs_match_compress_type(string, "lzo", false)) {
+ } else if (btrfs_match_compress_type(string, "lzo", true)) {
ctx->compress_type = BTRFS_COMPRESS_LZO;
- ctx->compress_level = 0;
+ ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_LZO,
+ string + 3);
+ if (string[3] == ':' && string[4])
+ btrfs_warn(NULL, "Compression level ignored for LZO");
btrfs_set_opt(ctx->mount_opt, COMPRESS);
btrfs_clear_opt(ctx->mount_opt, NODATACOW);
btrfs_clear_opt(ctx->mount_opt, NODATASUM);
@@ -1079,7 +1082,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_printf(seq, ",compress-force=%s", compress_type);
else
seq_printf(seq, ",compress=%s", compress_type);
- if (info->compress_level)
+ if (info->compress_level && info->compress_type != BTRFS_COMPRESS_LZO)
seq_printf(seq, ":%d", info->compress_level);
}
if (btrfs_test_opt(info, NOSSD))
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fa7a929a0461..c6e3efd6f602 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2722,6 +2722,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
goto error;
}
+ if (bdev_nr_bytes(file_bdev(bdev_file)) <= BTRFS_DEVICE_RANGE_RESERVED) {
+ ret = -EINVAL;
+ goto error;
+ }
+
if (fs_devices->seeding) {
seeding_dev = true;
down_write(&sb->s_umount);
diff --git a/fs/coredump.c b/fs/coredump.c
index 5dce257c67fc..60bc9685e149 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1466,11 +1466,15 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write,
ssize_t retval;
char old_core_pattern[CORENAME_MAX_SIZE];
+ if (write)
+ return proc_dostring(table, write, buffer, lenp, ppos);
+
retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
error = proc_dostring(table, write, buffer, lenp, ppos);
if (error)
return error;
+
if (!check_coredump_socket()) {
strscpy(core_pattern, old_core_pattern, retval + 1);
return -EINVAL;
diff --git a/fs/exec.c b/fs/exec.c
index 2a1e5e4042a1..e861a4b7ffda 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2048,7 +2048,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ
{
int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (!error)
+ if (!error && !write)
validate_coredump_safety();
return error;
}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 68a7d2861c58..a907ddfac4d5 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -208,6 +208,14 @@ static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
return 1;
/*
+ * Verify that the decoded dentry itself has a valid id mapping.
+ * In case the decoded dentry is the mountfd root itself, this
+ * verifies that the mountfd inode itself has a valid id mapping.
+ */
+ if (!privileged_wrt_inode_uidgid(user_ns, idmap, d_inode(dentry)))
+ return 0;
+
+ /*
* It's racy as we're not taking rename_lock but we're able to ignore
* permissions and we just need an approximation whether we were able
* to follow a path to the file.
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e80cd8f2c049..5150aa25e64b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1893,7 +1893,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
index = outarg->offset >> PAGE_SHIFT;
- while (num) {
+ while (num && ap->num_folios < num_pages) {
struct folio *folio;
unsigned int folio_offset;
unsigned int nr_bytes;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2d817d7cab26..5c569c3cb53f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1199,7 +1199,7 @@ static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode,
if (attr->blksize != 0)
blkbits = ilog2(attr->blksize);
else
- blkbits = inode->i_sb->s_blocksize_bits;
+ blkbits = fc->blkbits;
stat->blksize = 1 << blkbits;
}
@@ -1377,6 +1377,7 @@ retry:
generic_fillattr(idmap, request_mask, inode, stat);
stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino;
+ stat->blksize = 1 << fi->cached_i_blkbits;
if (test_bit(FUSE_I_BTIME, &fi->state)) {
stat->btime = fi->i_btime;
stat->result_mask |= STATX_BTIME;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 5525a4520b0f..4adcf09d4b01 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2960,7 +2960,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
.nodeid_out = ff_out->nodeid,
.fh_out = ff_out->fh,
.off_out = pos_out,
- .len = len,
+ .len = min_t(size_t, len, UINT_MAX & PAGE_MASK),
.flags = flags
};
struct fuse_write_out outarg;
@@ -3026,6 +3026,9 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
fc->no_copy_file_range = 1;
err = -EOPNOTSUPP;
}
+ if (!err && outarg.size > len)
+ err = -EIO;
+
if (err)
goto out;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ec248d13c8bf..cc428d04be3e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -210,6 +210,12 @@ struct fuse_inode {
/** Reference to backing file in passthrough mode */
struct fuse_backing *fb;
#endif
+
+ /*
+ * The underlying inode->i_blkbits value will not be modified,
+ * so preserve the blocksize specified by the server.
+ */
+ u8 cached_i_blkbits;
};
/** FUSE inode state bits */
@@ -969,6 +975,14 @@ struct fuse_conn {
/* Request timeout (in jiffies). 0 = no timeout */
unsigned int req_timeout;
} timeout;
+
+ /*
+ * This is a workaround until fuse uses iomap for reads.
+ * For fuseblk servers, this represents the blocksize passed in at
+ * mount time and for regular fuse servers, this is equivalent to
+ * inode->i_blkbits.
+ */
+ u8 blkbits;
};
/*
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 67c2318bfc42..7ddfd2b3cc9c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -289,6 +289,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
}
}
+ if (attr->blksize)
+ fi->cached_i_blkbits = ilog2(attr->blksize);
+ else
+ fi->cached_i_blkbits = fc->blkbits;
+
/*
* Don't set the sticky bit in i_mode, unless we want the VFS
* to check permissions. This prevents failures due to the
@@ -1805,10 +1810,21 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
err = -EINVAL;
if (!sb_set_blocksize(sb, ctx->blksize))
goto err;
+ /*
+ * This is a workaround until fuse hooks into iomap for reads.
+ * Use PAGE_SIZE for the blocksize else if the writeback cache
+ * is enabled, buffered writes go through iomap and a read may
+ * overwrite partially written data if blocksize < PAGE_SIZE
+ */
+ fc->blkbits = sb->s_blocksize_bits;
+ if (ctx->blksize != PAGE_SIZE &&
+ !sb_set_blocksize(sb, PAGE_SIZE))
+ goto err;
#endif
} else {
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
+ fc->blkbits = sb->s_blocksize_bits;
}
sb->s_subtype = ctx->subtype;
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 607ef735ad4a..eb97ac009e75 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -237,6 +237,11 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
if (!file)
goto out;
+ /* read/write/splice/mmap passthrough only relevant for regular files */
+ res = d_is_dir(file->f_path.dentry) ? -EISDIR : -EINVAL;
+ if (!d_is_reg(file->f_path.dentry))
+ goto out_fput;
+
backing_sb = file_inode(file)->i_sb;
res = -ELOOP;
if (backing_sb->s_stack_depth >= fc->max_stack_depth)
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index c826e7ca49f5..76c8fd0bfc75 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1016,7 +1016,7 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
if (kaddr)
*kaddr = fs->window_kaddr + offset;
if (pfn)
- *pfn = fs->window_phys_addr + offset;
+ *pfn = PHYS_PFN(fs->window_phys_addr + offset);
return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index a6c692cac616..9adf36e6364b 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -70,6 +70,24 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
!list_empty(&of->list));
}
+/* Get active reference to kernfs node for an open file */
+static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
+{
+ /* Skip if file was already released */
+ if (unlikely(of->released))
+ return NULL;
+
+ if (!kernfs_get_active(of->kn))
+ return NULL;
+
+ return of;
+}
+
+static void kernfs_put_active_of(struct kernfs_open_file *of)
+{
+ return kernfs_put_active(of->kn);
+}
+
/**
* kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
*
@@ -139,7 +157,7 @@ static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
if (ops->seq_stop)
ops->seq_stop(sf, v);
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
}
static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
@@ -152,7 +170,7 @@ static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
* the ops aren't called concurrently for the same open file.
*/
mutex_lock(&of->mutex);
- if (!kernfs_get_active(of->kn))
+ if (!kernfs_get_active_of(of))
return ERR_PTR(-ENODEV);
ops = kernfs_ops(of->kn);
@@ -238,7 +256,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
* the ops aren't called concurrently for the same open file.
*/
mutex_lock(&of->mutex);
- if (!kernfs_get_active(of->kn)) {
+ if (!kernfs_get_active_of(of)) {
len = -ENODEV;
mutex_unlock(&of->mutex);
goto out_free;
@@ -252,7 +270,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
else
len = -EINVAL;
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
mutex_unlock(&of->mutex);
if (len < 0)
@@ -323,7 +341,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
* the ops aren't called concurrently for the same open file.
*/
mutex_lock(&of->mutex);
- if (!kernfs_get_active(of->kn)) {
+ if (!kernfs_get_active_of(of)) {
mutex_unlock(&of->mutex);
len = -ENODEV;
goto out_free;
@@ -335,7 +353,7 @@ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
else
len = -EINVAL;
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
mutex_unlock(&of->mutex);
if (len > 0)
@@ -357,13 +375,13 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
if (!of->vm_ops)
return;
- if (!kernfs_get_active(of->kn))
+ if (!kernfs_get_active_of(of))
return;
if (of->vm_ops->open)
of->vm_ops->open(vma);
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
}
static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
@@ -375,14 +393,14 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
if (!of->vm_ops)
return VM_FAULT_SIGBUS;
- if (!kernfs_get_active(of->kn))
+ if (!kernfs_get_active_of(of))
return VM_FAULT_SIGBUS;
ret = VM_FAULT_SIGBUS;
if (of->vm_ops->fault)
ret = of->vm_ops->fault(vmf);
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
return ret;
}
@@ -395,7 +413,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
if (!of->vm_ops)
return VM_FAULT_SIGBUS;
- if (!kernfs_get_active(of->kn))
+ if (!kernfs_get_active_of(of))
return VM_FAULT_SIGBUS;
ret = 0;
@@ -404,7 +422,7 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
else
file_update_time(file);
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
return ret;
}
@@ -418,14 +436,14 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
if (!of->vm_ops)
return -EINVAL;
- if (!kernfs_get_active(of->kn))
+ if (!kernfs_get_active_of(of))
return -EINVAL;
ret = -EINVAL;
if (of->vm_ops->access)
ret = of->vm_ops->access(vma, addr, buf, len, write);
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
return ret;
}
@@ -455,7 +473,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
mutex_lock(&of->mutex);
rc = -ENODEV;
- if (!kernfs_get_active(of->kn))
+ if (!kernfs_get_active_of(of))
goto out_unlock;
ops = kernfs_ops(of->kn);
@@ -490,7 +508,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
}
vma->vm_ops = &kernfs_vm_ops;
out_put:
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
out_unlock:
mutex_unlock(&of->mutex);
@@ -852,7 +870,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
__poll_t ret;
- if (!kernfs_get_active(kn))
+ if (!kernfs_get_active_of(of))
return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
if (kn->attr.ops->poll)
@@ -860,7 +878,7 @@ static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
else
ret = kernfs_generic_poll(of, wait);
- kernfs_put_active(kn);
+ kernfs_put_active_of(of);
return ret;
}
@@ -875,7 +893,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
* the ops aren't called concurrently for the same open file.
*/
mutex_lock(&of->mutex);
- if (!kernfs_get_active(of->kn)) {
+ if (!kernfs_get_active_of(of)) {
mutex_unlock(&of->mutex);
return -ENODEV;
}
@@ -886,7 +904,7 @@ static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
else
ret = generic_file_llseek(file, offset, whence);
- kernfs_put_active(of->kn);
+ kernfs_put_active_of(of);
mutex_unlock(&of->mutex);
return ret;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index ae6d1312b184..51f77c65c0c6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2455,7 +2455,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
return ERR_PTR(-EINVAL);
}
- if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
if (__has_locked_children(old_mnt, path->dentry))
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8fb4a950dd55..4e3dcc157a83 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -888,6 +888,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
if (fsinfo->xattr_support)
server->caps |= NFS_CAP_XATTR;
+ else
+ server->caps &= ~NFS_CAP_XATTR;
#endif
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 86e36c630f09..8059ece82468 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -28,6 +28,7 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/gfp.h>
+#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/compaction.h>
@@ -280,6 +281,37 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL_GPL(nfs_file_fsync);
+void nfs_truncate_last_folio(struct address_space *mapping, loff_t from,
+ loff_t to)
+{
+ struct folio *folio;
+
+ if (from >= to)
+ return;
+
+ folio = filemap_lock_folio(mapping, from >> PAGE_SHIFT);
+ if (IS_ERR(folio))
+ return;
+
+ if (folio_mkclean(folio))
+ folio_mark_dirty(folio);
+
+ if (folio_test_uptodate(folio)) {
+ loff_t fpos = folio_pos(folio);
+ size_t offset = from - fpos;
+ size_t end = folio_size(folio);
+
+ if (to - fpos < end)
+ end = to - fpos;
+ folio_zero_segment(folio, offset, end);
+ trace_nfs_size_truncate_folio(mapping->host, to);
+ }
+
+ folio_unlock(folio);
+ folio_put(folio);
+}
+EXPORT_SYMBOL_GPL(nfs_truncate_last_folio);
+
/*
* Decide whether a read/modify/write cycle may be more efficient
* then a modify/write/read cycle when writing to a page in the
@@ -356,6 +388,7 @@ static int nfs_write_begin(const struct kiocb *iocb,
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
+ nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos);
fgp |= fgf_set_order(len);
start:
@@ -442,10 +475,11 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset,
dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
folio->index, offset, length);
- if (offset != 0 || length < folio_size(folio))
- return;
/* Cancel any unstarted writes on this page */
- nfs_wb_folio_cancel(inode, folio);
+ if (offset != 0 || length < folio_size(folio))
+ nfs_wb_folio(inode, folio);
+ else
+ nfs_wb_folio_cancel(inode, folio);
folio_wait_private_2(folio); /* [DEPRECATED] */
trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 8dc921d83538..9edb5f9b0c4e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -293,7 +293,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1,
struct pnfs_layout_segment *l2)
{
const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1);
- const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1);
+ const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2);
u32 i;
if (fl1->mirror_array_cnt != fl2->mirror_array_cnt)
@@ -773,8 +773,11 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
continue;
if (check_device &&
- nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
+ nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) {
+ // reinitialize the error state in case if this is the last iteration
+ ds = ERR_PTR(-EINVAL);
continue;
+ }
*best_idx = idx;
break;
@@ -804,7 +807,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
struct nfs4_pnfs_ds *ds;
ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
- if (ds)
+ if (!IS_ERR(ds))
return ds;
return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
}
@@ -818,7 +821,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
best_idx);
- if (ds || !pgio->pg_mirror_idx)
+ if (!IS_ERR(ds) || !pgio->pg_mirror_idx)
return ds;
return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
}
@@ -868,7 +871,7 @@ retry:
req->wb_nio = 0;
ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
- if (!ds) {
+ if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@@ -1072,11 +1075,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
{
u32 idx = hdr->pgio_mirror_idx + 1;
u32 new_idx = 0;
+ struct nfs4_pnfs_ds *ds;
- if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
- ff_layout_send_layouterror(hdr->lseg);
- else
+ ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx);
+ if (IS_ERR(ds))
pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
+ else
+ ff_layout_send_layouterror(hdr->lseg);
pnfs_read_resend_pnfs(hdr, new_idx);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 338ef77ae423..49df9debb1a6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -716,6 +716,7 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
struct nfs_fattr *fattr;
+ loff_t oldsize = i_size_read(inode);
int error = 0;
nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
@@ -731,7 +732,7 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (error)
return error;
- if (attr->ia_size == i_size_read(inode))
+ if (attr->ia_size == oldsize)
attr->ia_valid &= ~ATTR_SIZE;
}
@@ -767,8 +768,10 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
trace_nfs_setattr_enter(inode);
/* Write all dirty data */
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode)) {
+ nfs_file_block_o_direct(NFS_I(inode));
nfs_sync_inode(inode);
+ }
fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
if (fattr == NULL) {
@@ -777,8 +780,12 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
}
error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
- if (error == 0)
+ if (error == 0) {
+ if (attr->ia_valid & ATTR_SIZE)
+ nfs_truncate_last_folio(inode->i_mapping, oldsize,
+ attr->ia_size);
error = nfs_refresh_inode(inode, fattr);
+ }
nfs_free_fattr(fattr);
out:
trace_nfs_setattr_exit(inode, error);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 74d712b58423..c0a44f389f8f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -437,6 +437,8 @@ int nfs_file_release(struct inode *, struct file *);
int nfs_lock(struct file *, int, struct file_lock *);
int nfs_flock(struct file *, int, struct file_lock *);
int nfs_check_flags(int);
+void nfs_truncate_last_folio(struct address_space *mapping, loff_t from,
+ loff_t to);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
@@ -530,6 +532,16 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0;
}
+/* Must be called with exclusively locked inode->i_rwsem */
+static inline void nfs_file_block_o_direct(struct nfs_inode *nfsi)
+{
+ if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
+ clear_bit(NFS_INO_ODIRECT, &nfsi->flags);
+ inode_dio_wait(&nfsi->vfs_inode);
+ }
+}
+
+
/* namespace.c */
#define NFS_PATH_CANONICAL 1
extern char *nfs_path(char **p, struct dentry *dentry,
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index 3388faf2acb9..d275b0a250bf 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -14,15 +14,6 @@
#include "internal.h"
-/* Call with exclusively locked inode->i_rwsem */
-static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
-{
- if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) {
- clear_bit(NFS_INO_ODIRECT, &nfsi->flags);
- inode_dio_wait(inode);
- }
-}
-
/**
* nfs_start_io_read - declare the file is being used for buffered reads
* @inode: file inode
@@ -57,7 +48,7 @@ nfs_start_io_read(struct inode *inode)
err = down_write_killable(&inode->i_rwsem);
if (err)
return err;
- nfs_block_o_direct(nfsi, inode);
+ nfs_file_block_o_direct(nfsi);
downgrade_write(&inode->i_rwsem);
return 0;
@@ -90,7 +81,7 @@ nfs_start_io_write(struct inode *inode)
err = down_write_killable(&inode->i_rwsem);
if (!err)
- nfs_block_o_direct(NFS_I(inode), inode);
+ nfs_file_block_o_direct(NFS_I(inode));
return err;
}
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index bd5fca285899..97abf62f109d 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -180,10 +180,8 @@ static void nfs_local_probe(struct nfs_client *clp)
return;
}
- if (nfs_client_is_local(clp)) {
- /* If already enabled, disable and re-enable */
- nfs_localio_disable_client(clp);
- }
+ if (nfs_client_is_local(clp))
+ return;
if (!nfs_uuid_begin(&clp->cl_uuid))
return;
@@ -244,7 +242,8 @@ __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
case -ENOMEM:
case -ENXIO:
case -ENOENT:
- /* Revalidate localio, will disable if unsupported */
+ /* Revalidate localio */
+ nfs_localio_disable_client(clp);
nfs_local_probe(clp);
}
}
@@ -453,12 +452,13 @@ static void nfs_local_call_read(struct work_struct *work)
nfs_local_iter_init(&iter, iocb, READ);
status = filp->f_op->read_iter(&iocb->kiocb, &iter);
+
+ revert_creds(save_cred);
+
if (status != -EIOCBQUEUED) {
nfs_local_read_done(iocb, status);
nfs_local_pgio_release(iocb);
}
-
- revert_creds(save_cred);
}
static int
@@ -648,14 +648,15 @@ static void nfs_local_call_write(struct work_struct *work)
file_start_write(filp);
status = filp->f_op->write_iter(&iocb->kiocb, &iter);
file_end_write(filp);
+
+ revert_creds(save_cred);
+ current->flags = old_flags;
+
if (status != -EIOCBQUEUED) {
nfs_local_write_done(iocb, status);
nfs_local_vfs_getattr(iocb);
nfs_local_pgio_release(iocb);
}
-
- revert_creds(save_cred);
- current->flags = old_flags;
}
static int
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 01c01f45358b..6a0b5871ba3b 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -114,6 +114,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
exception.inode = inode;
exception.state = lock->open_context->state;
+ nfs_file_block_o_direct(NFS_I(inode));
err = nfs_sync_inode(inode);
if (err)
goto out;
@@ -137,6 +138,7 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE],
};
struct inode *inode = file_inode(filep);
+ loff_t oldsize = i_size_read(inode);
int err;
if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
@@ -145,7 +147,11 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
- if (err == -EOPNOTSUPP)
+
+ if (err == 0)
+ nfs_truncate_last_folio(inode->i_mapping, oldsize,
+ offset + len);
+ else if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~(NFS_CAP_ALLOCATE |
NFS_CAP_ZERO_RANGE);
@@ -183,6 +189,7 @@ int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ZERO_RANGE],
};
struct inode *inode = file_inode(filep);
+ loff_t oldsize = i_size_read(inode);
int err;
if (!nfs_server_capable(inode, NFS_CAP_ZERO_RANGE))
@@ -191,9 +198,11 @@ int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len)
inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
- if (err == 0)
+ if (err == 0) {
+ nfs_truncate_last_folio(inode->i_mapping, oldsize,
+ offset + len);
truncate_pagecache_range(inode, offset, (offset + len) -1);
- if (err == -EOPNOTSUPP)
+ } else if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_ZERO_RANGE;
inode_unlock(inode);
@@ -354,22 +363,27 @@ out:
/**
* nfs42_copy_dest_done - perform inode cache updates after clone/copy offload
- * @inode: pointer to destination inode
+ * @file: pointer to destination file
* @pos: destination offset
* @len: copy length
+ * @oldsize: length of the file prior to clone/copy
*
* Punch a hole in the inode page cache, so that the NFS client will
* know to retrieve new data.
* Update the file size if necessary, and then mark the inode as having
* invalid cached values for change attribute, ctime, mtime and space used.
*/
-static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+static void nfs42_copy_dest_done(struct file *file, loff_t pos, loff_t len,
+ loff_t oldsize)
{
+ struct inode *inode = file_inode(file);
+ struct address_space *mapping = file->f_mapping;
loff_t newsize = pos + len;
loff_t end = newsize - 1;
- WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_SHIFT, end >> PAGE_SHIFT));
+ nfs_truncate_last_folio(mapping, oldsize, pos);
+ WARN_ON_ONCE(invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
+ end >> PAGE_SHIFT));
spin_lock(&inode->i_lock);
if (newsize > i_size_read(inode))
@@ -402,6 +416,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_server *src_server = NFS_SERVER(src_inode);
loff_t pos_src = args->src_pos;
loff_t pos_dst = args->dst_pos;
+ loff_t oldsize_dst = i_size_read(dst_inode);
size_t count = args->count;
ssize_t status;
@@ -430,6 +445,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
return status;
}
+ nfs_file_block_o_direct(NFS_I(dst_inode));
status = nfs_sync_inode(dst_inode);
if (status)
return status;
@@ -475,7 +491,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
goto out;
}
- nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count);
+ nfs42_copy_dest_done(dst, pos_dst, res->write_res.count, oldsize_dst);
nfs_invalidate_atime(src_inode);
status = res->write_res.count;
out:
@@ -1242,6 +1258,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
struct nfs42_clone_res res = {
.server = server,
};
+ loff_t oldsize_dst = i_size_read(dst_inode);
int status;
msg->rpc_argp = &args;
@@ -1276,7 +1293,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
/* a zero-length count means clone to EOF in src */
if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE)
count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset;
- nfs42_copy_dest_done(dst_inode, dst_offset, count);
+ nfs42_copy_dest_done(dst_f, dst_offset, count, oldsize_dst);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 1d6b5f4230c9..c9a0d1e420c6 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -278,9 +278,11 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
lock_two_nondirectories(src_inode, dst_inode);
/* flush all pending writes on both src and dst so that server
* has the latest data */
+ nfs_file_block_o_direct(NFS_I(src_inode));
ret = nfs_sync_inode(src_inode);
if (ret)
goto out_unlock;
+ nfs_file_block_o_direct(NFS_I(dst_inode));
ret = nfs_sync_inode(dst_inode);
if (ret)
goto out_unlock;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7d2b67e06cc3..ce61253efd45 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4013,8 +4013,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
res.attr_bitmask[2];
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
- server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
- NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL);
+ server->caps &=
+ ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS |
+ NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS |
+ NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME);
server->fattr_valid = NFS_ATTR_FATTR_V4;
if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
@@ -4092,7 +4094,6 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
};
int err;
- nfs_server_set_init_caps(server);
do {
err = nfs4_handle_exception(server,
_nfs4_server_capabilities(server, fhandle),
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 96b1323318c2..627115179795 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -272,6 +272,7 @@ DECLARE_EVENT_CLASS(nfs_update_size_class,
TP_ARGS(inode, new_size))
DEFINE_NFS_UPDATE_SIZE_EVENT(truncate);
+DEFINE_NFS_UPDATE_SIZE_EVENT(truncate_folio);
DEFINE_NFS_UPDATE_SIZE_EVENT(wcc);
DEFINE_NFS_UPDATE_SIZE_EVENT(update);
DEFINE_NFS_UPDATE_SIZE_EVENT(grow);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8b7c04737967..647c53d1418a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -237,59 +237,17 @@ static void nfs_mapping_set_error(struct folio *folio, int error)
}
/*
- * nfs_page_group_search_locked
- * @head - head request of page group
- * @page_offset - offset into page
+ * nfs_page_covers_folio
+ * @req: struct nfs_page
*
- * Search page group with head @head to find a request that contains the
- * page offset @page_offset.
- *
- * Returns a pointer to the first matching nfs request, or NULL if no
- * match is found.
- *
- * Must be called with the page group lock held
- */
-static struct nfs_page *
-nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
-{
- struct nfs_page *req;
-
- req = head;
- do {
- if (page_offset >= req->wb_pgbase &&
- page_offset < (req->wb_pgbase + req->wb_bytes))
- return req;
-
- req = req->wb_this_page;
- } while (req != head);
-
- return NULL;
-}
-
-/*
- * nfs_page_group_covers_page
- * @head - head request of page group
- *
- * Return true if the page group with head @head covers the whole page,
- * returns false otherwise
+ * Return true if the request covers the whole folio.
+ * Note that the caller should ensure all subrequests have been joined
*/
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
unsigned int len = nfs_folio_length(nfs_page_to_folio(req));
- struct nfs_page *tmp;
- unsigned int pos = 0;
-
- nfs_page_group_lock(req);
- for (;;) {
- tmp = nfs_page_group_search_locked(req->wb_head, pos);
- if (!tmp)
- break;
- pos = tmp->wb_pgbase + tmp->wb_bytes;
- }
-
- nfs_page_group_unlock(req);
- return pos >= len;
+ return req->wb_pgbase == 0 && req->wb_bytes == len;
}
/* We can set the PG_uptodate flag if we see that a write request
@@ -2045,6 +2003,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
* release it */
nfs_inode_remove_request(req);
nfs_unlock_and_release_request(req);
+ folio_cancel_dirty(folio);
}
return ret;
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 930150ed5db1..ef147e8b3271 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -706,6 +706,8 @@ out:
* it not only handles the fiemap for inlined files, but also deals
* with the fast symlink, cause they have no difference for extent
* mapping per se.
+ *
+ * Must be called with ip_alloc_sem semaphore held.
*/
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
struct fiemap_extent_info *fieinfo,
@@ -717,6 +719,7 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
u64 phys;
u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ lockdep_assert_held_read(&oi->ip_alloc_sem);
di = (struct ocfs2_dinode *)di_bh->b_data;
if (ocfs2_inode_is_fast_symlink(inode))
@@ -732,8 +735,11 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
phys += offsetof(struct ocfs2_dinode,
id2.i_data.id_data);
+ /* Release the ip_alloc_sem to prevent deadlock on page fault */
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
flags);
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
if (ret < 0)
return ret;
}
@@ -802,9 +808,11 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
-
+ /* Release the ip_alloc_sem to prevent deadlock on page fault */
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
len_bytes, fe_flags);
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
if (ret)
break;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index bd0c099cfdd2..176281112273 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -393,7 +393,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
if (proc_alloc_inum(&dp->low_ino))
goto out_free_entry;
- pde_set_flags(dp);
+ if (!S_ISDIR(dp->mode))
+ pde_set_flags(dp);
write_lock(&proc_subdir_lock);
dp->parent = dir;
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 1e64a4fb6af0..0fae95cf81c4 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -87,7 +87,7 @@
#define SMB_INTERFACE_POLL_INTERVAL 600
/* maximum number of PDUs in one compound */
-#define MAX_COMPOUND 7
+#define MAX_COMPOUND 10
/*
* Default number of credits to keep available for SMB3.
@@ -1882,9 +1882,12 @@ static inline bool is_replayable_error(int error)
/* cifs_get_writable_file() flags */
-#define FIND_WR_ANY 0
-#define FIND_WR_FSUID_ONLY 1
-#define FIND_WR_WITH_DELETE 2
+enum cifs_writable_file_flags {
+ FIND_WR_ANY = 0U,
+ FIND_WR_FSUID_ONLY = (1U << 0),
+ FIND_WR_WITH_DELETE = (1U << 1),
+ FIND_WR_NO_PENDING_DELETE = (1U << 2),
+};
#define MID_FREE 0
#define MID_REQUEST_ALLOCATED 1
@@ -2343,6 +2346,8 @@ struct smb2_compound_vars {
struct kvec qi_iov;
struct kvec io_iov[SMB2_IOCTL_IOV_SIZE];
struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
+ struct kvec unlink_iov[SMB2_SET_INFO_IOV_SIZE];
+ struct kvec rename_iov[SMB2_SET_INFO_IOV_SIZE];
struct kvec close_iov;
struct smb2_file_rename_info_hdr rename_info;
struct smb2_file_link_info_hdr link_info;
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 186e061068be..cb907e18cc35 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -998,7 +998,10 @@ int cifs_open(struct inode *inode, struct file *file)
/* Get the cached handle as SMB2 close is deferred */
if (OPEN_FMODE(file->f_flags) & FMODE_WRITE) {
- rc = cifs_get_writable_path(tcon, full_path, FIND_WR_FSUID_ONLY, &cfile);
+ rc = cifs_get_writable_path(tcon, full_path,
+ FIND_WR_FSUID_ONLY |
+ FIND_WR_NO_PENDING_DELETE,
+ &cfile);
} else {
rc = cifs_get_readable_path(tcon, full_path, &cfile);
}
@@ -2530,6 +2533,9 @@ refind_writable:
continue;
if (with_delete && !(open_file->fid.access & DELETE))
continue;
+ if ((flags & FIND_WR_NO_PENDING_DELETE) &&
+ open_file->status_file_deleted)
+ continue;
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
if (!open_file->invalidHandle) {
/* found a good writable file */
@@ -2647,6 +2653,16 @@ cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
spin_unlock(&tcon->open_file_lock);
free_dentry_path(page);
*ret_file = find_readable_file(cinode, 0);
+ if (*ret_file) {
+ spin_lock(&cinode->open_file_lock);
+ if ((*ret_file)->status_file_deleted) {
+ spin_unlock(&cinode->open_file_lock);
+ cifsFileInfo_put(*ret_file);
+ *ret_file = NULL;
+ } else {
+ spin_unlock(&cinode->open_file_lock);
+ }
+ }
return *ret_file ? 0 : -ENOENT;
}
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index fe453a4b3dc8..11d442e8b3d6 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1931,7 +1931,7 @@ cifs_drop_nlink(struct inode *inode)
* but will return the EACCES to the caller. Note that the VFS does not call
* unlink on negative dentries currently.
*/
-int cifs_unlink(struct inode *dir, struct dentry *dentry)
+static int __cifs_unlink(struct inode *dir, struct dentry *dentry, bool sillyrename)
{
int rc = 0;
unsigned int xid;
@@ -2003,7 +2003,11 @@ retry_std_delete:
goto psx_del_no_retry;
}
- rc = server->ops->unlink(xid, tcon, full_path, cifs_sb, dentry);
+ if (sillyrename || (server->vals->protocol_id > SMB10_PROT_ID &&
+ d_is_positive(dentry) && d_count(dentry) > 2))
+ rc = -EBUSY;
+ else
+ rc = server->ops->unlink(xid, tcon, full_path, cifs_sb, dentry);
psx_del_no_retry:
if (!rc) {
@@ -2071,6 +2075,11 @@ unlink_out:
return rc;
}
+int cifs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ return __cifs_unlink(dir, dentry, false);
+}
+
static int
cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
const char *full_path, struct cifs_sb_info *cifs_sb,
@@ -2358,14 +2367,16 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
rc = server->ops->rmdir(xid, tcon, full_path, cifs_sb);
cifs_put_tlink(tlink);
+ cifsInode = CIFS_I(d_inode(direntry));
+
if (!rc) {
+ set_bit(CIFS_INO_DELETE_PENDING, &cifsInode->flags);
spin_lock(&d_inode(direntry)->i_lock);
i_size_write(d_inode(direntry), 0);
clear_nlink(d_inode(direntry));
spin_unlock(&d_inode(direntry)->i_lock);
}
- cifsInode = CIFS_I(d_inode(direntry));
/* force revalidate to go get info when needed */
cifsInode->time = 0;
@@ -2458,8 +2469,11 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
do_rename_exit:
- if (rc == 0)
+ if (rc == 0) {
d_move(from_dentry, to_dentry);
+ /* Force a new lookup */
+ d_drop(from_dentry);
+ }
cifs_put_tlink(tlink);
return rc;
}
@@ -2470,6 +2484,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
struct dentry *target_dentry, unsigned int flags)
{
const char *from_name, *to_name;
+ struct TCP_Server_Info *server;
void *page1, *page2;
struct cifs_sb_info *cifs_sb;
struct tcon_link *tlink;
@@ -2505,6 +2520,7 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
if (IS_ERR(tlink))
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
+ server = tcon->ses->server;
page1 = alloc_dentry_path();
page2 = alloc_dentry_path();
@@ -2591,19 +2607,53 @@ cifs_rename2(struct mnt_idmap *idmap, struct inode *source_dir,
unlink_target:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
-
- /* Try unlinking the target dentry if it's not negative */
- if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
- if (d_is_dir(target_dentry))
- tmprc = cifs_rmdir(target_dir, target_dentry);
- else
- tmprc = cifs_unlink(target_dir, target_dentry);
- if (tmprc)
- goto cifs_rename_exit;
- rc = cifs_do_rename(xid, source_dentry, from_name,
- target_dentry, to_name);
- if (!rc)
- rehash = false;
+ if (d_really_is_positive(target_dentry)) {
+ if (!rc) {
+ struct inode *inode = d_inode(target_dentry);
+ /*
+ * Samba and ksmbd servers allow renaming a target
+ * directory that is open, so make sure to update
+ * ->i_nlink and then mark it as delete pending.
+ */
+ if (S_ISDIR(inode->i_mode)) {
+ drop_cached_dir_by_name(xid, tcon, to_name, cifs_sb);
+ spin_lock(&inode->i_lock);
+ i_size_write(inode, 0);
+ clear_nlink(inode);
+ spin_unlock(&inode->i_lock);
+ set_bit(CIFS_INO_DELETE_PENDING, &CIFS_I(inode)->flags);
+ CIFS_I(inode)->time = 0; /* force reval */
+ inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ }
+ } else if (rc == -EACCES || rc == -EEXIST) {
+ /*
+ * Rename failed, possibly due to a busy target.
+ * Retry it by unliking the target first.
+ */
+ if (d_is_dir(target_dentry)) {
+ tmprc = cifs_rmdir(target_dir, target_dentry);
+ } else {
+ tmprc = __cifs_unlink(target_dir, target_dentry,
+ server->vals->protocol_id > SMB10_PROT_ID);
+ }
+ if (tmprc) {
+ /*
+ * Some servers will return STATUS_ACCESS_DENIED
+ * or STATUS_DIRECTORY_NOT_EMPTY when failing to
+ * rename a non-empty directory. Make sure to
+ * propagate the appropriate error back to
+ * userspace.
+ */
+ if (tmprc == -EEXIST || tmprc == -ENOTEMPTY)
+ rc = tmprc;
+ goto cifs_rename_exit;
+ }
+ rc = cifs_do_rename(xid, source_dentry, from_name,
+ target_dentry, to_name);
+ if (!rc)
+ rehash = false;
+ }
}
/* force revalidate to go get info when needed */
@@ -2629,6 +2679,8 @@ cifs_dentry_needs_reval(struct dentry *dentry)
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct cached_fid *cfid = NULL;
+ if (test_bit(CIFS_INO_DELETE_PENDING, &cifs_i->flags))
+ return false;
if (cifs_i->time == 0)
return true;
diff --git a/fs/smb/client/smb2glob.h b/fs/smb/client/smb2glob.h
index 224495322a05..e56e4d402f13 100644
--- a/fs/smb/client/smb2glob.h
+++ b/fs/smb/client/smb2glob.h
@@ -30,10 +30,9 @@ enum smb2_compound_ops {
SMB2_OP_QUERY_DIR,
SMB2_OP_MKDIR,
SMB2_OP_RENAME,
- SMB2_OP_DELETE,
SMB2_OP_HARDLINK,
SMB2_OP_SET_EOF,
- SMB2_OP_RMDIR,
+ SMB2_OP_UNLINK,
SMB2_OP_POSIX_QUERY_INFO,
SMB2_OP_SET_REPARSE,
SMB2_OP_GET_REPARSE,
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 31c13fb5b85b..7cadc8ca4f55 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -346,9 +346,6 @@ replay_again:
trace_smb3_posix_query_info_compound_enter(xid, tcon->tid,
ses->Suid, full_path);
break;
- case SMB2_OP_DELETE:
- trace_smb3_delete_enter(xid, tcon->tid, ses->Suid, full_path);
- break;
case SMB2_OP_MKDIR:
/*
* Directories are created through parameters in the
@@ -356,23 +353,40 @@ replay_again:
*/
trace_smb3_mkdir_enter(xid, tcon->tid, ses->Suid, full_path);
break;
- case SMB2_OP_RMDIR:
- rqst[num_rqst].rq_iov = &vars->si_iov[0];
+ case SMB2_OP_UNLINK:
+ rqst[num_rqst].rq_iov = vars->unlink_iov;
rqst[num_rqst].rq_nvec = 1;
size[0] = 1; /* sizeof __u8 See MS-FSCC section 2.4.11 */
data[0] = &delete_pending[0];
- rc = SMB2_set_info_init(tcon, server,
- &rqst[num_rqst], COMPOUND_FID,
- COMPOUND_FID, current->tgid,
- FILE_DISPOSITION_INFORMATION,
- SMB2_O_INFO_FILE, 0, data, size);
- if (rc)
+ if (cfile) {
+ rc = SMB2_set_info_init(tcon, server,
+ &rqst[num_rqst],
+ cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid,
+ current->tgid,
+ FILE_DISPOSITION_INFORMATION,
+ SMB2_O_INFO_FILE, 0,
+ data, size);
+ } else {
+ rc = SMB2_set_info_init(tcon, server,
+ &rqst[num_rqst],
+ COMPOUND_FID,
+ COMPOUND_FID,
+ current->tgid,
+ FILE_DISPOSITION_INFORMATION,
+ SMB2_O_INFO_FILE, 0,
+ data, size);
+ }
+ if (!rc && (!cfile || num_rqst > 1)) {
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst]);
+ } else if (rc) {
goto finished;
- smb2_set_next_command(tcon, &rqst[num_rqst]);
- smb2_set_related(&rqst[num_rqst++]);
- trace_smb3_rmdir_enter(xid, tcon->tid, ses->Suid, full_path);
+ }
+ num_rqst++;
+ trace_smb3_unlink_enter(xid, tcon->tid, ses->Suid, full_path);
break;
case SMB2_OP_SET_EOF:
rqst[num_rqst].rq_iov = &vars->si_iov[0];
@@ -442,7 +456,7 @@ replay_again:
ses->Suid, full_path);
break;
case SMB2_OP_RENAME:
- rqst[num_rqst].rq_iov = &vars->si_iov[0];
+ rqst[num_rqst].rq_iov = vars->rename_iov;
rqst[num_rqst].rq_nvec = 2;
len = in_iov[i].iov_len;
@@ -732,19 +746,6 @@ finished:
trace_smb3_posix_query_info_compound_done(xid, tcon->tid,
ses->Suid);
break;
- case SMB2_OP_DELETE:
- if (rc)
- trace_smb3_delete_err(xid, tcon->tid, ses->Suid, rc);
- else {
- /*
- * If dentry (hence, inode) is NULL, lease break is going to
- * take care of degrading leases on handles for deleted files.
- */
- if (inode)
- cifs_mark_open_handles_for_deleted_file(inode, full_path);
- trace_smb3_delete_done(xid, tcon->tid, ses->Suid);
- }
- break;
case SMB2_OP_MKDIR:
if (rc)
trace_smb3_mkdir_err(xid, tcon->tid, ses->Suid, rc);
@@ -765,11 +766,11 @@ finished:
trace_smb3_rename_done(xid, tcon->tid, ses->Suid);
SMB2_set_info_free(&rqst[num_rqst++]);
break;
- case SMB2_OP_RMDIR:
- if (rc)
- trace_smb3_rmdir_err(xid, tcon->tid, ses->Suid, rc);
+ case SMB2_OP_UNLINK:
+ if (!rc)
+ trace_smb3_unlink_done(xid, tcon->tid, ses->Suid);
else
- trace_smb3_rmdir_done(xid, tcon->tid, ses->Suid);
+ trace_smb3_unlink_err(xid, tcon->tid, ses->Suid, rc);
SMB2_set_info_free(&rqst[num_rqst++]);
break;
case SMB2_OP_SET_EOF:
@@ -1166,7 +1167,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE);
return smb2_compound_op(xid, tcon, cifs_sb,
name, &oparms, NULL,
- &(int){SMB2_OP_RMDIR}, 1,
+ &(int){SMB2_OP_UNLINK}, 1,
NULL, NULL, NULL, NULL);
}
@@ -1175,20 +1176,29 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb, struct dentry *dentry)
{
struct cifs_open_parms oparms;
+ struct inode *inode = NULL;
+ int rc;
- oparms = CIFS_OPARMS(cifs_sb, tcon, name,
- DELETE, FILE_OPEN,
- CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
- ACL_NO_MODE);
- int rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms,
- NULL, &(int){SMB2_OP_DELETE}, 1,
- NULL, NULL, NULL, dentry);
+ if (dentry)
+ inode = d_inode(dentry);
+
+ oparms = CIFS_OPARMS(cifs_sb, tcon, name, DELETE,
+ FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms,
+ NULL, &(int){SMB2_OP_UNLINK},
+ 1, NULL, NULL, NULL, dentry);
if (rc == -EINVAL) {
cifs_dbg(FYI, "invalid lease key, resending request without lease");
rc = smb2_compound_op(xid, tcon, cifs_sb, name, &oparms,
- NULL, &(int){SMB2_OP_DELETE}, 1,
- NULL, NULL, NULL, NULL);
+ NULL, &(int){SMB2_OP_UNLINK},
+ 1, NULL, NULL, NULL, NULL);
}
+ /*
+ * If dentry (hence, inode) is NULL, lease break is going to
+ * take care of degrading leases on handles for deleted files.
+ */
+ if (!rc && inode)
+ cifs_mark_open_handles_for_deleted_file(inode, name);
return rc;
}
@@ -1441,3 +1451,113 @@ out:
cifs_free_open_info(&data);
return rc;
}
+
+static inline __le16 *utf16_smb2_path(struct cifs_sb_info *cifs_sb,
+ const char *name, size_t namelen)
+{
+ int len;
+
+ if (*name == '\\' ||
+ (cifs_sb_master_tlink(cifs_sb) &&
+ cifs_sb_master_tcon(cifs_sb)->posix_extensions && *name == '/'))
+ name++;
+ return cifs_strndup_to_utf16(name, namelen, &len,
+ cifs_sb->local_nls,
+ cifs_remap(cifs_sb));
+}
+
+int smb2_rename_pending_delete(const char *full_path,
+ struct dentry *dentry,
+ const unsigned int xid)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(d_inode(dentry)->i_sb);
+ struct cifsInodeInfo *cinode = CIFS_I(d_inode(dentry));
+ __le16 *utf16_path __free(kfree) = NULL;
+ __u32 co = file_create_options(dentry);
+ int cmds[] = {
+ SMB2_OP_SET_INFO,
+ SMB2_OP_RENAME,
+ SMB2_OP_UNLINK,
+ };
+ const int num_cmds = ARRAY_SIZE(cmds);
+ char *to_name __free(kfree) = NULL;
+ __u32 attrs = cinode->cifsAttrs;
+ struct cifs_open_parms oparms;
+ static atomic_t sillycounter;
+ struct cifsFileInfo *cfile;
+ struct tcon_link *tlink;
+ struct cifs_tcon *tcon;
+ struct kvec iov[2];
+ const char *ppath;
+ void *page;
+ size_t len;
+ int rc;
+
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink))
+ return PTR_ERR(tlink);
+ tcon = tlink_tcon(tlink);
+
+ page = alloc_dentry_path();
+
+ ppath = build_path_from_dentry(dentry->d_parent, page);
+ if (IS_ERR(ppath)) {
+ rc = PTR_ERR(ppath);
+ goto out;
+ }
+
+ len = strlen(ppath) + strlen("/.__smb1234") + 1;
+ to_name = kmalloc(len, GFP_KERNEL);
+ if (!to_name) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ scnprintf(to_name, len, "%s%c.__smb%04X", ppath, CIFS_DIR_SEP(cifs_sb),
+ atomic_inc_return(&sillycounter) & 0xffff);
+
+ utf16_path = utf16_smb2_path(cifs_sb, to_name, len);
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ drop_cached_dir_by_name(xid, tcon, full_path, cifs_sb);
+ oparms = CIFS_OPARMS(cifs_sb, tcon, full_path,
+ DELETE | FILE_WRITE_ATTRIBUTES,
+ FILE_OPEN, co, ACL_NO_MODE);
+
+ attrs &= ~ATTR_READONLY;
+ if (!attrs)
+ attrs = ATTR_NORMAL;
+ if (d_inode(dentry)->i_nlink <= 1)
+ attrs |= ATTR_HIDDEN;
+ iov[0].iov_base = &(FILE_BASIC_INFO) {
+ .Attributes = cpu_to_le32(attrs),
+ };
+ iov[0].iov_len = sizeof(FILE_BASIC_INFO);
+ iov[1].iov_base = utf16_path;
+ iov[1].iov_len = sizeof(*utf16_path) * UniStrlen((wchar_t *)utf16_path);
+
+ cifs_get_writable_path(tcon, full_path, FIND_WR_WITH_DELETE, &cfile);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov,
+ cmds, num_cmds, cfile, NULL, NULL, dentry);
+ if (rc == -EINVAL) {
+ cifs_dbg(FYI, "invalid lease key, resending request without lease\n");
+ cifs_get_writable_path(tcon, full_path,
+ FIND_WR_WITH_DELETE, &cfile);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, iov,
+ cmds, num_cmds, cfile, NULL, NULL, NULL);
+ }
+ if (!rc) {
+ set_bit(CIFS_INO_DELETE_PENDING, &cinode->flags);
+ } else {
+ cifs_tcon_dbg(FYI, "%s: failed to rename '%s' to '%s': %d\n",
+ __func__, full_path, to_name, rc);
+ rc = -EIO;
+ }
+out:
+ cifs_put_tlink(tlink);
+ free_dentry_path(page);
+ return rc;
+}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 94b1d7a395d5..e586f3f4b5c9 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -2640,13 +2640,35 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst)
}
/* SMB headers in a compound are 8 byte aligned. */
- if (!IS_ALIGNED(len, 8)) {
- num_padding = 8 - (len & 7);
+ if (IS_ALIGNED(len, 8))
+ goto out;
+
+ num_padding = 8 - (len & 7);
+ if (smb3_encryption_required(tcon)) {
+ int i;
+
+ /*
+ * Flatten request into a single buffer with required padding as
+ * the encryption layer can't handle the padding iovs.
+ */
+ for (i = 1; i < rqst->rq_nvec; i++) {
+ memcpy(rqst->rq_iov[0].iov_base +
+ rqst->rq_iov[0].iov_len,
+ rqst->rq_iov[i].iov_base,
+ rqst->rq_iov[i].iov_len);
+ rqst->rq_iov[0].iov_len += rqst->rq_iov[i].iov_len;
+ }
+ memset(rqst->rq_iov[0].iov_base + rqst->rq_iov[0].iov_len,
+ 0, num_padding);
+ rqst->rq_iov[0].iov_len += num_padding;
+ rqst->rq_nvec = 1;
+ } else {
rqst->rq_iov[rqst->rq_nvec].iov_base = smb2_padding;
rqst->rq_iov[rqst->rq_nvec].iov_len = num_padding;
rqst->rq_nvec++;
- len += num_padding;
}
+ len += num_padding;
+out:
shdr->NextCommand = cpu_to_le32(len);
}
@@ -5376,6 +5398,7 @@ struct smb_version_operations smb20_operations = {
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
.is_network_name_deleted = smb2_is_network_name_deleted,
+ .rename_pending_delete = smb2_rename_pending_delete,
};
#endif /* CIFS_ALLOW_INSECURE_LEGACY */
@@ -5481,6 +5504,7 @@ struct smb_version_operations smb21_operations = {
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
.is_network_name_deleted = smb2_is_network_name_deleted,
+ .rename_pending_delete = smb2_rename_pending_delete,
};
struct smb_version_operations smb30_operations = {
@@ -5597,6 +5621,7 @@ struct smb_version_operations smb30_operations = {
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
.is_network_name_deleted = smb2_is_network_name_deleted,
+ .rename_pending_delete = smb2_rename_pending_delete,
};
struct smb_version_operations smb311_operations = {
@@ -5713,6 +5738,7 @@ struct smb_version_operations smb311_operations = {
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
.is_network_name_deleted = smb2_is_network_name_deleted,
+ .rename_pending_delete = smb2_rename_pending_delete,
};
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 6e805ece6a7b..b3f1398c9f79 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -317,5 +317,8 @@ int posix_info_sid_size(const void *beg, const void *end);
int smb2_make_nfs_node(unsigned int xid, struct inode *inode,
struct dentry *dentry, struct cifs_tcon *tcon,
const char *full_path, umode_t mode, dev_t dev);
+int smb2_rename_pending_delete(const char *full_path,
+ struct dentry *dentry,
+ const unsigned int xid);
#endif /* _SMB2PROTO_H */
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index fe0e075bc63c..fd650e2afc76 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -669,13 +669,12 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_info_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(posix_query_info_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(hardlink_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rename_enter);
-DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(rmdir_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(unlink_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_eof_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_info_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(set_reparse_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(query_wsl_ea_compound_enter);
-DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter);
@@ -710,13 +709,12 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_info_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(posix_query_info_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(hardlink_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rename_done);
-DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(rmdir_done);
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(unlink_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_eof_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_info_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(set_reparse_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(get_reparse_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done);
-DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done);
@@ -756,14 +754,13 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_info_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(posix_query_info_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(hardlink_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rename_err);
-DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(rmdir_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(unlink_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_eof_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_info_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(set_reparse_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(get_reparse_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
-DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err);
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index fa4ffe037bc7..8720a0705900 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -18,23 +18,42 @@
#define KASAN_ABI_VERSION 5
/*
+ * Clang 22 added preprocessor macros to match GCC, in hopes of eventually
+ * dropping __has_feature support for sanitizers:
+ * https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c
+ * Create these macros for older versions of clang so that it is easy to clean
+ * up once the minimum supported version of LLVM for building the kernel always
+ * creates these macros.
+ *
* Note: Checking __has_feature(*_sanitizer) is only true if the feature is
* enabled. Therefore it is not required to additionally check defined(CONFIG_*)
* to avoid adding redundant attributes in other configurations.
*/
+#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__)
+#define __SANITIZE_ADDRESS__
+#endif
+#if __has_feature(hwaddress_sanitizer) && !defined(__SANITIZE_HWADDRESS__)
+#define __SANITIZE_HWADDRESS__
+#endif
+#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__)
+#define __SANITIZE_THREAD__
+#endif
-#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer)
-/* Emulate GCC's __SANITIZE_ADDRESS__ flag */
+/*
+ * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel.
+ */
+#ifdef __SANITIZE_HWADDRESS__
#define __SANITIZE_ADDRESS__
+#endif
+
+#ifdef __SANITIZE_ADDRESS__
#define __no_sanitize_address \
__attribute__((no_sanitize("address", "hwaddress")))
#else
#define __no_sanitize_address
#endif
-#if __has_feature(thread_sanitizer)
-/* emulate gcc's __SANITIZE_THREAD__ flag */
-#define __SANITIZE_THREAD__
+#ifdef __SANITIZE_THREAD__
#define __no_sanitize_thread \
__attribute__((no_sanitize("thread")))
#else
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b91b993f58ee..487b3bf2e1ea 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -83,6 +83,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev,
extern ssize_t cpu_show_indirect_target_selection(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_vmscape(struct device *dev, struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 7fa1eb3cc823..61d50571ad88 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -171,6 +171,9 @@ int em_dev_update_perf_domain(struct device *dev,
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
const struct em_data_callback *cb,
const cpumask_t *cpus, bool microwatts);
+int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states,
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts);
void em_dev_unregister_perf_domain(struct device *dev);
struct em_perf_table *em_table_alloc(struct em_perf_domain *pd);
void em_table_free(struct em_perf_table *table);
@@ -350,6 +353,13 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
{
return -EINVAL;
}
+static inline
+int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states,
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts)
+{
+ return -EINVAL;
+}
static inline void em_dev_unregister_perf_domain(struct device *dev)
{
}
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index de5bd76a400c..d7d757e72554 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -856,8 +856,8 @@ struct kernel_ethtool_ts_info {
enum hwtstamp_provider_qualifier phc_qualifier;
enum hwtstamp_source phc_source;
int phc_phyindex;
- enum hwtstamp_tx_types tx_types;
- enum hwtstamp_rx_filters rx_filters;
+ u32 tx_types;
+ u32 rx_filters;
};
/**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7ab4f96d705..601d036a6c78 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -149,7 +149,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)(1 << 12))
-/* FMODE_* bit 13 */
+/* Supports IOCB_HAS_METADATA */
+#define FMODE_HAS_METADATA ((__force fmode_t)(1 << 13))
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)(1 << 14))
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 890011071f2b..fe5ce9215821 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -562,7 +562,7 @@ static inline void kasan_init_hw_tags(void) { }
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
void kasan_populate_early_vm_area_shadow(void *start, unsigned long size);
-int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask);
void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long free_region_start,
unsigned long free_region_end,
@@ -574,7 +574,7 @@ static inline void kasan_populate_early_vm_area_shadow(void *start,
unsigned long size)
{ }
static inline int kasan_populate_vmalloc(unsigned long start,
- unsigned long size)
+ unsigned long size, gfp_t gfp_mask)
{
return 0;
}
@@ -610,7 +610,7 @@ static __always_inline void kasan_poison_vmalloc(const void *start,
static inline void kasan_populate_early_vm_area_shadow(void *start,
unsigned long size) { }
static inline int kasan_populate_vmalloc(unsigned long start,
- unsigned long size)
+ unsigned long size, gfp_t gfp_mask)
{
return 0;
}
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 891e43a01bdc..3faa80f5d801 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1912,7 +1912,6 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
u64 tstamp;
- unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
struct work_struct destroy_work;
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 6c2f483d9828..656e784714f3 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -109,17 +109,11 @@ nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
const struct nft_set_ext *
nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
+#endif
+
const struct nft_set_ext *
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
-#else
-static inline const struct nft_set_ext *
-nft_set_do_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key)
-{
- return set->ops->lookup(net, set, key);
-}
-#endif
/* called from nft_pipapo_avx2.c */
const struct nft_set_ext *
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index cc8060c017d5..99dd166c5d07 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -3,6 +3,7 @@
#define _NETNS_NFTABLES_H_
struct netns_nftables {
+ unsigned int base_seq;
u8 gencursor;
};
diff --git a/init/main.c b/init/main.c
index 0ee0ee7b7c2c..5753e9539ae6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -956,6 +956,7 @@ void start_kernel(void)
sort_main_extable();
trap_init();
mm_core_init();
+ maple_tree_init();
poking_init();
ftrace_init();
@@ -973,7 +974,6 @@ void start_kernel(void)
"Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
radix_tree_init();
- maple_tree_init();
/*
* Set up housekeeping before setting up workqueues to allow the unbound
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 52a5b950b2e5..af5a54b5db12 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -886,6 +886,9 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
if (req->flags & REQ_F_HAS_METADATA) {
struct io_async_rw *io = req->async_data;
+ if (!(file->f_mode & FMODE_HAS_METADATA))
+ return -EINVAL;
+
/*
* We have a union of meta fields with wpq used for buffered-io
* in io_async_rw, so fail it here.
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 269c04a24664..f6cf8c2af5f7 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -62,3 +62,4 @@ CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_queue_stack_maps.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_lpm_trie.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_ringbuf.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rqspinlock.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5d1650af899d..e4568d44e827 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2366,8 +2366,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
const struct bpf_insn *insn)
{
/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
- * is not working properly, or interpreter is being used when
- * prog->jit_requested is not 0, so warn about it!
+ * is not working properly, so warn about it!
*/
WARN_ON_ONCE(1);
return 0;
@@ -2468,8 +2467,9 @@ out:
return ret;
}
-static void bpf_prog_select_func(struct bpf_prog *fp)
+static bool bpf_prog_select_interpreter(struct bpf_prog *fp)
{
+ bool select_interpreter = false;
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
u32 idx = (round_up(stack_depth, 32) / 32) - 1;
@@ -2478,15 +2478,16 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
* But for non-JITed programs, we don't need bpf_func, so no bounds
* check needed.
*/
- if (!fp->jit_requested &&
- !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) {
+ if (idx < ARRAY_SIZE(interpreters)) {
fp->bpf_func = interpreters[idx];
+ select_interpreter = true;
} else {
fp->bpf_func = __bpf_prog_ret0_warn;
}
#else
fp->bpf_func = __bpf_prog_ret0_warn;
#endif
+ return select_interpreter;
}
/**
@@ -2505,7 +2506,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
/* In case of BPF to BPF calls, verifier did all the prep
* work with regards to JITing, etc.
*/
- bool jit_needed = fp->jit_requested;
+ bool jit_needed = false;
if (fp->bpf_func)
goto finalize;
@@ -2514,7 +2515,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
bpf_prog_has_kfunc_call(fp))
jit_needed = true;
- bpf_prog_select_func(fp);
+ if (!bpf_prog_select_interpreter(fp))
+ jit_needed = true;
/* eBPF JITs can rewrite the program in case constant
* blinding is active. However, in case of error during
@@ -3024,7 +3026,10 @@ EXPORT_SYMBOL_GPL(bpf_event_output);
/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
- .func = NULL,
+ /* func is unused for tail_call, we set it to pass the
+ * get_helper_proto check
+ */
+ .func = BPF_PTR_POISON,
.gpl_only = false,
.ret_type = RET_VOID,
.arg1_type = ARG_PTR_TO_CTX,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index b2b7b8ec2c2a..c46360b27871 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -186,7 +186,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff xdp;
int i, nframes = 0;
- xdp_set_return_frame_no_direct();
xdp.rxq = &rxq;
for (i = 0; i < n; i++) {
@@ -231,7 +230,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
}
}
- xdp_clear_return_frame_no_direct();
stats->pass += nframes;
return nframes;
@@ -255,6 +253,7 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
rcu_read_lock();
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
+ xdp_set_return_frame_no_direct();
ret->xdp_n = cpu_map_bpf_prog_run_xdp(rcpu, frames, ret->xdp_n, stats);
if (unlikely(ret->skb_n))
@@ -264,6 +263,7 @@ static void cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
if (stats->redirect)
xdp_do_flush();
+ xdp_clear_return_frame_no_direct();
bpf_net_ctx_clear(bpf_net_ctx);
rcu_read_unlock();
diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c
index 94854cd9c4cc..83c4d9943084 100644
--- a/kernel/bpf/crypto.c
+++ b/kernel/bpf/crypto.c
@@ -278,7 +278,7 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx,
siv_len = siv ? __bpf_dynptr_size(siv) : 0;
src_len = __bpf_dynptr_size(src);
dst_len = __bpf_dynptr_size(dst);
- if (!src_len || !dst_len)
+ if (!src_len || !dst_len || src_len > dst_len)
return -EINVAL;
if (siv_len != ctx->siv_len)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6b4877e85a68..8af62cb243d9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1274,8 +1274,11 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
goto out;
}
- /* allocate hrtimer via map_kmalloc to use memcg accounting */
- cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
+ /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until
+ * kmalloc_nolock() is available, avoid locking issues by using
+ * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM).
+ */
+ cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node);
if (!cb) {
ret = -ENOMEM;
goto out;
@@ -3664,10 +3667,17 @@ __bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len
guard(pagefault)();
for (i = 0; i < XATTR_SIZE_MAX; i++) {
- for (j = 0; i + j < len && j < XATTR_SIZE_MAX; j++) {
+ for (j = 0; i + j <= len && j < XATTR_SIZE_MAX; j++) {
__get_kernel_nofault(&c2, s2__ign + j, char, err_out);
if (c2 == '\0')
return i;
+ /*
+ * We allow reading an extra byte from s2 (note the
+ * `i + j <= len` above) to cover the case when s2 is
+ * a suffix of the first len chars of s1.
+ */
+ if (i + j == len)
+ break;
__get_kernel_nofault(&c1, s1__ign + j, char, err_out);
if (c1 == '\0')
return -ENOENT;
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index 5ab354d55d82..a00561b1d3e5 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -471,7 +471,7 @@ queue:
* any MCS node. This is not the most elegant solution, but is
* simple enough.
*/
- if (unlikely(idx >= _Q_MAX_NODES)) {
+ if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) {
lockevent_inc(lock_no_node);
RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
while (!queued_spin_trylock(lock)) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c4f69a9e9af6..9fb1f957a093 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8547,6 +8547,10 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
verifier_bug(env, "Two map pointers in a timer helper");
return -EFAULT;
}
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n");
+ return -EOPNOTSUPP;
+ }
meta->map_uid = reg->map_uid;
meta->map_ptr = map;
return 0;
@@ -11354,7 +11358,7 @@ static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
return -EINVAL;
*ptr = env->ops->get_func_proto(func_id, env->prog);
- return *ptr ? 0 : -EINVAL;
+ return *ptr && (*ptr)->func ? 0 : -EINVAL;
}
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index e43c6de2bce4..b82399437db0 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -39,6 +39,7 @@ enum {
dma_debug_sg,
dma_debug_coherent,
dma_debug_resource,
+ dma_debug_noncoherent,
};
enum map_err_types {
@@ -141,6 +142,7 @@ static const char *type2name[] = {
[dma_debug_sg] = "scatter-gather",
[dma_debug_coherent] = "coherent",
[dma_debug_resource] = "resource",
+ [dma_debug_noncoherent] = "noncoherent",
};
static const char *dir2name[] = {
@@ -993,7 +995,8 @@ static void check_unmap(struct dma_debug_entry *ref)
"[mapped as %s] [unmapped as %s]\n",
ref->dev_addr, ref->size,
type2name[entry->type], type2name[ref->type]);
- } else if (entry->type == dma_debug_coherent &&
+ } else if ((entry->type == dma_debug_coherent ||
+ entry->type == dma_debug_noncoherent) &&
ref->paddr != entry->paddr) {
err_printk(ref->dev, entry, "device driver frees "
"DMA memory with different CPU address "
@@ -1581,6 +1584,49 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
}
}
+void debug_dma_alloc_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+ struct dma_debug_entry *entry;
+
+ if (unlikely(dma_debug_disabled()))
+ return;
+
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->type = dma_debug_noncoherent;
+ entry->dev = dev;
+ entry->paddr = page_to_phys(page);
+ entry->size = size;
+ entry->dev_addr = dma_addr;
+ entry->direction = direction;
+
+ add_dma_entry(entry, attrs);
+}
+
+void debug_dma_free_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr)
+{
+ struct dma_debug_entry ref = {
+ .type = dma_debug_noncoherent,
+ .dev = dev,
+ .paddr = page_to_phys(page),
+ .dev_addr = dma_addr,
+ .size = size,
+ .direction = direction,
+ };
+
+ if (unlikely(dma_debug_disabled()))
+ return;
+
+ check_unmap(&ref);
+}
+
static int __init dma_debug_driver_setup(char *str)
{
int i;
diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h
index f525197d3cae..48757ca13f31 100644
--- a/kernel/dma/debug.h
+++ b/kernel/dma/debug.h
@@ -54,6 +54,13 @@ extern void debug_dma_sync_sg_for_cpu(struct device *dev,
extern void debug_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg,
int nelems, int direction);
+extern void debug_dma_alloc_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs);
+extern void debug_dma_free_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr);
#else /* CONFIG_DMA_API_DEBUG */
static inline void debug_dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
@@ -126,5 +133,18 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev,
int nelems, int direction)
{
}
+
+static inline void debug_dma_alloc_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+}
+
+static inline void debug_dma_free_pages(struct device *dev, struct page *page,
+ size_t size, int direction,
+ dma_addr_t dma_addr)
+{
+}
#endif /* CONFIG_DMA_API_DEBUG */
#endif /* _KERNEL_DMA_DEBUG_H */
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 107e4a4d251d..56de28a3b179 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -712,7 +712,7 @@ struct page *dma_alloc_pages(struct device *dev, size_t size,
if (page) {
trace_dma_alloc_pages(dev, page_to_virt(page), *dma_handle,
size, dir, gfp, 0);
- debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0);
+ debug_dma_alloc_pages(dev, page, size, dir, *dma_handle, 0);
} else {
trace_dma_alloc_pages(dev, NULL, 0, size, dir, gfp, 0);
}
@@ -738,7 +738,7 @@ void dma_free_pages(struct device *dev, size_t size, struct page *page,
dma_addr_t dma_handle, enum dma_data_direction dir)
{
trace_dma_free_pages(dev, page_to_virt(page), dma_handle, size, dir, 0);
- debug_dma_unmap_page(dev, dma_handle, size, dir);
+ debug_dma_free_pages(dev, page, size, dir, dma_handle);
__dma_free_pages(dev, size, page, dma_handle, dir);
}
EXPORT_SYMBOL_GPL(dma_free_pages);
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index ea7995a25780..8df55397414a 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -553,6 +553,30 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
const struct em_data_callback *cb,
const cpumask_t *cpus, bool microwatts)
{
+ int ret = em_dev_register_pd_no_update(dev, nr_states, cb, cpus, microwatts);
+
+ if (_is_cpu_device(dev))
+ em_check_capacity_update();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
+
+/**
+ * em_dev_register_pd_no_update() - Register a perf domain for a device
+ * @dev : Device to register the PD for
+ * @nr_states : Number of performance states in the new PD
+ * @cb : Callback functions for populating the energy model
+ * @cpus : CPUs to include in the new PD (mandatory if @dev is a CPU device)
+ * @microwatts : Whether or not the power values in the EM will be in uW
+ *
+ * Like em_dev_register_perf_domain(), but does not trigger a CPU capacity
+ * update after registering the PD, even if @dev is a CPU device.
+ */
+int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states,
+ const struct em_data_callback *cb,
+ const cpumask_t *cpus, bool microwatts)
+{
struct em_perf_table *em_table;
unsigned long cap, prev_cap = 0;
unsigned long flags = 0;
@@ -636,12 +660,9 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
unlock:
mutex_unlock(&em_pd_mutex);
- if (_is_cpu_device(dev))
- em_check_capacity_update();
-
return ret;
}
-EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
+EXPORT_SYMBOL_GPL(em_dev_register_pd_no_update);
/**
* em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1f1f30cca573..2f66ab453823 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -449,6 +449,7 @@ int hibernation_snapshot(int platform_mode)
shrink_shmem_memory();
console_suspend_all();
+ pm_restrict_gfp_mask();
error = dpm_suspend(PMSG_FREEZE);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 2a42c1036ea8..1e3b32b1e82c 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1397,7 +1397,8 @@ error:
ftrace_graph_active--;
gops->saved_func = NULL;
fgraph_lru_release_index(i);
- unregister_pm_notifier(&ftrace_suspend_notifier);
+ if (!ftrace_graph_active)
+ unregister_pm_notifier(&ftrace_suspend_notifier);
}
return ret;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1b7db732c0b1..b3c94fbaf002 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -834,7 +834,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
/* copy the current bits to the new max */
ret = trace_pid_list_first(filtered_pids, &pid);
while (!ret) {
- trace_pid_list_set(pid_list, pid);
+ ret = trace_pid_list_set(pid_list, pid);
+ if (ret < 0)
+ goto out;
+
ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
nr_pids++;
}
@@ -871,6 +874,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
trace_parser_clear(&parser);
ret = 0;
}
+ out:
trace_parser_put(&parser);
if (ret < 0) {
@@ -7209,7 +7213,7 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user
entry = ring_buffer_event_data(event);
entry->ip = ip;
- len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
+ len = copy_from_user_nofault(&entry->buf, ubuf, cnt);
if (len) {
memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
cnt = FAULTED_SIZE;
@@ -7306,7 +7310,7 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
entry = ring_buffer_event_data(event);
- len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
+ len = copy_from_user_nofault(&entry->id, ubuf, cnt);
if (len) {
entry->id = -1;
memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index af42aaa3d172..2ab283fd3032 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -496,7 +496,7 @@ static bool user_event_enabler_queue_fault(struct user_event_mm *mm,
{
struct user_event_enabler_fault *fault;
- fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT | __GFP_NOWARN);
+ fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT);
if (!fault)
return false;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index fd259da0aa64..337bc0eb5d71 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -2322,6 +2322,9 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
int running, err;
char *buf __free(kfree) = NULL;
+ if (count < 1)
+ return 0;
+
buf = kmalloc(count, GFP_KERNEL);
if (!buf)
return -ENOMEM;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 106ee8b0f2d5..c2e0b469fd43 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -2111,6 +2111,10 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
if (!quota->ms && !quota->sz && list_empty(&quota->goals))
return;
+ /* First charge window */
+ if (!quota->total_charged_sz && !quota->charged_from)
+ quota->charged_from = jiffies;
+
/* New charge window starts */
if (time_after_eq(jiffies, quota->charged_from +
msecs_to_jiffies(quota->reset_interval))) {
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 151a9de5ad8b..b5a5ed16a7a5 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -198,6 +198,11 @@ static int damon_lru_sort_apply_parameters(void)
if (err)
return err;
+ if (!damon_lru_sort_mon_attrs.sample_interval) {
+ err = -EINVAL;
+ goto out;
+ }
+
err = damon_set_attrs(ctx, &damon_lru_sort_mon_attrs);
if (err)
goto out;
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 3c71b4596676..fb7c982a0018 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -194,6 +194,11 @@ static int damon_reclaim_apply_parameters(void)
if (err)
return err;
+ if (!damon_reclaim_mon_attrs.aggr_interval) {
+ err = -EINVAL;
+ goto out;
+ }
+
err = damon_set_attrs(param_ctx, &damon_reclaim_mon_attrs);
if (err)
goto out;
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 6d2b0dab50cb..7b9254cadd5f 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1260,14 +1260,18 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
{
struct damon_sysfs_kdamond *kdamond = container_of(kobj,
struct damon_sysfs_kdamond, kobj);
- struct damon_ctx *ctx = kdamond->damon_ctx;
- bool running;
+ struct damon_ctx *ctx;
+ bool running = false;
- if (!ctx)
- running = false;
- else
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+
+ ctx = kdamond->damon_ctx;
+ if (ctx)
running = damon_is_running(ctx);
+ mutex_unlock(&damon_sysfs_lock);
+
return sysfs_emit(buf, "%s\n", running ?
damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] :
damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 753f99b4c718..eed59cfb5d21 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5851,7 +5851,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
spinlock_t *ptl;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
- bool adjust_reservation = false;
+ bool adjust_reservation;
unsigned long last_addr_mask;
bool force_flush = false;
@@ -5944,6 +5944,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
sz);
hugetlb_count_sub(pages_per_huge_page(h), mm);
hugetlb_remove_rmap(folio);
+ spin_unlock(ptl);
/*
* Restore the reservation for anonymous page, otherwise the
@@ -5951,14 +5952,16 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
* If there we are freeing a surplus, do not set the restore
* reservation bit.
*/
+ adjust_reservation = false;
+
+ spin_lock_irq(&hugetlb_lock);
if (!h->surplus_huge_pages && __vma_private_lock(vma) &&
folio_test_anon(folio)) {
folio_set_hugetlb_restore_reserve(folio);
/* Reservation to be adjusted after the spin lock */
adjust_reservation = true;
}
-
- spin_unlock(ptl);
+ spin_unlock_irq(&hugetlb_lock);
/*
* Adjust the reservation for the region that will have the
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index e2ceebf737ef..11d472a5c4e8 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -336,13 +336,13 @@ static void ___free_pages_bulk(struct page **pages, int nr_pages)
}
}
-static int ___alloc_pages_bulk(struct page **pages, int nr_pages)
+static int ___alloc_pages_bulk(struct page **pages, int nr_pages, gfp_t gfp_mask)
{
unsigned long nr_populated, nr_total = nr_pages;
struct page **page_array = pages;
while (nr_pages) {
- nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages);
+ nr_populated = alloc_pages_bulk(gfp_mask, nr_pages, pages);
if (!nr_populated) {
___free_pages_bulk(page_array, nr_total - nr_pages);
return -ENOMEM;
@@ -354,25 +354,42 @@ static int ___alloc_pages_bulk(struct page **pages, int nr_pages)
return 0;
}
-static int __kasan_populate_vmalloc(unsigned long start, unsigned long end)
+static int __kasan_populate_vmalloc(unsigned long start, unsigned long end, gfp_t gfp_mask)
{
unsigned long nr_pages, nr_total = PFN_UP(end - start);
struct vmalloc_populate_data data;
+ unsigned int flags;
int ret = 0;
- data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+ data.pages = (struct page **)__get_free_page(gfp_mask | __GFP_ZERO);
if (!data.pages)
return -ENOMEM;
while (nr_total) {
nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0]));
- ret = ___alloc_pages_bulk(data.pages, nr_pages);
+ ret = ___alloc_pages_bulk(data.pages, nr_pages, gfp_mask);
if (ret)
break;
data.start = start;
+
+ /*
+ * page tables allocations ignore external gfp mask, enforce it
+ * by the scope API
+ */
+ if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO)
+ flags = memalloc_nofs_save();
+ else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0)
+ flags = memalloc_noio_save();
+
ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE,
kasan_populate_vmalloc_pte, &data);
+
+ if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO)
+ memalloc_nofs_restore(flags);
+ else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0)
+ memalloc_noio_restore(flags);
+
___free_pages_bulk(data.pages, nr_pages);
if (ret)
break;
@@ -386,7 +403,7 @@ static int __kasan_populate_vmalloc(unsigned long start, unsigned long end)
return ret;
}
-int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask)
{
unsigned long shadow_start, shadow_end;
int ret;
@@ -415,7 +432,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
shadow_start = PAGE_ALIGN_DOWN(shadow_start);
shadow_end = PAGE_ALIGN(shadow_end);
- ret = __kasan_populate_vmalloc(shadow_start, shadow_end);
+ ret = __kasan_populate_vmalloc(shadow_start, shadow_end, gfp_mask);
if (ret)
return ret;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6b40bdfd224c..b486c1d19b2d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1417,8 +1417,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
*/
if (cc->is_khugepaged &&
(pte_young(pteval) || folio_test_young(folio) ||
- folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm,
- address)))
+ folio_test_referenced(folio) ||
+ mmu_notifier_test_young(vma->vm_mm, _address)))
referenced++;
}
if (!writable) {
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fc30ca4804bf..df6ee59527dd 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -956,7 +956,7 @@ static const char * const action_page_types[] = {
[MF_MSG_BUDDY] = "free buddy page",
[MF_MSG_DAX] = "dax page",
[MF_MSG_UNSPLIT_THP] = "unsplit thp",
- [MF_MSG_ALREADY_POISONED] = "already poisoned",
+ [MF_MSG_ALREADY_POISONED] = "already poisoned page",
[MF_MSG_UNKNOWN] = "unknown page",
};
@@ -1349,9 +1349,10 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type,
{
trace_memory_failure_event(pfn, type, result);
- num_poisoned_pages_inc(pfn);
-
- update_per_node_mf_stats(pfn, result);
+ if (type != MF_MSG_ALREADY_POISONED) {
+ num_poisoned_pages_inc(pfn);
+ update_per_node_mf_stats(pfn, result);
+ }
pr_err("%#lx: recovery action for %s: %s\n",
pfn, action_page_types[type], action_name[result]);
@@ -2094,12 +2095,11 @@ retry:
*hugetlb = 0;
return 0;
} else if (res == -EHWPOISON) {
- pr_err("%#lx: already hardware poisoned\n", pfn);
if (flags & MF_ACTION_REQUIRED) {
folio = page_folio(p);
res = kill_accessing_process(current, folio_pfn(folio), flags);
- action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
}
+ action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
return res;
} else if (res == -EBUSY) {
if (!(flags & MF_NO_RETRY)) {
@@ -2285,7 +2285,6 @@ try_again:
goto unlock_mutex;
if (TestSetPageHWPoison(p)) {
- pr_err("%#lx: already hardware poisoned\n", pfn);
res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, pfn, flags);
@@ -2569,10 +2568,9 @@ int unpoison_memory(unsigned long pfn)
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- if (!pfn_valid(pfn))
- return -ENXIO;
-
- p = pfn_to_page(pfn);
+ p = pfn_to_online_page(pfn);
+ if (!p)
+ return -EIO;
folio = page_folio(p);
mutex_lock(&mf_mutex);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1f15af712bc3..74318c787715 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1815,8 +1815,14 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1;
if (folio_contain_hwpoisoned_page(folio)) {
- if (WARN_ON(folio_test_lru(folio)))
- folio_isolate_lru(folio);
+ /*
+ * unmap_poisoned_folio() cannot handle large folios
+ * in all cases yet.
+ */
+ if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ goto put_folio;
+ if (folio_test_lru(folio) && !folio_isolate_lru(folio))
+ goto put_folio;
if (folio_mapped(folio)) {
folio_lock(folio);
unmap_poisoned_folio(folio, pfn, false);
diff --git a/mm/mremap.c b/mm/mremap.c
index e618a706aff5..35de0a7b910e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1774,15 +1774,18 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm)
if (!vrm->new_len)
return -EINVAL;
- /* Is the new length or address silly? */
- if (vrm->new_len > TASK_SIZE ||
- vrm->new_addr > TASK_SIZE - vrm->new_len)
+ /* Is the new length silly? */
+ if (vrm->new_len > TASK_SIZE)
return -EINVAL;
/* Remainder of checks are for cases with specific new_addr. */
if (!vrm_implies_new_addr(vrm))
return 0;
+ /* Is the new address silly? */
+ if (vrm->new_addr > TASK_SIZE - vrm->new_len)
+ return -EINVAL;
+
/* The new address must be page-aligned. */
if (offset_in_page(vrm->new_addr))
return -EINVAL;
diff --git a/mm/percpu.c b/mm/percpu.c
index a56f35dcc417..81462ce5866e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1734,7 +1734,7 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
bool is_atomic;
bool do_warn;
struct obj_cgroup *objcg = NULL;
- static int warn_limit = 10;
+ static atomic_t warn_limit = ATOMIC_INIT(10);
struct pcpu_chunk *chunk, *next;
const char *err;
int slot, off, cpu, ret;
@@ -1904,13 +1904,17 @@ fail_unlock:
fail:
trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
- if (do_warn && warn_limit) {
- pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
- size, align, is_atomic, err);
- if (!is_atomic)
- dump_stack();
- if (!--warn_limit)
- pr_info("limit reached, disable warning\n");
+ if (do_warn) {
+ int remaining = atomic_dec_if_positive(&warn_limit);
+
+ if (remaining >= 0) {
+ pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
+ size, align, is_atomic, err);
+ if (!is_atomic)
+ dump_stack();
+ if (remaining == 0)
+ pr_info("limit reached, disable warning\n");
+ }
}
if (is_atomic) {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6dbcdceecae1..5edd536ba9d2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2026,6 +2026,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
if (unlikely(!vmap_initialized))
return ERR_PTR(-EBUSY);
+ /* Only reclaim behaviour flags are relevant. */
+ gfp_mask = gfp_mask & GFP_RECLAIM_MASK;
might_sleep();
/*
@@ -2038,8 +2040,6 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
*/
va = node_alloc(size, align, vstart, vend, &addr, &vn_id);
if (!va) {
- gfp_mask = gfp_mask & GFP_RECLAIM_MASK;
-
va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
if (unlikely(!va))
return ERR_PTR(-ENOMEM);
@@ -2089,7 +2089,7 @@ retry:
BUG_ON(va->va_start < vstart);
BUG_ON(va->va_end > vend);
- ret = kasan_populate_vmalloc(addr, size);
+ ret = kasan_populate_vmalloc(addr, size, gfp_mask);
if (ret) {
free_vmap_area(va);
return ERR_PTR(ret);
@@ -4826,7 +4826,7 @@ retry:
/* populate the kasan shadow space */
for (area = 0; area < nr_vms; area++) {
- if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
+ if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area], GFP_KERNEL))
goto err_free_shadow;
}
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1885d0c315f0..c683baa3847f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -324,6 +324,13 @@ int br_boolopt_multi_toggle(struct net_bridge *br,
int err = 0;
int opt_id;
+ opt_id = find_next_bit(&bitmap, BITS_PER_LONG, BR_BOOLOPT_MAX);
+ if (opt_id != BITS_PER_LONG) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Unknown boolean option %d",
+ opt_id);
+ return -EINVAL;
+ }
+
for_each_set_bit(opt_id, &bitmap, BR_BOOLOPT_MAX) {
bool on = !!(bm->optval & BIT(opt_id));
diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c
index 39844f14eed8..797719cb227e 100644
--- a/net/can/j1939/bus.c
+++ b/net/can/j1939/bus.c
@@ -290,8 +290,11 @@ int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa)
if (!ecu)
ecu = j1939_ecu_create_locked(priv, name);
err = PTR_ERR_OR_ZERO(ecu);
- if (err)
+ if (err) {
+ if (j1939_address_is_unicast(sa))
+ priv->ents[sa].nusers--;
goto done;
+ }
ecu->nusers++;
/* TODO: do we care if ecu->addr != sa? */
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 31a93cae5111..81f58924b4ac 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -212,6 +212,7 @@ void j1939_priv_get(struct j1939_priv *priv);
/* notify/alert all j1939 sockets bound to ifindex */
void j1939_sk_netdev_event_netdown(struct j1939_priv *priv);
+void j1939_sk_netdev_event_unregister(struct j1939_priv *priv);
int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk);
void j1939_tp_init(struct j1939_priv *priv);
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 7e8a20f2fc42..3706a872ecaf 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -377,6 +377,9 @@ static int j1939_netdev_notify(struct notifier_block *nb,
j1939_sk_netdev_event_netdown(priv);
j1939_ecu_unmap_all(priv);
break;
+ case NETDEV_UNREGISTER:
+ j1939_sk_netdev_event_unregister(priv);
+ break;
}
j1939_priv_put(priv);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 3d8b588822f9..88e7160d4248 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -521,6 +521,9 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa);
if (ret) {
j1939_netdev_stop(priv);
+ jsk->priv = NULL;
+ synchronize_rcu();
+ j1939_priv_put(priv);
goto out_release_sock;
}
@@ -1300,6 +1303,55 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
read_unlock_bh(&priv->j1939_socks_lock);
}
+void j1939_sk_netdev_event_unregister(struct j1939_priv *priv)
+{
+ struct sock *sk;
+ struct j1939_sock *jsk;
+ bool wait_rcu = false;
+
+rescan: /* The caller is holding a ref on this "priv" via j1939_priv_get_by_ndev(). */
+ read_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ /* Skip if j1939_jsk_add() is not called on this socket. */
+ if (!(jsk->state & J1939_SOCK_BOUND))
+ continue;
+ sk = &jsk->sk;
+ sock_hold(sk);
+ read_unlock_bh(&priv->j1939_socks_lock);
+ /* Check if j1939_jsk_del() is not yet called on this socket after holding
+ * socket's lock, for both j1939_sk_bind() and j1939_sk_release() call
+ * j1939_jsk_del() with socket's lock held.
+ */
+ lock_sock(sk);
+ if (jsk->state & J1939_SOCK_BOUND) {
+ /* Neither j1939_sk_bind() nor j1939_sk_release() called j1939_jsk_del().
+ * Make this socket no longer bound, by pretending as if j1939_sk_bind()
+ * dropped old references but did not get new references.
+ */
+ j1939_jsk_del(priv, jsk);
+ j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
+ j1939_netdev_stop(priv);
+ /* Call j1939_priv_put() now and prevent j1939_sk_sock_destruct() from
+ * calling the corresponding j1939_priv_put().
+ *
+ * j1939_sk_sock_destruct() is supposed to call j1939_priv_put() after
+ * an RCU grace period. But since the caller is holding a ref on this
+ * "priv", we can defer synchronize_rcu() until immediately before
+ * the caller calls j1939_priv_put().
+ */
+ j1939_priv_put(priv);
+ jsk->priv = NULL;
+ wait_rcu = true;
+ }
+ release_sock(sk);
+ sock_put(sk);
+ goto rescan;
+ }
+ read_unlock_bh(&priv->j1939_socks_lock);
+ if (wait_rcu)
+ synchronize_rcu();
+}
+
static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 9c0ad7f4b5d8..ad54b12d4b4c 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -464,8 +464,15 @@ int generic_hwtstamp_get_lower(struct net_device *dev,
if (!netif_device_present(dev))
return -ENODEV;
- if (ops->ndo_hwtstamp_get)
- return dev_get_hwtstamp_phylib(dev, kernel_cfg);
+ if (ops->ndo_hwtstamp_get) {
+ int err;
+
+ netdev_lock_ops(dev);
+ err = dev_get_hwtstamp_phylib(dev, kernel_cfg);
+ netdev_unlock_ops(dev);
+
+ return err;
+ }
/* Legacy path: unconverted lower driver */
return generic_hwtstamp_ioctl_lower(dev, SIOCGHWTSTAMP, kernel_cfg);
@@ -481,8 +488,15 @@ int generic_hwtstamp_set_lower(struct net_device *dev,
if (!netif_device_present(dev))
return -ENODEV;
- if (ops->ndo_hwtstamp_set)
- return dev_set_hwtstamp_phylib(dev, kernel_cfg, extack);
+ if (ops->ndo_hwtstamp_set) {
+ int err;
+
+ netdev_lock_ops(dev);
+ err = dev_set_hwtstamp_phylib(dev, kernel_cfg, extack);
+ netdev_unlock_ops(dev);
+
+ return err;
+ }
/* Legacy path: unconverted lower driver */
return generic_hwtstamp_ioctl_lower(dev, SIOCSHWTSTAMP, kernel_cfg);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 88657255fec1..fbbc3ccf9df6 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -49,7 +49,7 @@ static bool hsr_check_carrier(struct hsr_port *master)
ASSERT_RTNL();
- hsr_for_each_port(master->hsr, port) {
+ hsr_for_each_port_rtnl(master->hsr, port) {
if (port->type != HSR_PT_MASTER && is_slave_up(port->dev)) {
netif_carrier_on(master->dev);
return true;
@@ -105,7 +105,7 @@ int hsr_get_max_mtu(struct hsr_priv *hsr)
struct hsr_port *port;
mtu_max = ETH_DATA_LEN;
- hsr_for_each_port(hsr, port)
+ hsr_for_each_port_rtnl(hsr, port)
if (port->type != HSR_PT_MASTER)
mtu_max = min(port->dev->mtu, mtu_max);
@@ -139,7 +139,7 @@ static int hsr_dev_open(struct net_device *dev)
hsr = netdev_priv(dev);
- hsr_for_each_port(hsr, port) {
+ hsr_for_each_port_rtnl(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
@@ -172,7 +172,7 @@ static int hsr_dev_close(struct net_device *dev)
struct hsr_priv *hsr;
hsr = netdev_priv(dev);
- hsr_for_each_port(hsr, port) {
+ hsr_for_each_port_rtnl(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
@@ -205,7 +205,7 @@ static netdev_features_t hsr_features_recompute(struct hsr_priv *hsr,
* may become enabled.
*/
features &= ~NETIF_F_ONE_FOR_ALL;
- hsr_for_each_port(hsr, port)
+ hsr_for_each_port_rtnl(hsr, port)
features = netdev_increment_features(features,
port->dev->features,
mask);
@@ -226,6 +226,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct hsr_priv *hsr = netdev_priv(dev);
struct hsr_port *master;
+ rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
if (master) {
skb->dev = master->dev;
@@ -238,6 +239,8 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
dev_core_stats_tx_dropped_inc(dev);
dev_kfree_skb_any(skb);
}
+ rcu_read_unlock();
+
return NETDEV_TX_OK;
}
@@ -484,7 +487,7 @@ static void hsr_set_rx_mode(struct net_device *dev)
hsr = netdev_priv(dev);
- hsr_for_each_port(hsr, port) {
+ hsr_for_each_port_rtnl(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
@@ -506,7 +509,7 @@ static void hsr_change_rx_flags(struct net_device *dev, int change)
hsr = netdev_priv(dev);
- hsr_for_each_port(hsr, port) {
+ hsr_for_each_port_rtnl(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
@@ -534,7 +537,7 @@ static int hsr_ndo_vlan_rx_add_vid(struct net_device *dev,
hsr = netdev_priv(dev);
- hsr_for_each_port(hsr, port) {
+ hsr_for_each_port_rtnl(hsr, port) {
if (port->type == HSR_PT_MASTER ||
port->type == HSR_PT_INTERLINK)
continue;
@@ -580,7 +583,7 @@ static int hsr_ndo_vlan_rx_kill_vid(struct net_device *dev,
hsr = netdev_priv(dev);
- hsr_for_each_port(hsr, port) {
+ hsr_for_each_port_rtnl(hsr, port) {
switch (port->type) {
case HSR_PT_SLAVE_A:
case HSR_PT_SLAVE_B:
@@ -672,9 +675,14 @@ struct net_device *hsr_get_port_ndev(struct net_device *ndev,
struct hsr_priv *hsr = netdev_priv(ndev);
struct hsr_port *port;
+ rcu_read_lock();
hsr_for_each_port(hsr, port)
- if (port->type == pt)
+ if (port->type == pt) {
+ dev_hold(port->dev);
+ rcu_read_unlock();
return port->dev;
+ }
+ rcu_read_unlock();
return NULL;
}
EXPORT_SYMBOL(hsr_get_port_ndev);
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 192893c3f2ec..bc94b07101d8 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -22,7 +22,7 @@ static bool hsr_slave_empty(struct hsr_priv *hsr)
{
struct hsr_port *port;
- hsr_for_each_port(hsr, port)
+ hsr_for_each_port_rtnl(hsr, port)
if (port->type != HSR_PT_MASTER)
return false;
return true;
@@ -134,7 +134,7 @@ struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt)
{
struct hsr_port *port;
- hsr_for_each_port(hsr, port)
+ hsr_for_each_port_rtnl(hsr, port)
if (port->type == pt)
return port;
return NULL;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 135ec5fce019..33b0d2460c9b 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -224,6 +224,9 @@ struct hsr_priv {
#define hsr_for_each_port(hsr, port) \
list_for_each_entry_rcu((port), &(hsr)->ports, port_list)
+#define hsr_for_each_port_rtnl(hsr, port) \
+ list_for_each_entry_rcu((port), &(hsr)->ports, port_list, lockdep_rtnl_is_held())
+
struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt);
/* Caller must ensure skb is a valid HSR frame */
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index cc9915543637..2e61ac137128 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -206,6 +206,9 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
return -EINVAL;
+ if (skb_is_gso(skb))
+ skb_gso_reset(skb);
+
skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
pskb_pull(skb, ETH_HLEN);
skb_reset_network_header(skb);
@@ -300,6 +303,9 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
return -EINVAL;
+ if (skb_is_gso(skb))
+ skb_gso_reset(skb);
+
skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
pskb_pull(skb, ETH_HLEN);
skb_reset_network_header(skb);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ba581785adb4..a268e1595b22 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -408,8 +408,11 @@ more_data:
if (!psock->cork) {
psock->cork = kzalloc(sizeof(*psock->cork),
GFP_ATOMIC | __GFP_NOWARN);
- if (!psock->cork)
+ if (!psock->cork) {
+ sk_msg_free(sk, msg);
+ *copied = 0;
return -ENOMEM;
+ }
}
memcpy(psock->cork, msg, sizeof(*msg));
return 0;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 2c267aff95be..2abe6f1e9940 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1532,13 +1532,12 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
{
static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
struct sock *sk = (struct sock *)msk;
+ bool keep_open;
- if (ssk->sk_prot->keepalive) {
- if (sock_flag(sk, SOCK_KEEPOPEN))
- ssk->sk_prot->keepalive(ssk, 1);
- else
- ssk->sk_prot->keepalive(ssk, 0);
- }
+ keep_open = sock_flag(sk, SOCK_KEEPOPEN);
+ if (ssk->sk_prot->keepalive)
+ ssk->sk_prot->keepalive(ssk, keep_open);
+ sock_valbool_flag(ssk, SOCK_KEEPOPEN, keep_open);
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c1082de09656..c3c73411c40c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1131,11 +1131,14 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
return ERR_PTR(-ENOENT);
}
-static __be16 nft_base_seq(const struct net *net)
+static unsigned int nft_base_seq(const struct net *net)
{
- struct nftables_pernet *nft_net = nft_pernet(net);
+ return READ_ONCE(net->nft.base_seq);
+}
- return htons(nft_net->base_seq & 0xffff);
+static __be16 nft_base_seq_be16(const struct net *net)
+{
+ return htons(nft_base_seq(net) & 0xffff);
}
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
@@ -1155,7 +1158,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -1248,7 +1251,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -2030,7 +2033,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -2133,7 +2136,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -3671,7 +3674,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
- nft_base_seq(net));
+ nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -3839,7 +3842,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -4050,7 +4053,7 @@ static int nf_tables_getrule_reset(struct sk_buff *skb,
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
nla_len(nla[NFTA_RULE_TABLE]),
(char *)nla_data(nla[NFTA_RULE_TABLE]),
- nft_net->base_seq);
+ nft_base_seq(net));
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
kfree(buf);
@@ -4887,7 +4890,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
flags, ctx->family, NFNETLINK_V0,
- nft_base_seq(ctx->net));
+ nft_base_seq_be16(ctx->net));
if (!nlh)
goto nla_put_failure;
@@ -5032,7 +5035,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (ctx->family != NFPROTO_UNSPEC &&
@@ -6209,7 +6212,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
@@ -6238,7 +6241,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
seq = cb->nlh->nlmsg_seq;
nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
- table->family, NFNETLINK_V0, nft_base_seq(net));
+ table->family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -6331,7 +6334,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
- NFNETLINK_V0, nft_base_seq(ctx->net));
+ NFNETLINK_V0, nft_base_seq_be16(ctx->net));
if (!nlh)
goto nla_put_failure;
@@ -6630,7 +6633,7 @@ static int nf_tables_getsetelem_reset(struct sk_buff *skb,
}
nelems++;
}
- audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems);
+ audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems);
out_unlock:
rcu_read_unlock();
@@ -8381,7 +8384,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -8446,7 +8449,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -8480,7 +8483,7 @@ cont:
idx++;
}
if (ctx->reset && entries)
- audit_log_obj_reset(table, nft_net->base_seq, entries);
+ audit_log_obj_reset(table, nft_base_seq(net), entries);
if (rc < 0)
break;
}
@@ -8649,7 +8652,7 @@ static int nf_tables_getobj_reset(struct sk_buff *skb,
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
nla_len(nla[NFTA_OBJ_TABLE]),
(char *)nla_data(nla[NFTA_OBJ_TABLE]),
- nft_net->base_seq);
+ nft_base_seq(net));
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
kfree(buf);
@@ -8754,9 +8757,8 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
u16 flags, int family, int report, gfp_t gfp)
{
- struct nftables_pernet *nft_net = nft_pernet(net);
char *buf = kasprintf(gfp, "%s:%u",
- table->name, nft_net->base_seq);
+ table->name, nft_base_seq(net));
audit_log_nfcfg(buf,
family,
@@ -9442,7 +9444,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -9511,7 +9513,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -9696,17 +9698,16 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
- struct nftables_pernet *nft_net = nft_pernet(net);
struct nlmsghdr *nlh;
char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
- NFNETLINK_V0, nft_base_seq(net));
+ NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) ||
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_base_seq(net))) ||
nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
@@ -10968,11 +10969,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
* Bump generation counter, invalidate any dump in progress.
* Cannot fail after this point.
*/
- base_seq = READ_ONCE(nft_net->base_seq);
+ base_seq = nft_base_seq(net);
while (++base_seq == 0)
;
- WRITE_ONCE(nft_net->base_seq, base_seq);
+ /* pairs with smp_load_acquire in nft_lookup_eval */
+ smp_store_release(&net->nft.base_seq, base_seq);
gc_seq = nft_gc_seq_begin(nft_net);
@@ -11181,7 +11183,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
- nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+ nf_tables_commit_audit_log(&adl, nft_base_seq(net));
nft_gc_seq_end(nft_net, gc_seq);
nft_net->validate_state = NFT_VALIDATE_SKIP;
@@ -11506,7 +11508,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
mutex_lock(&nft_net->commit_mutex);
nft_net->tstamp = get_jiffies_64();
- genid_ok = genid == 0 || nft_net->base_seq == genid;
+ genid_ok = genid == 0 || nft_base_seq(net) == genid;
if (!genid_ok)
mutex_unlock(&nft_net->commit_mutex);
@@ -12143,7 +12145,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
- nft_net->base_seq = 1;
+ net->nft.base_seq = 1;
nft_net->gc_seq = 0;
nft_net->validate_state = NFT_VALIDATE_SKIP;
INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 40c602ffbcba..58c5b14889c4 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -24,11 +24,11 @@ struct nft_lookup {
struct nft_set_binding binding;
};
-#ifdef CONFIG_MITIGATION_RETPOLINE
-const struct nft_set_ext *
-nft_set_do_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key)
+static const struct nft_set_ext *
+__nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
+#ifdef CONFIG_MITIGATION_RETPOLINE
if (set->ops == &nft_set_hash_fast_type.ops)
return nft_hash_lookup_fast(net, set, key);
if (set->ops == &nft_set_hash_type.ops)
@@ -51,10 +51,46 @@ nft_set_do_lookup(const struct net *net, const struct nft_set *set,
return nft_rbtree_lookup(net, set, key);
WARN_ON_ONCE(1);
+#endif
return set->ops->lookup(net, set, key);
}
+
+static unsigned int nft_base_seq(const struct net *net)
+{
+ /* pairs with smp_store_release() in nf_tables_commit() */
+ return smp_load_acquire(&net->nft.base_seq);
+}
+
+static bool nft_lookup_should_retry(const struct net *net, unsigned int seq)
+{
+ return unlikely(seq != nft_base_seq(net));
+}
+
+const struct nft_set_ext *
+nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
+{
+ const struct nft_set_ext *ext;
+ unsigned int base_seq;
+
+ do {
+ base_seq = nft_base_seq(net);
+
+ ext = __nft_set_do_lookup(net, set, key);
+ if (ext)
+ break;
+ /* No match? There is a small chance that lookup was
+ * performed in the old generation, but nf_tables_commit()
+ * already unlinked a (matching) element.
+ *
+ * We need to repeat the lookup to make sure that we didn't
+ * miss a matching element in the new generation.
+ */
+ } while (nft_lookup_should_retry(net, base_seq));
+
+ return ext;
+}
EXPORT_SYMBOL_GPL(nft_set_do_lookup);
-#endif
void nft_lookup_eval(const struct nft_expr *expr,
struct nft_regs *regs,
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index c24c922f895d..8d3f040a904a 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -226,7 +226,8 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
- list_for_each_entry_rcu(be, &priv->list, head) {
+ list_for_each_entry_rcu(be, &priv->list, head,
+ lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) {
if (iter->count < iter->skip)
goto cont;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 9a10251228fd..793790d79d13 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -510,6 +510,23 @@ out:
*
* This function is called from the data path. It will search for
* an element matching the given key in the current active copy.
+ * Unlike other set types, this uses NFT_GENMASK_ANY instead of
+ * nft_genmask_cur().
+ *
+ * This is because new (future) elements are not reachable from
+ * priv->match, they get added to priv->clone instead.
+ * When the commit phase flips the generation bitmask, the
+ * 'now old' entries are skipped but without the 'now current'
+ * elements becoming visible. Using nft_genmask_cur() thus creates
+ * inconsistent state: matching old entries get skipped but thew
+ * newly matching entries are unreachable.
+ *
+ * GENMASK will still find the 'now old' entries which ensures consistent
+ * priv->match view.
+ *
+ * nft_pipapo_commit swaps ->clone and ->match shortly after the
+ * genbit flip. As ->clone doesn't contain the old entries in the first
+ * place, lookup will only find the now-current ones.
*
* Return: ntables API extension pointer or NULL if no match.
*/
@@ -518,12 +535,11 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_pipapo *priv = nft_set_priv(set);
- u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_elem *e;
m = rcu_dereference(priv->match);
- e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64());
+ e = pipapo_get(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64());
return e ? &e->ext : NULL;
}
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 2f090e253caf..c0884fa68c79 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1152,7 +1152,6 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
struct nft_pipapo *priv = nft_set_priv(set);
const struct nft_set_ext *ext = NULL;
struct nft_pipapo_scratch *scratch;
- u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
@@ -1248,8 +1247,7 @@ next_match:
if (last) {
const struct nft_set_ext *e = &f->mt[ret].e->ext;
- if (unlikely(nft_set_elem_expired(e) ||
- !nft_set_elem_active(e, genmask)))
+ if (unlikely(nft_set_elem_expired(e)))
goto next_match;
ext = e;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 938a257c069e..b1f04168ec93 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -77,7 +77,9 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_start(interval))
continue;
- interval = rbe;
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_rbtree_elem_expired(rbe))
+ interval = rbe;
} else if (d > 0)
parent = rcu_dereference_raw(parent->rb_right);
else {
@@ -102,8 +104,6 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
}
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
- nft_set_elem_active(&interval->ext, genmask) &&
- !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval))
return &interval->ext;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 104732d34543..978c129c6095 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1836,6 +1836,9 @@ static int genl_bind(struct net *net, int group)
!ns_capable(net->user_ns, CAP_SYS_ADMIN))
ret = -EPERM;
+ if (ret)
+ break;
+
if (family->bind)
family->bind(i);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 73bc39281ef5..9b45fbdc90ca 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -276,8 +276,6 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- if (unlikely(current->flags & PF_EXITING))
- return -EINTR;
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c5f7bbf5775f..3aa987e7f072 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -407,9 +407,9 @@ xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags)
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
alert_kvec.iov_len);
ret = sock_recvmsg(sock, &msg, flags);
- if (ret > 0 &&
- tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) {
- iov_iter_revert(&msg.msg_iter, ret);
+ if (ret > 0) {
+ if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT)
+ iov_iter_revert(&msg.msg_iter, ret);
ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg,
-EAGAIN);
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 89519aa52893..852573423e52 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7062,7 +7062,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
u32 seq, int flags,
struct cfg80211_registered_device *rdev,
struct net_device *dev,
- const u8 *mac_addr, struct station_info *sinfo)
+ const u8 *mac_addr, struct station_info *sinfo,
+ bool link_stats)
{
void *hdr;
struct nlattr *sinfoattr, *bss_param;
@@ -7283,7 +7284,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
goto nla_put_failure;
}
- if (sinfo->valid_links) {
+ if (link_stats && sinfo->valid_links) {
links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS);
if (!links)
goto nla_put_failure;
@@ -7574,7 +7575,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
rdev, wdev->netdev, mac_addr,
- &sinfo) < 0)
+ &sinfo, false) < 0)
goto out;
sta_idx++;
@@ -7635,7 +7636,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION,
info->snd_portid, info->snd_seq, 0,
- rdev, dev, mac_addr, &sinfo) < 0) {
+ rdev, dev, mac_addr, &sinfo, false) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
}
@@ -19680,7 +19681,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
return;
if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0,
- rdev, dev, mac_addr, sinfo) < 0) {
+ rdev, dev, mac_addr, sinfo, false) < 0) {
nlmsg_free(msg);
return;
}
@@ -19710,7 +19711,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr,
}
if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0,
- rdev, dev, mac_addr, sinfo) < 0) {
+ rdev, dev, mac_addr, sinfo, false) < 0) {
nlmsg_free(msg);
return;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9c3acecc14b1..72e34bd2d925 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -36,6 +36,20 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET 32
+struct xsk_addr_node {
+ u64 addr;
+ struct list_head addr_node;
+};
+
+struct xsk_addr_head {
+ u32 num_descs;
+ struct list_head addrs_list;
+};
+
+static struct kmem_cache *xsk_tx_generic_cache;
+
+#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb))
+
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -532,24 +546,43 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
+static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&pool->cq_lock, flags);
- ret = xskq_prod_reserve_addr(pool->cq, addr);
+ ret = xskq_prod_reserve(pool->cq);
spin_unlock_irqrestore(&pool->cq_lock, flags);
return ret;
}
-static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
+static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
+ struct sk_buff *skb)
{
+ struct xsk_addr_node *pos, *tmp;
+ u32 descs_processed = 0;
unsigned long flags;
+ u32 idx;
spin_lock_irqsave(&pool->cq_lock, flags);
- xskq_prod_submit_n(pool->cq, n);
+ idx = xskq_get_prod(pool->cq);
+
+ xskq_prod_write_addr(pool->cq, idx,
+ (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg);
+ descs_processed++;
+
+ if (unlikely(XSKCB(skb)->num_descs > 1)) {
+ list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+ xskq_prod_write_addr(pool->cq, idx + descs_processed,
+ pos->addr);
+ descs_processed++;
+ list_del(&pos->addr_node);
+ kmem_cache_free(xsk_tx_generic_cache, pos);
+ }
+ }
+ xskq_prod_submit_n(pool->cq, descs_processed);
spin_unlock_irqrestore(&pool->cq_lock, flags);
}
@@ -562,9 +595,14 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
spin_unlock_irqrestore(&pool->cq_lock, flags);
}
+static void xsk_inc_num_desc(struct sk_buff *skb)
+{
+ XSKCB(skb)->num_descs++;
+}
+
static u32 xsk_get_num_desc(struct sk_buff *skb)
{
- return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
+ return XSKCB(skb)->num_descs;
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -576,23 +614,33 @@ static void xsk_destruct_skb(struct sk_buff *skb)
*compl->tx_timestamp = ktime_get_tai_fast_ns();
}
- xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
+ xsk_cq_submit_addr_locked(xdp_sk(skb->sk)->pool, skb);
sock_wfree(skb);
}
-static void xsk_set_destructor_arg(struct sk_buff *skb)
+static void xsk_set_destructor_arg(struct sk_buff *skb, u64 addr)
{
- long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;
-
- skb_shinfo(skb)->destructor_arg = (void *)num;
+ BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb));
+ INIT_LIST_HEAD(&XSKCB(skb)->addrs_list);
+ XSKCB(skb)->num_descs = 0;
+ skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr;
}
static void xsk_consume_skb(struct sk_buff *skb)
{
struct xdp_sock *xs = xdp_sk(skb->sk);
+ u32 num_descs = xsk_get_num_desc(skb);
+ struct xsk_addr_node *pos, *tmp;
+
+ if (unlikely(num_descs > 1)) {
+ list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+ list_del(&pos->addr_node);
+ kmem_cache_free(xsk_tx_generic_cache, pos);
+ }
+ }
skb->destructor = sock_wfree;
- xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
+ xsk_cq_cancel_locked(xs->pool, num_descs);
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
xs->skb = NULL;
@@ -609,6 +657,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
{
struct xsk_buff_pool *pool = xs->pool;
u32 hr, len, ts, offset, copy, copied;
+ struct xsk_addr_node *xsk_addr;
struct sk_buff *skb = xs->skb;
struct page *page;
void *buffer;
@@ -623,6 +672,19 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
return ERR_PTR(err);
skb_reserve(skb, hr);
+
+ xsk_set_destructor_arg(skb, desc->addr);
+ } else {
+ xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
+ if (!xsk_addr)
+ return ERR_PTR(-ENOMEM);
+
+ /* in case of -EOVERFLOW that could happen below,
+ * xsk_consume_skb() will release this node as whole skb
+ * would be dropped, which implies freeing all list elements
+ */
+ xsk_addr->addr = desc->addr;
+ list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
}
addr = desc->addr;
@@ -694,8 +756,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err))
goto free_err;
+
+ xsk_set_destructor_arg(skb, desc->addr);
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct xsk_addr_node *xsk_addr;
struct page *page;
u8 *vaddr;
@@ -710,12 +775,22 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
goto free_err;
}
+ xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
+ if (!xsk_addr) {
+ __free_page(page);
+ err = -ENOMEM;
+ goto free_err;
+ }
+
vaddr = kmap_local_page(page);
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);
skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
+
+ xsk_addr->addr = desc->addr;
+ list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
}
if (first_frag && desc->options & XDP_TX_METADATA) {
@@ -759,7 +834,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
- xsk_set_destructor_arg(skb);
+ xsk_inc_num_desc(skb);
return skb;
@@ -769,7 +844,7 @@ free_err:
if (err == -EOVERFLOW) {
/* Drop the packet */
- xsk_set_destructor_arg(xs->skb);
+ xsk_inc_num_desc(xs->skb);
xsk_drop_skb(xs->skb);
xskq_cons_release(xs->tx);
} else {
@@ -812,7 +887,7 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr);
+ err = xsk_cq_reserve_locked(xs->pool);
if (err) {
err = -EAGAIN;
goto out;
@@ -1815,8 +1890,18 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
+ xsk_tx_generic_cache = kmem_cache_create("xsk_generic_xmit_cache",
+ sizeof(struct xsk_addr_node),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!xsk_tx_generic_cache) {
+ err = -ENOMEM;
+ goto out_unreg_notif;
+ }
+
return 0;
+out_unreg_notif:
+ unregister_netdevice_notifier(&xsk_netdev_notifier);
out_pernet:
unregister_pernet_subsys(&xsk_net_ops);
out_sk:
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 46d87e961ad6..f16f390370dc 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -344,6 +344,11 @@ static inline u32 xskq_cons_present_entries(struct xsk_queue *q)
/* Functions for producers */
+static inline u32 xskq_get_prod(struct xsk_queue *q)
+{
+ return READ_ONCE(q->ring->producer);
+}
+
static inline u32 xskq_prod_nb_free(struct xsk_queue *q, u32 max)
{
u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
@@ -390,6 +395,13 @@ static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
return 0;
}
+static inline void xskq_prod_write_addr(struct xsk_queue *q, u32 idx, u64 addr)
+{
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+ ring->desc[idx & q->ring_mask] = addr;
+}
+
static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_desc *descs,
u32 nb_entries)
{
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
index 5902b3714a16..a1db49eb159a 100644
--- a/rust/kernel/device.rs
+++ b/rust/kernel/device.rs
@@ -138,7 +138,9 @@ pub mod property;
/// }
/// ```
///
-/// An example for a class device implementation is [`drm::Device`].
+/// An example for a class device implementation is
+#[cfg_attr(CONFIG_DRM = "y", doc = "[`drm::Device`](kernel::drm::Device).")]
+#[cfg_attr(not(CONFIG_DRM = "y"), doc = "`drm::Device`.")]
///
/// # Invariants
///
@@ -151,7 +153,6 @@ pub mod property;
/// dropped from any thread.
///
/// [`AlwaysRefCounted`]: kernel::types::AlwaysRefCounted
-/// [`drm::Device`]: kernel::drm::Device
/// [`impl_device_context_deref`]: kernel::impl_device_context_deref
/// [`pci::Device`]: kernel::pci::Device
/// [`platform::Device`]: kernel::platform::Device
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index cfea7a38befb..da3a9f2091f5 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -75,8 +75,8 @@ asm (
CALL_DEPTH_ACCOUNT
" call my_direct_func1\n"
" leave\n"
-" .size my_tramp1, .-my_tramp1\n"
ASM_RET
+" .size my_tramp1, .-my_tramp1\n"
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c
index b7b77a6b2979..0de8facca4c5 100644
--- a/tools/testing/selftests/bpf/prog_tests/free_timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c
@@ -124,6 +124,10 @@ void test_free_timer(void)
int err;
skel = free_timer__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "open_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index d66687f1ee6a..56f660ca567b 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -86,6 +86,10 @@ void serial_test_timer(void)
int err;
timer_skel = timer__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
index f74b82305da8..b841597c8a3a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
@@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode)
struct timer_crash *skel;
skel = timer_crash__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load"))
return;
skel->bss->pid = getpid();
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
index 1a2f99596916..eb303fa1e09a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -59,6 +59,10 @@ void test_timer_lockup(void)
}
skel = timer_lockup__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 9ff7843909e7..c930c7d7105b 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -65,6 +65,10 @@ void serial_test_timer_mim(void)
goto cleanup;
timer_skel = timer_mim__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
index d67466c1ff77..f90531cf3ee5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -302,7 +302,7 @@ int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val
* barriers.
*/
if (val & _Q_LOCKED_MASK)
- smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
+ (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
/*
* take ownership and clear the pending bit.
@@ -380,7 +380,7 @@ queue:
/* Link @node into the waitqueue. */
WRITE_ONCE(prev->next, node);
- arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
+ (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
/*
* While waiting for the MCS lock, the next pointer may have
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
index 645be6cddf36..dfd8a258f14a 100644
--- a/tools/testing/selftests/bpf/progs/crypto_sanity.c
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -14,7 +14,7 @@ unsigned char key[256] = {};
u16 udp_test_port = 7777;
u32 authsize, key_len;
char algo[128] = {};
-char dst[16] = {};
+char dst[16] = {}, dst_bad[8] = {};
int status;
static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc)
@@ -59,10 +59,9 @@ int skb_crypto_setup(void *ctx)
.authsize = authsize,
};
struct bpf_crypto_ctx *cctx;
- int err = 0;
+ int err;
status = 0;
-
if (key_len > 256) {
status = -EINVAL;
return 0;
@@ -70,8 +69,8 @@ int skb_crypto_setup(void *ctx)
__builtin_memcpy(&params.algo, algo, sizeof(algo));
__builtin_memcpy(&params.key, key, sizeof(key));
- cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
if (!cctx) {
status = err;
return 0;
@@ -80,7 +79,6 @@ int skb_crypto_setup(void *ctx)
err = crypto_ctx_insert(cctx);
if (err && err != -EEXIST)
status = err;
-
return 0;
}
@@ -92,6 +90,7 @@ int decrypt_sanity(struct __sk_buff *skb)
struct bpf_dynptr psrc, pdst;
int err;
+ status = 0;
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -110,13 +109,23 @@ int decrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
@@ -129,7 +138,6 @@ int encrypt_sanity(struct __sk_buff *skb)
int err;
status = 0;
-
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -148,13 +156,23 @@ int encrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index 6438982b928b..ddd26d1a083f 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -226,8 +226,7 @@ int obj_new_no_composite(void *ctx)
SEC("?tc")
int obj_new_no_struct(void *ctx)
{
-
- bpf_obj_new(union { int data; unsigned udata; });
+ (void)bpf_obj_new(union { int data; unsigned udata; });
return 0;
}
@@ -252,7 +251,7 @@ int new_null_ret(void *ctx)
SEC("?tc")
int obj_new_acq(void *ctx)
{
- bpf_obj_new(struct foo);
+ (void)bpf_obj_new(struct foo);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
index 46697f381878..a47690174e0e 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -30,8 +30,12 @@ __test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
-__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); }
-__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); }
+__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); }
+__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); }
+__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); }
+__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); }
__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index 63ce708d93ed..e4b7c2b457ee 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -2,6 +2,13 @@
// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@@ -10,20 +17,12 @@
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
+#include <sys/fanotify.h>
#include "../../kselftest_harness.h"
#include "../statmount/statmount.h"
#include "../utils.h"
-// Needed for linux/fanotify.h
-#ifndef __kernel_fsid_t
-typedef struct {
- int val[2];
-} __kernel_fsid_t;
-#endif
-
-#include <sys/fanotify.h>
-
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
static const int mark_cmds[] = {
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
index 090a5ca65004..9f57ca46e3af 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
@@ -2,6 +2,13 @@
// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@@ -10,21 +17,12 @@
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
+#include <sys/fanotify.h>
#include "../../kselftest_harness.h"
-#include "../../pidfd/pidfd.h"
#include "../statmount/statmount.h"
#include "../utils.h"
-// Needed for linux/fanotify.h
-#ifndef __kernel_fsid_t
-typedef struct {
- int val[2];
-} __kernel_fsid_t;
-#endif
-
-#include <sys/fanotify.h>
-
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
static const int mark_types[] = {
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index c7e03e1d6f63..2b31d4a93ad7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -116,6 +116,7 @@ TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
TEST_GEN_FILES += tfo
TEST_PROGS += tfo_passive.sh
+TEST_PROGS += broadcast_ether_dst.sh
TEST_PROGS += broadcast_pmtu.sh
TEST_PROGS += ipv6_force_forwarding.sh
diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh
new file mode 100755
index 000000000000..334a7eca8a80
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_ether_dst.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Author: Brett A C Sheffield <bacs@librecast.net>
+# Author: Oscar Maes <oscmaes92@gmail.com>
+#
+# Ensure destination ethernet field is correctly set for
+# broadcast packets
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+GW_IP4="192.168.0.2"
+
+setup() {
+ setup_ns CLIENT_NS SERVER_NS
+
+ ip -net "${SERVER_NS}" link add link1 type veth \
+ peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+
+ ip -net "${CLIENT_NS}" route add default via "${GW_IP4}"
+ ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55
+}
+
+cleanup() {
+ rm -f "${CAPFILE}" "${OUTPUT}"
+ ip -net "${SERVER_NS}" link del link1
+ cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+test_broadcast_ether_dst() {
+ local rc=0
+ CAPFILE=$(mktemp -u cap.XXXXXXXXXX)
+ OUTPUT=$(mktemp -u out.XXXXXXXXXX)
+
+ echo "Testing ethernet broadcast destination"
+
+ # start tcpdump listening for icmp
+ # tcpdump will exit after receiving a single packet
+ # timeout will kill tcpdump if it is still running after 2s
+ timeout 2s ip netns exec "${CLIENT_NS}" \
+ tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" &
+ pid=$!
+ slowwait 1 grep -qs "listening" "${OUTPUT}"
+
+ # send broadcast ping
+ ip netns exec "${CLIENT_NS}" \
+ ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null
+
+ # wait for tcpdump for exit after receiving packet
+ wait "${pid}"
+
+ # compare ethernet destination field to ff:ff:ff:ff:ff:ff
+ ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \
+ awk '{sub(/,/,"",$3); print $3}')
+ if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then
+ echo "[ OK ]"
+ rc="${ksft_pass}"
+ else
+ echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \
+ "got ${ether_dst}"
+ rc="${ksft_fail}"
+ fi
+
+ return "${rc}"
+}
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+test_broadcast_ether_dst
+
+exit $?
diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config
new file mode 100644
index 000000000000..188f79796670
--- /dev/null
+++ b/tools/testing/selftests/net/can/config
@@ -0,0 +1,3 @@
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VCAN=m
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 7a3cb4c09e45..d847ff1737c3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -28,7 +28,7 @@ flush_pids()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 5e3c56253274..c2ab9f7f0d21 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -134,7 +134,7 @@ ns4=""
TEST_GROUP=""
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "$cin_disconnect"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 82cae37d9c20..7fd555b123b9 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,7 +8,7 @@
# ShellCheck incorrectly believes that most of the code here is unreachable
# because it's invoked by variable name, see how the "tests" array is used
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
. "$(dirname "${0}")/mptcp_lib.sh"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 418a903c3a4d..f01989be6e9b 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -95,7 +95,7 @@ init()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index ac7ec6f94023..ec6a87588191 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -32,7 +32,7 @@ ns1=""
err=$(mktemp)
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "${err}"
@@ -70,8 +70,9 @@ format_endpoints() {
mptcp_lib_pm_nl_format_endpoints "${@}"
}
+# This function is invoked indirectly
+#shellcheck disable=SC2317,SC2329
get_endpoint() {
- # shellcheck disable=SC2317 # invoked indirectly
mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
}
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 2329c2f8519b..1903e8e84a31 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -35,7 +35,7 @@ usage() {
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "$cout" "$sout"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 333064b0b5ac..970c329735ff 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -94,7 +94,7 @@ test_fail()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
print_title "Cleanup"