summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/netlink/specs/conntrack.yaml9
-rw-r--r--Documentation/netlink/specs/mptcp_pm.yaml4
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/arm64/include/asm/kvm_host.h1
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h30
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h4
-rw-r--r--arch/arm64/kvm/arm.c4
-rw-r--r--arch/arm64/kvm/debug.c13
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h5
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c25
-rw-r--r--arch/arm64/kvm/mmu.c45
-rw-r--r--arch/arm64/kvm/nested.c6
-rw-r--r--arch/arm64/kvm/pkvm.c11
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c6
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c15
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic.c80
-rw-r--r--arch/arm64/kvm/vgic/vgic.h8
-rw-r--r--arch/s390/kvm/interrupt.c15
-rw-r--r--arch/s390/kvm/kvm-s390.c24
-rw-r--r--arch/s390/kvm/pv.c16
-rw-r--r--arch/um/drivers/virtio_uml.c6
-rw-r--r--arch/um/os-Linux/file.c2
-rw-r--r--arch/um/os-Linux/util.c3
-rw-r--r--arch/x86/kvm/svm/svm.c3
-rw-r--r--drivers/block/zram/zram_drv.c8
-rw-r--r--drivers/dpll/dpll_netlink.c4
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/md/dm-raid.c6
-rw-r--r--drivers/md/dm-stripe.c10
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/ethernet/broadcom/cnic.c3
-rw-r--r--drivers/net/ethernet/cavium/liquidio/request_manager.c2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c80
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h1
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h1
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c12
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c22
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c16
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c6
-rw-r--r--drivers/net/ethernet/natsemi/ns83820.c13
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_debug.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c2
-rw-r--r--drivers/platform/x86/amd/pmc/pmc-quirks.c8
-rw-r--r--drivers/platform/x86/amd/pmf/core.c1
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c2
-rw-r--r--drivers/platform/x86/oxpec.c14
-rw-r--r--drivers/power/supply/bq27xxx_battery.c4
-rw-r--r--fs/btrfs/block-group.c9
-rw-r--r--fs/btrfs/delayed-inode.c3
-rw-r--r--fs/btrfs/inode.c11
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/zoned.c2
-rw-r--r--fs/nilfs2/sysfs.c4
-rw-r--r--fs/nilfs2/sysfs.h8
-rw-r--r--fs/smb/server/transport_rdma.c183
-rw-r--r--include/kvm/arm_vgic.h9
-rw-r--r--include/linux/damon.h2
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/rv.h6
-rw-r--r--include/linux/swap.h10
-rw-r--r--include/net/dst_metadata.h11
-rw-r--r--include/net/sock.h5
-rw-r--r--include/uapi/linux/mptcp.h2
-rw-r--r--include/uapi/linux/mptcp_pm.h4
-rw-r--r--kernel/cgroup/cgroup.c44
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/sched/ext.c6
-rw-r--r--kernel/trace/rv/monitors/sleep/sleep.c4
-rw-r--r--kernel/trace/rv/rv.c4
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--mm/damon/core.c8
-rw-r--r--mm/damon/sysfs.c23
-rw-r--r--mm/gup.c14
-rw-r--r--mm/mlock.c6
-rw-r--r--mm/swap.c50
-rw-r--r--mm/vmscan.c2
-rw-r--r--net/core/dev.c2
-rw-r--r--net/devlink/rate.c4
-rw-r--r--net/ethtool/common.c4
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_ao.c4
-rw-r--r--net/mptcp/options.c6
-rw-r--r--net/mptcp/pm_netlink.c7
-rw-r--r--net/mptcp/protocol.c16
-rw-r--r--net/mptcp/subflow.c4
-rw-r--r--net/rds/ib_frmr.c20
-rw-r--r--net/rfkill/rfkill-gpio.c4
-rw-r--r--net/rxrpc/rxgk.c18
-rw-r--r--net/rxrpc/rxgk_app.c29
-rw-r--r--net/rxrpc/rxgk_common.h14
-rw-r--r--net/tls/tls.h1
-rw-r--r--net/tls/tls_strp.c14
-rw-r--r--net/tls/tls_sw.c3
-rw-r--r--samples/damon/mtier.c3
-rw-r--r--samples/damon/prcl.c3
-rw-r--r--samples/damon/wsse.c3
-rw-r--r--tools/lib/subcmd/help.c3
-rw-r--r--tools/perf/builtin-lock.c7
-rw-r--r--tools/perf/util/maps.c9
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh197
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config1
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c11
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c16
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c7
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh14
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh88
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt26
-rw-r--r--tools/testing/selftests/net/tls.c16
128 files changed, 1138 insertions, 515 deletions
diff --git a/Documentation/netlink/specs/conntrack.yaml b/Documentation/netlink/specs/conntrack.yaml
index c6832633ab7b..591e22a2ee43 100644
--- a/Documentation/netlink/specs/conntrack.yaml
+++ b/Documentation/netlink/specs/conntrack.yaml
@@ -575,8 +575,8 @@ operations:
- nat-dst
- timeout
- mark
- - counter-orig
- - counter-reply
+ - counters-orig
+ - counters-reply
- use
- id
- nat-dst
@@ -591,7 +591,6 @@ operations:
request:
value: 0x101
attributes:
- - nfgen-family
- mark
- filter
- status
@@ -608,8 +607,8 @@ operations:
- nat-dst
- timeout
- mark
- - counter-orig
- - counter-reply
+ - counters-orig
+ - counters-reply
- use
- id
- nat-dst
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index d15335684ec3..d1b4829b580a 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -28,13 +28,13 @@ definitions:
traffic-patterns it can take a long time until the
MPTCP_EVENT_ESTABLISHED is sent.
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
- dport, server-side.
+ dport, server-side, [flags].
-
name: established
doc: >-
A MPTCP connection is established (can start new subflows).
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
- dport, server-side.
+ dport, server-side, [flags].
-
name: closed
doc: >-
diff --git a/MAINTAINERS b/MAINTAINERS
index f6206963efbf..520fb4e379a3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7430,7 +7430,7 @@ S: Supported
F: Documentation/devicetree/bindings/dpll/dpll-device.yaml
F: Documentation/devicetree/bindings/dpll/dpll-pin.yaml
F: Documentation/driver-api/dpll.rst
-F: drivers/dpll/*
+F: drivers/dpll/
F: include/linux/dpll.h
F: include/uapi/linux/dpll.h
@@ -16196,6 +16196,7 @@ R: Rik van Riel <riel@surriel.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Harry Yoo <harry.yoo@oracle.com>
+R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
S: Maintained
F: include/linux/rmap.h
@@ -16240,6 +16241,7 @@ R: Nico Pache <npache@redhat.com>
R: Ryan Roberts <ryan.roberts@arm.com>
R: Dev Jain <dev.jain@arm.com>
R: Barry Song <baohua@kernel.org>
+R: Lance Yang <lance.yang@linux.dev>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
@@ -22050,6 +22052,7 @@ F: drivers/infiniband/ulp/rtrs/
RUNTIME VERIFICATION (RV)
M: Steven Rostedt <rostedt@goodmis.org>
+M: Gabriele Monaco <gmonaco@redhat.com>
L: linux-trace-kernel@vger.kernel.org
S: Maintained
F: Documentation/trace/rv/
@@ -24257,7 +24260,7 @@ F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
F: drivers/input/keyboard/sun4i-lradc-keys.c
SUNDANCE NETWORK DRIVER
-M: Denis Kirjanov <dkirjanov@suse.de>
+M: Denis Kirjanov <kirjanov@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/dlink/sundance.c
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2b07f0a27a7d..0ee4f6fa3a17 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1369,6 +1369,7 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
}
void kvm_init_host_debug_data(void);
+void kvm_debug_init_vhe(void);
void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 1246216616b5..2888b5d03757 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -355,11 +355,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return pteref;
}
-static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
-{
- return pteref;
-}
-
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
/*
@@ -389,11 +384,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
}
-static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
-{
- return rcu_dereference_raw(pteref);
-}
-
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@@ -562,26 +552,6 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
- * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
- * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr: Intermediate physical address at which to place the mapping.
- * @size: Size of the mapping.
- *
- * The page-table is assumed to be unreachable by any hardware walkers prior
- * to freeing and therefore no TLB invalidation is performed.
- */
-void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
- u64 addr, u64 size);
-
-/**
- * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
- * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
- *
- * It is assumed that the rest of the page-table is freed before this operation.
- */
-void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
-
-/**
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
* @mm_ops: Memory management callbacks.
* @pgtable: Unlinked stage-2 paging structure to be freed.
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 35f9d9478004..ea58282f59bb 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -179,9 +179,7 @@ struct pkvm_mapping {
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops);
-void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
- u64 addr, u64 size);
-void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot, void *mc,
enum kvm_pgtable_walk_flags flags);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 5bf101c869c9..bd6b6a620a09 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2113,8 +2113,10 @@ static void cpu_hyp_init_features(void)
{
cpu_set_hyp_vector();
- if (is_kernel_in_hyp_mode())
+ if (is_kernel_in_hyp_mode()) {
kvm_timer_init_vhe();
+ kvm_debug_init_vhe();
+ }
if (vgic_present)
kvm_vgic_init_cpu_hardware();
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 381382c19fe4..e027d9c32b0d 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -96,6 +96,13 @@ void kvm_init_host_debug_data(void)
}
}
+void kvm_debug_init_vhe(void)
+{
+ /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */
+ if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1)))
+ write_sysreg_el1(0, SYS_PMSCR);
+}
+
/*
* Configures the 'external' MDSCR_EL1 value for the guest, i.e. when the host
* has taken over MDSCR_EL1.
@@ -138,6 +145,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu)
/* Must be called before kvm_vcpu_load_vhe() */
KVM_BUG_ON(vcpu_get_flag(vcpu, SYSREGS_ON_CPU), vcpu->kvm);
+ if (has_vhe())
+ *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
+
/*
* Determine which of the possible debug states we're in:
*
@@ -184,6 +194,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu)
void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu)
{
+ if (has_vhe())
+ write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
+
if (likely(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
return;
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 84ec4e100fbb..b6682202edf3 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -431,9 +431,6 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
- *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
- write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
u64 hcrx = vcpu->arch.hcrx_el2;
if (is_nested_ctxt(vcpu)) {
@@ -454,8 +451,6 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
- write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
-
write_sysreg(0, hstr_el2);
if (system_supports_pmuv3()) {
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index ccd575d5f6de..d3b9ec8a7c28 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -50,6 +50,10 @@ extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
static void __activate_traps(struct kvm_vcpu *vcpu)
{
___activate_traps(vcpu, vcpu->arch.hcr_el2);
+
+ *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
+ write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
__activate_traps_common(vcpu);
__activate_cptr_traps(vcpu);
@@ -93,6 +97,8 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
isb();
}
+ write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
+
__deactivate_traps_common(vcpu);
write_sysreg_hcr(this_cpu_ptr(&kvm_init_params)->hcr_el2);
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 71d2fc97f004..82da9b03692d 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -253,7 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
- __vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1);
+ __vcpu_assign_sys_reg(vcpu, VBAR_EL1, read_sysreg_el1(SYS_VBAR));
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c36f282a175d..c351b4abd5db 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1551,38 +1551,21 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
return 0;
}
-void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
- u64 addr, u64 size)
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
+ size_t pgd_sz;
struct kvm_pgtable_walker walker = {
.cb = stage2_free_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_TABLE_POST,
};
- WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
-}
-
-void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
-{
- size_t pgd_sz;
-
+ WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
-
- /*
- * Since the pgtable is unlinked at this point, and not shared with
- * other walkers, safely deference pgd with kvm_dereference_pteref_raw()
- */
- pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
+ pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
pgt->pgd = NULL;
}
-void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
-{
- kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
- kvm_pgtable_stage2_destroy_pgd(pgt);
-}
-
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 86f3d80daf37..736394292503 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -904,38 +904,6 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
return 0;
}
-/*
- * Assume that @pgt is valid and unlinked from the KVM MMU to free the
- * page-table without taking the kvm_mmu_lock and without performing any
- * TLB invalidations.
- *
- * Also, the range of addresses can be large enough to cause need_resched
- * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke
- * cond_resched() periodically to prevent hogging the CPU for a long time
- * and schedule something else, if required.
- */
-static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr,
- phys_addr_t end)
-{
- u64 next;
-
- do {
- next = stage2_range_addr_end(addr, end);
- KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr,
- next - addr);
- if (next != end)
- cond_resched();
- } while (addr = next, addr != end);
-}
-
-static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
-{
- unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);
-
- stage2_destroy_range(pgt, 0, BIT(ia_bits));
- KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
-}
-
/**
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
@@ -1012,7 +980,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
return 0;
out_destroy_pgtable:
- kvm_stage2_destroy(pgt);
+ KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
out_free_pgtable:
kfree(pgt);
return err;
@@ -1106,10 +1074,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
mmu->pgt = NULL;
free_percpu(mmu->last_vcpu_ran);
}
+
+ if (kvm_is_nested_s2_mmu(kvm, mmu))
+ kvm_init_nested_s2_mmu(mmu);
+
write_unlock(&kvm->mmu_lock);
if (pgt) {
- kvm_stage2_destroy(pgt);
+ KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
kfree(pgt);
}
}
@@ -1541,11 +1513,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);
- if (fault_is_perm && !write_fault && !exec_fault) {
- kvm_err("Unexpected L2 read permission error\n");
- return -EFAULT;
- }
-
if (!is_protected_kvm_enabled())
memcache = &vcpu->arch.mmu_page_cache;
else
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 77db81bae86f..50d559248a1f 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -847,7 +847,7 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
vt->wr.level));
- ipa_start = vt->wr.pa & (ipa_size - 1);
+ ipa_start = vt->wr.pa & ~(ipa_size - 1);
ipa_end = ipa_start + ipa_size;
if (ipa_end <= start || ipa_start >= end)
@@ -887,7 +887,7 @@ static void invalidate_vncr_va(struct kvm *kvm,
va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
vt->wr.level));
- va_start = vt->gva & (va_size - 1);
+ va_start = vt->gva & ~(va_size - 1);
va_end = va_start + va_size;
switch (scope->type) {
@@ -1276,7 +1276,7 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
!(tcr & TCR_ASID16))
asid &= GENMASK(7, 0);
- return asid != vt->wr.asid;
+ return asid == vt->wr.asid;
}
return true;
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 61827cf6fea4..fcd70bfe44fb 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -316,16 +316,9 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
return 0;
}
-void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
- u64 addr, u64 size)
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
- __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
-}
-
-void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
-{
- /* Expected to be called after all pKVM mappings have been released. */
- WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
+ __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
}
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 2684f273d9e1..4c1209261b65 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -69,7 +69,7 @@ static int iter_mark_lpis(struct kvm *kvm)
int nr_lpis = 0;
xa_for_each(&dist->lpi_xa, intid, irq) {
- if (!vgic_try_get_irq_kref(irq))
+ if (!vgic_try_get_irq_ref(irq))
continue;
xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 1e680ad6e863..4c3c0d82e476 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
+ xa_init(&dist->lpi_xa);
}
/* CREATION */
@@ -208,7 +208,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
raw_spin_lock_init(&irq->irq_lock);
irq->vcpu = NULL;
irq->target_vcpu = vcpu0;
- kref_init(&irq->refcount);
+ refcount_set(&irq->refcount, 0);
switch (dist->vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V2:
irq->targets = 0;
@@ -277,7 +277,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
irq->intid = i;
irq->vcpu = NULL;
irq->target_vcpu = vcpu;
- kref_init(&irq->refcount);
+ refcount_set(&irq->refcount, 0);
if (vgic_irq_is_sgi(i)) {
/* SGIs */
irq->enabled = 1;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 7368c13f16b7..ce3e3ed3f29f 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -78,7 +78,6 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq;
- unsigned long flags;
int ret;
/* In this case there is no put, since we keep the reference. */
@@ -89,7 +88,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (!irq)
return ERR_PTR(-ENOMEM);
- ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
+ ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
if (ret) {
kfree(irq);
return ERR_PTR(ret);
@@ -99,19 +98,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
raw_spin_lock_init(&irq->irq_lock);
irq->config = VGIC_CONFIG_EDGE;
- kref_init(&irq->refcount);
+ refcount_set(&irq->refcount, 1);
irq->intid = intid;
irq->target_vcpu = vcpu;
irq->group = 1;
- xa_lock_irqsave(&dist->lpi_xa, flags);
+ xa_lock(&dist->lpi_xa);
/*
* There could be a race with another vgic_add_lpi(), so we need to
* check that we don't add a second list entry with the same LPI.
*/
oldirq = xa_load(&dist->lpi_xa, intid);
- if (vgic_try_get_irq_kref(oldirq)) {
+ if (vgic_try_get_irq_ref(oldirq)) {
/* Someone was faster with adding this LPI, lets use that. */
kfree(irq);
irq = oldirq;
@@ -126,7 +125,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
}
out_unlock:
- xa_unlock_irqrestore(&dist->lpi_xa, flags);
+ xa_unlock(&dist->lpi_xa);
if (ret)
return ERR_PTR(ret);
@@ -547,7 +546,7 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
rcu_read_lock();
irq = xa_load(&its->translation_cache, cache_key);
- if (!vgic_try_get_irq_kref(irq))
+ if (!vgic_try_get_irq_ref(irq))
irq = NULL;
rcu_read_unlock();
@@ -571,7 +570,7 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
* its_lock, as the ITE (and the reference it holds) cannot be freed.
*/
lockdep_assert_held(&its->its_lock);
- vgic_get_irq_kref(irq);
+ vgic_get_irq_ref(irq);
old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 4d9343d2b0b1..548aec9d5a72 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -518,7 +518,7 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
if (!irq->hw || irq->host_irq != host_irq)
continue;
- if (!vgic_try_get_irq_kref(irq))
+ if (!vgic_try_get_irq_ref(irq))
return NULL;
return irq;
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index f5148b38120a..6dd5a10081e2 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -28,8 +28,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* kvm->arch.config_lock (mutex)
* its->cmd_lock (mutex)
* its->its_lock (mutex)
- * vgic_cpu->ap_list_lock must be taken with IRQs disabled
- * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
+ * vgic_dist->lpi_xa.xa_lock
+ * vgic_cpu->ap_list_lock must be taken with IRQs disabled
* vgic_irq->irq_lock must be taken with IRQs disabled
*
* As the ap_list_lock might be taken from the timer interrupt handler,
@@ -71,7 +71,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
rcu_read_lock();
irq = xa_load(&dist->lpi_xa, intid);
- if (!vgic_try_get_irq_kref(irq))
+ if (!vgic_try_get_irq_ref(irq))
irq = NULL;
rcu_read_unlock();
@@ -114,37 +114,66 @@ struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
return vgic_get_irq(vcpu->kvm, intid);
}
-/*
- * We can't do anything in here, because we lack the kvm pointer to
- * lock and remove the item from the lpi_list. So we keep this function
- * empty and use the return value of kref_put() to trigger the freeing.
- */
-static void vgic_irq_release(struct kref *ref)
+static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq)
+{
+ lockdep_assert_held(&dist->lpi_xa.xa_lock);
+ __xa_erase(&dist->lpi_xa, irq->intid);
+ kfree_rcu(irq, rcu);
+}
+
+static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
+{
+ if (irq->intid < VGIC_MIN_LPI)
+ return false;
+
+ return refcount_dec_and_test(&irq->refcount);
+}
+
+static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq *irq)
{
+ if (!__vgic_put_irq(kvm, irq))
+ return false;
+
+ irq->pending_release = true;
+ return true;
}
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- unsigned long flags;
- if (irq->intid < VGIC_MIN_LPI)
- return;
+ if (irq->intid >= VGIC_MIN_LPI)
+ might_lock(&dist->lpi_xa.xa_lock);
- if (!kref_put(&irq->refcount, vgic_irq_release))
+ if (!__vgic_put_irq(kvm, irq))
return;
- xa_lock_irqsave(&dist->lpi_xa, flags);
- __xa_erase(&dist->lpi_xa, irq->intid);
- xa_unlock_irqrestore(&dist->lpi_xa, flags);
+ xa_lock(&dist->lpi_xa);
+ vgic_release_lpi_locked(dist, irq);
+ xa_unlock(&dist->lpi_xa);
+}
- kfree_rcu(irq, rcu);
+static void vgic_release_deleted_lpis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ unsigned long intid;
+ struct vgic_irq *irq;
+
+ xa_lock(&dist->lpi_xa);
+
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ if (irq->pending_release)
+ vgic_release_lpi_locked(dist, irq);
+ }
+
+ xa_unlock(&dist->lpi_xa);
}
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq, *tmp;
+ bool deleted = false;
unsigned long flags;
raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
@@ -155,11 +184,14 @@ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
list_del(&irq->ap_list);
irq->vcpu = NULL;
raw_spin_unlock(&irq->irq_lock);
- vgic_put_irq(vcpu->kvm, irq);
+ deleted |= vgic_put_irq_norelease(vcpu->kvm, irq);
}
}
raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+
+ if (deleted)
+ vgic_release_deleted_lpis(vcpu->kvm);
}
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
@@ -399,7 +431,7 @@ retry:
* now in the ap_list. This is safe as the caller must already hold a
* reference on the irq.
*/
- vgic_get_irq_kref(irq);
+ vgic_get_irq_ref(irq);
list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
irq->vcpu = vcpu;
@@ -630,6 +662,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq, *tmp;
+ bool deleted_lpis = false;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
@@ -657,12 +690,12 @@ retry:
/*
* This vgic_put_irq call matches the
- * vgic_get_irq_kref in vgic_queue_irq_unlock,
+ * vgic_get_irq_ref in vgic_queue_irq_unlock,
* where we added the LPI to the ap_list. As
* we remove the irq from the list, we drop
* also drop the refcount.
*/
- vgic_put_irq(vcpu->kvm, irq);
+ deleted_lpis |= vgic_put_irq_norelease(vcpu->kvm, irq);
continue;
}
@@ -725,6 +758,9 @@ retry:
}
raw_spin_unlock(&vgic_cpu->ap_list_lock);
+
+ if (unlikely(deleted_lpis))
+ vgic_release_deleted_lpis(vcpu->kvm);
}
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
@@ -818,7 +854,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
* the AP list has been sorted already.
*/
if (multi_sgi && irq->priority > prio) {
- _raw_spin_unlock(&irq->irq_lock);
+ raw_spin_unlock(&irq->irq_lock);
break;
}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index de1c1d3261c3..ac5f9c5d2b98 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -267,7 +267,7 @@ void vgic_v2_put(struct kvm_vcpu *vcpu);
void vgic_v2_save_state(struct kvm_vcpu *vcpu);
void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
-static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
+static inline bool vgic_try_get_irq_ref(struct vgic_irq *irq)
{
if (!irq)
return false;
@@ -275,12 +275,12 @@ static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
if (irq->intid < VGIC_MIN_LPI)
return true;
- return kref_get_unless_zero(&irq->refcount);
+ return refcount_inc_not_zero(&irq->refcount);
}
-static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+static inline void vgic_get_irq_ref(struct vgic_irq *irq)
{
- WARN_ON_ONCE(!vgic_try_get_irq_kref(irq));
+ WARN_ON_ONCE(!vgic_try_get_irq_ref(irq));
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 2a92a8b9e4c2..9384572ffa7b 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2778,12 +2778,19 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
{
+ struct mm_struct *mm = kvm->mm;
struct page *page = NULL;
+ int locked = 1;
+
+ if (mmget_not_zero(mm)) {
+ mmap_read_lock(mm);
+ get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE,
+ &page, &locked);
+ if (locked)
+ mmap_read_unlock(mm);
+ mmput(mm);
+ }
- mmap_read_lock(kvm->mm);
- get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
- &page, NULL);
- mmap_read_unlock(kvm->mm);
return page;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bf6fa8b9ca73..6d51aa5f66be 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4864,12 +4864,12 @@ static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
* @vcpu: the vCPU whose gmap is to be fixed up
* @gfn: the guest frame number used for memslots (including fake memslots)
* @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
- * @flags: FOLL_* flags
+ * @foll: FOLL_* flags
*
* Return: 0 on success, < 0 in case of error.
* Context: The mm lock must not be held before calling. May sleep.
*/
-int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int foll)
{
struct kvm_memory_slot *slot;
unsigned int fault_flags;
@@ -4883,13 +4883,13 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return vcpu_post_run_addressing_exception(vcpu);
- fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+ fault_flags = foll & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
if (vcpu->arch.gmap->pfault_enabled)
- flags |= FOLL_NOWAIT;
+ foll |= FOLL_NOWAIT;
vmaddr = __gfn_to_hva_memslot(slot, gfn);
try_again:
- pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+ pfn = __kvm_faultin_pfn(slot, gfn, foll, &writable, &page);
/* Access outside memory, inject addressing exception */
if (is_noslot_pfn(pfn))
@@ -4905,7 +4905,7 @@ try_again:
return 0;
vcpu->stat.pfault_sync++;
/* Could not setup async pfault, try again synchronously */
- flags &= ~FOLL_NOWAIT;
+ foll &= ~FOLL_NOWAIT;
goto try_again;
}
/* Any other error */
@@ -4925,7 +4925,7 @@ try_again:
return rc;
}
-static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int foll)
{
unsigned long gaddr_tmp;
gfn_t gfn;
@@ -4950,18 +4950,18 @@ static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, un
}
gfn = gpa_to_gfn(gaddr_tmp);
}
- return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+ return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll);
}
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
{
- unsigned int flags = 0;
+ unsigned int foll = 0;
unsigned long gaddr;
int rc;
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
if (kvm_s390_cur_gmap_fault_is_write())
- flags = FAULT_FLAG_WRITE;
+ foll = FOLL_WRITE;
switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
case 0:
@@ -5003,7 +5003,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
send_sig(SIGSEGV, current, 0);
if (rc != -ENXIO)
break;
- flags = FAULT_FLAG_WRITE;
+ foll = FOLL_WRITE;
fallthrough;
case PGM_PROTECTION:
case PGM_SEGMENT_TRANSLATION:
@@ -5013,7 +5013,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
kvm_s390_assert_primary_as(vcpu);
- return vcpu_dat_fault_handler(vcpu, gaddr, flags);
+ return vcpu_dat_fault_handler(vcpu, gaddr, foll);
default:
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
current->thread.gmap_int_code, current->thread.gmap_teid.val);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 25ede8354514..6ba5a0305e25 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -624,6 +624,17 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
int cc, ret;
u16 dummy;
+ /* Add the notifier only once. No races because we hold kvm->lock */
+ if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+ /* The notifier will be unregistered when the VM is destroyed */
+ kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+ ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+ if (ret) {
+ kvm->arch.pv.mmu_notifier.ops = NULL;
+ return ret;
+ }
+ }
+
ret = kvm_s390_pv_alloc_vm(kvm);
if (ret)
return ret;
@@ -659,11 +670,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
return -EIO;
}
kvm->arch.gmap->guest_handle = uvcb.guest_handle;
- /* Add the notifier only once. No races because we hold kvm->lock */
- if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
- kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
- mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
- }
return 0;
}
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index ad8d78fb1d9a..de7867ae220d 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -1250,10 +1250,12 @@ static int virtio_uml_probe(struct platform_device *pdev)
device_set_wakeup_capable(&vu_dev->vdev.dev, true);
rc = register_virtio_device(&vu_dev->vdev);
- if (rc)
+ if (rc) {
put_device(&vu_dev->vdev.dev);
+ return rc;
+ }
vu_dev->registered = 1;
- return rc;
+ return 0;
error_init:
os_close_file(vu_dev->sock);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 617886d1fb1e..21f0e50fb1df 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -535,7 +535,7 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
cmsg->cmsg_type != SCM_RIGHTS)
return n;
- memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len);
+ memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0));
return n;
}
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 4193e04d7e4a..e3ad71a0d13c 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -20,8 +20,7 @@
void stack_protections(unsigned long address)
{
- if (mprotect((void *) address, UM_THREAD_SIZE,
- PROT_READ | PROT_WRITE | PROT_EXEC) < 0)
+ if (mprotect((void *) address, UM_THREAD_SIZE, PROT_READ | PROT_WRITE) < 0)
panic("protecting stack failed, errno = %d", errno);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d9931c6c4bc6..1bfebe40854f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4046,8 +4046,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
u64 cr8;
- if (nested_svm_virtualize_tpr(vcpu) ||
- kvm_vcpu_apicv_active(vcpu))
+ if (nested_svm_virtualize_tpr(vcpu))
return;
cr8 = kvm_get_cr8(vcpu);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 8acad3cc6e6e..f31652085adc 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1795,6 +1795,7 @@ static int write_same_filled_page(struct zram *zram, unsigned long fill,
u32 index)
{
zram_slot_lock(zram, index);
+ zram_free_page(zram, index);
zram_set_flag(zram, index, ZRAM_SAME);
zram_set_handle(zram, index, fill);
zram_slot_unlock(zram, index);
@@ -1832,6 +1833,7 @@ static int write_incompressible_page(struct zram *zram, struct page *page,
kunmap_local(src);
zram_slot_lock(zram, index);
+ zram_free_page(zram, index);
zram_set_flag(zram, index, ZRAM_HUGE);
zram_set_handle(zram, index, handle);
zram_set_obj_size(zram, index, PAGE_SIZE);
@@ -1855,11 +1857,6 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index)
unsigned long element;
bool same_filled;
- /* First, free memory allocated to this slot (if any) */
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_slot_unlock(zram, index);
-
mem = kmap_local_page(page);
same_filled = page_same_filled(mem, &element);
kunmap_local(mem);
@@ -1901,6 +1898,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index)
zcomp_stream_put(zstrm);
zram_slot_lock(zram, index);
+ zram_free_page(zram, index);
zram_set_handle(zram, index, handle);
zram_set_obj_size(zram, index, comp_len);
zram_slot_unlock(zram, index);
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
index 036f21cac0a9..0a852011653c 100644
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -211,8 +211,8 @@ static int
dpll_msg_add_clock_quality_level(struct sk_buff *msg, struct dpll_device *dpll,
struct netlink_ext_ack *extack)
{
+ DECLARE_BITMAP(qls, DPLL_CLOCK_QUALITY_LEVEL_MAX + 1) = { 0 };
const struct dpll_device_ops *ops = dpll_device_ops(dpll);
- DECLARE_BITMAP(qls, DPLL_CLOCK_QUALITY_LEVEL_MAX) = { 0 };
enum dpll_clock_quality_level ql;
int ret;
@@ -221,7 +221,7 @@ dpll_msg_add_clock_quality_level(struct sk_buff *msg, struct dpll_device *dpll,
ret = ops->clock_quality_level_get(dpll, dpll_priv(dpll), qls, extack);
if (ret)
return ret;
- for_each_set_bit(ql, qls, DPLL_CLOCK_QUALITY_LEVEL_MAX)
+ for_each_set_bit(ql, qls, DPLL_CLOCK_QUALITY_LEVEL_MAX + 1)
if (nla_put_u32(msg, DPLL_A_CLOCK_QUALITY_LEVEL, ql))
return -EMSGSIZE;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index efeee0a873c0..ab96b692e5a3 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -133,7 +133,7 @@ struct journal_sector {
commit_id_t commit_id;
};
-#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
+#define MAX_TAG_SIZE 255
#define METADATA_PADDING_SECTORS 8
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 79ea85d18e24..f4b904e24328 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3813,8 +3813,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
struct raid_set *rs = ti->private;
unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
- limits->io_min = chunk_size_bytes;
- limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
+ if (chunk_size_bytes) {
+ limits->io_min = chunk_size_bytes;
+ limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
+ }
}
static void raid_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 58902091bf79..1461dc740dae 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -456,11 +456,15 @@ static void stripe_io_hints(struct dm_target *ti,
struct queue_limits *limits)
{
struct stripe_c *sc = ti->private;
- unsigned int chunk_size = sc->chunk_size << SECTOR_SHIFT;
+ unsigned int io_min, io_opt;
limits->chunk_sectors = sc->chunk_size;
- limits->io_min = chunk_size;
- limits->io_opt = chunk_size * sc->stripes;
+
+ if (!check_shl_overflow(sc->chunk_size, SECTOR_SHIFT, &io_min) &&
+ !check_mul_overflow(io_min, sc->stripes, &io_opt)) {
+ limits->io_min = io_min;
+ limits->io_opt = io_opt;
+ }
}
static struct target_type stripe_target = {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 257333c88710..57be04f6cb11 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2132,6 +2132,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
} else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
+ bond_has_slaves(bond) &&
memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) {
/* Set slave to random address to avoid duplicate mac
* address in later fail over.
@@ -3355,7 +3356,6 @@ static void bond_ns_send_all(struct bonding *bond, struct slave *slave)
/* Find out through which dev should the packet go */
memset(&fl6, 0, sizeof(struct flowi6));
fl6.daddr = targets[i];
- fl6.flowi6_oif = bond->dev->ifindex;
dst = ip6_route_output(dev_net(bond->dev), NULL, &fl6);
if (dst->error) {
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index a9040c42d2ff..6e97a5a7daaf 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -4230,8 +4230,7 @@ static void cnic_cm_stop_bnx2x_hw(struct cnic_dev *dev)
cnic_bnx2x_delete_wait(dev, 0);
- cancel_delayed_work(&cp->delete_task);
- flush_workqueue(cnic_wq);
+ cancel_delayed_work_sync(&cp->delete_task);
if (atomic_read(&cp->iscsi_conn) != 0)
netdev_warn(dev->netdev, "%d iSCSI connections not destroyed\n",
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index de8a6ce86ad7..12105ffb5dac 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -126,7 +126,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
oct->io_qmask.iq |= BIT_ULL(iq_no);
/* Set the 32B/64B mode for each input queue */
- oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
+ oct->io_qmask.iq64B |= ((u64)(conf->instr_type == 64) << iq_no);
iq->iqcmd_64B = (conf->instr_type == 64);
oct->fn_list.setup_iq_regs(oct, iq_no);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 4643a3380618..b1e1ad9e4b48 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2736,7 +2736,7 @@ static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw)
dev_err(dev, "dpsw_ctrl_if_set_pools() failed\n");
goto err_get_attr;
}
- ethsw->bpid = dpbp_attrs.id;
+ ethsw->bpid = dpbp_attrs.bpid;
return 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 048c33039130..b194eae03208 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -948,9 +948,6 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
if (!eop_desc)
break;
- /* prevent any other reads prior to eop_desc */
- smp_rmb();
-
i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
/* we have caught up to head, no work left to do */
if (tx_head == tx_desc)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index d2871757ec94..41e7e29879a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -894,10 +894,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
rx_buf->page_offset, size);
sinfo->xdp_frags_size += size;
- /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
- * can pop off frags but driver has to handle it on its own
- */
- rx_ring->nr_frags = sinfo->nr_frags;
if (page_is_pfmemalloc(rx_buf->page))
xdp_buff_set_frag_pfmemalloc(xdp);
@@ -968,20 +964,20 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
/**
* ice_get_pgcnts - grab page_count() for gathered fragments
* @rx_ring: Rx descriptor ring to store the page counts on
+ * @ntc: the next to clean element (not included in this frame!)
*
* This function is intended to be called right before running XDP
* program so that the page recycling mechanism will be able to take
* a correct decision regarding underlying pages; this is done in such
* way as XDP program can change the refcount of page
*/
-static void ice_get_pgcnts(struct ice_rx_ring *rx_ring)
+static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc)
{
- u32 nr_frags = rx_ring->nr_frags + 1;
u32 idx = rx_ring->first_desc;
struct ice_rx_buf *rx_buf;
u32 cnt = rx_ring->count;
- for (int i = 0; i < nr_frags; i++) {
+ while (idx != ntc) {
rx_buf = &rx_ring->rx_buf[idx];
rx_buf->pgcnt = page_count(rx_buf->page);
@@ -1154,62 +1150,51 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
}
/**
- * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags
+ * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame
* @rx_ring: Rx ring with all the auxiliary data
* @xdp: XDP buffer carrying linear + frags part
- * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage
- * @ntc: a current next_to_clean value to be stored at rx_ring
+ * @ntc: the next to clean element (not included in this frame!)
* @verdict: return code from XDP program execution
*
- * Walk through gathered fragments and satisfy internal page
- * recycle mechanism; we take here an action related to verdict
- * returned by XDP program;
+ * Called after XDP program is completed, or on error with verdict set to
+ * ICE_XDP_CONSUMED.
+ *
+ * Walk through buffers from first_desc to the end of the frame, releasing
+ * buffers and satisfying internal page recycle mechanism. The action depends
+ * on verdict from XDP program.
*/
static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
- u32 *xdp_xmit, u32 ntc, u32 verdict)
+ u32 ntc, u32 verdict)
{
- u32 nr_frags = rx_ring->nr_frags + 1;
u32 idx = rx_ring->first_desc;
u32 cnt = rx_ring->count;
- u32 post_xdp_frags = 1;
struct ice_rx_buf *buf;
- int i;
+ u32 xdp_frags = 0;
+ int i = 0;
if (unlikely(xdp_buff_has_frags(xdp)))
- post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags;
+ xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
- for (i = 0; i < post_xdp_frags; i++) {
+ while (idx != ntc) {
buf = &rx_ring->rx_buf[idx];
+ if (++idx == cnt)
+ idx = 0;
- if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+ /* An XDP program could release fragments from the end of the
+ * buffer. For these, we need to keep the pagecnt_bias as-is.
+ * To do this, only adjust pagecnt_bias for fragments up to
+ * the total remaining after the XDP program has run.
+ */
+ if (verdict != ICE_XDP_CONSUMED)
ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
- *xdp_xmit |= verdict;
- } else if (verdict & ICE_XDP_CONSUMED) {
+ else if (i++ <= xdp_frags)
buf->pagecnt_bias++;
- } else if (verdict == ICE_XDP_PASS) {
- ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
- }
ice_put_rx_buf(rx_ring, buf);
-
- if (++idx == cnt)
- idx = 0;
- }
- /* handle buffers that represented frags released by XDP prog;
- * for these we keep pagecnt_bias as-is; refcount from struct page
- * has been decremented within XDP prog and we do not have to increase
- * the biased refcnt
- */
- for (; i < nr_frags; i++) {
- buf = &rx_ring->rx_buf[idx];
- ice_put_rx_buf(rx_ring, buf);
- if (++idx == cnt)
- idx = 0;
}
xdp->data = NULL;
rx_ring->first_desc = ntc;
- rx_ring->nr_frags = 0;
}
/**
@@ -1317,6 +1302,10 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, size, ntc);
+ /* Increment ntc before calls to ice_put_rx_mbuf() */
+ if (++ntc == cnt)
+ ntc = 0;
+
if (!xdp->data) {
void *hard_start;
@@ -1325,24 +1314,23 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
xdp_buff_clear_frags_flag(xdp);
} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
- ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED);
+ ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED);
break;
}
- if (++ntc == cnt)
- ntc = 0;
/* skip if it is NOP desc */
if (ice_is_non_eop(rx_ring, rx_desc))
continue;
- ice_get_pgcnts(rx_ring);
+ ice_get_pgcnts(rx_ring, ntc);
xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
if (xdp_verdict == ICE_XDP_PASS)
goto construct_skb;
total_rx_bytes += xdp_get_buff_len(xdp);
total_rx_pkts++;
- ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+ ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
+ xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR);
continue;
construct_skb:
@@ -1355,7 +1343,7 @@ construct_skb:
rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
xdp_verdict = ICE_XDP_CONSUMED;
}
- ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict);
+ ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
if (!skb)
break;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index fef750c5f288..2fd8e78178a2 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -358,7 +358,6 @@ struct ice_rx_ring {
struct ice_tx_ring *xdp_ring;
struct ice_rx_ring *next; /* pointer to next ring in q_vector */
struct xsk_buff_pool *xsk_pool;
- u32 nr_frags;
u16 max_frame;
u16 rx_buf_len;
dma_addr_t dma; /* physical address of ring */
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 266bfcf2a28f..a427f05814c1 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -345,6 +345,7 @@ struct igc_adapter {
/* LEDs */
struct mutex led_mutex;
struct igc_led_classdev *leds;
+ bool leds_available;
};
void igc_up(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index e79b14d50b24..728d7ca5338b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7335,8 +7335,14 @@ static int igc_probe(struct pci_dev *pdev,
if (IS_ENABLED(CONFIG_IGC_LEDS)) {
err = igc_led_setup(adapter);
- if (err)
- goto err_register;
+ if (err) {
+ netdev_warn_once(netdev,
+ "LED init failed (%d); continuing without LED support\n",
+ err);
+ adapter->leds_available = false;
+ } else {
+ adapter->leds_available = true;
+ }
}
return 0;
@@ -7392,7 +7398,7 @@ static void igc_remove(struct pci_dev *pdev)
cancel_work_sync(&adapter->watchdog_task);
hrtimer_cancel(&adapter->hrtimer);
- if (IS_ENABLED(CONFIG_IGC_LEDS))
+ if (IS_ENABLED(CONFIG_IGC_LEDS) && adapter->leds_available)
igc_led_free(adapter);
/* Release control of h/w to f/w. If f/w is AMT enabled, this
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 80e6a2ef1350..6218bdb7f941 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6973,6 +6973,13 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
break;
}
+ /* Make sure the SWFW semaphore is in a valid state */
+ if (hw->mac.ops.init_swfw_sync)
+ hw->mac.ops.init_swfw_sync(hw);
+
+ if (hw->mac.type == ixgbe_mac_e610)
+ mutex_init(&hw->aci.lock);
+
#ifdef IXGBE_FCOE
/* FCoE support exists, always init the FCoE lock */
spin_lock_init(&adapter->fcoe.lock);
@@ -11643,10 +11650,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_sw_init;
- /* Make sure the SWFW semaphore is in a valid state */
- if (hw->mac.ops.init_swfw_sync)
- hw->mac.ops.init_swfw_sync(hw);
-
if (ixgbe_check_fw_error(adapter))
return ixgbe_recovery_probe(adapter);
@@ -11850,8 +11853,6 @@ skip_sriov:
ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
ixgbe_mac_set_default_filter(adapter);
- if (hw->mac.type == ixgbe_mac_e610)
- mutex_init(&hw->aci.lock);
timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
if (ixgbe_removed(hw->hw_addr)) {
@@ -12007,9 +12008,9 @@ err_register:
devl_unlock(adapter->devlink);
ixgbe_release_hw_control(adapter);
ixgbe_clear_interrupt_scheme(adapter);
+err_sw_init:
if (hw->mac.type == ixgbe_mac_e610)
mutex_destroy(&adapter->hw.aci.lock);
-err_sw_init:
ixgbe_disable_sriov(adapter);
adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
iounmap(adapter->io_addr);
@@ -12060,10 +12061,8 @@ static void ixgbe_remove(struct pci_dev *pdev)
set_bit(__IXGBE_REMOVING, &adapter->state);
cancel_work_sync(&adapter->service_task);
- if (adapter->hw.mac.type == ixgbe_mac_e610) {
+ if (adapter->hw.mac.type == ixgbe_mac_e610)
ixgbe_disable_link_status_events(adapter);
- mutex_destroy(&adapter->hw.aci.lock);
- }
if (adapter->mii_bus)
mdiobus_unregister(adapter->mii_bus);
@@ -12123,6 +12122,9 @@ static void ixgbe_remove(struct pci_dev *pdev)
disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
free_netdev(netdev);
+ if (adapter->hw.mac.type == ixgbe_mac_e610)
+ mutex_destroy(&adapter->hw.aci.lock);
+
if (disable_dev)
pci_disable_device(pdev);
}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 24499bb36c00..bcea3fc26a8c 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -1124,11 +1124,24 @@ static int octep_set_features(struct net_device *dev, netdev_features_t features
return err;
}
+static bool octep_is_vf_valid(struct octep_device *oct, int vf)
+{
+ if (vf >= CFG_GET_ACTIVE_VFS(oct->conf)) {
+ netdev_err(oct->netdev, "Invalid VF ID %d\n", vf);
+ return false;
+ }
+
+ return true;
+}
+
static int octep_get_vf_config(struct net_device *dev, int vf,
struct ifla_vf_info *ivi)
{
struct octep_device *oct = netdev_priv(dev);
+ if (!octep_is_vf_valid(oct, vf))
+ return -EINVAL;
+
ivi->vf = vf;
ether_addr_copy(ivi->mac, oct->vf_info[vf].mac_addr);
ivi->spoofchk = true;
@@ -1143,6 +1156,9 @@ static int octep_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
struct octep_device *oct = netdev_priv(dev);
int err;
+ if (!octep_is_vf_valid(oct, vf))
+ return -EINVAL;
+
if (!is_valid_ether_addr(mac)) {
dev_err(&oct->pdev->dev, "Invalid MAC Address %pM\n", mac);
return -EADDRNOTAVAIL;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
index ebecdd29f3bd..0867fab61b19 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
@@ -196,6 +196,7 @@ static void octep_pfvf_get_mac_addr(struct octep_device *oct, u32 vf_id,
vf_id);
return;
}
+ ether_addr_copy(oct->vf_info[vf_id].mac_addr, rsp->s_set_mac.mac_addr);
rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK;
}
@@ -205,6 +206,8 @@ static void octep_pfvf_dev_remove(struct octep_device *oct, u32 vf_id,
{
int err;
+ /* Reset VF-specific information maintained by the PF */
+ memset(&oct->vf_info[vf_id], 0, sizeof(struct octep_pfvf_info));
err = octep_ctrl_net_dev_remove(oct, vf_id);
if (err) {
rsp->s.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
index e52cc6b1a26c..dedd586ed310 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
@@ -491,7 +491,7 @@ void otx2_ptp_destroy(struct otx2_nic *pfvf)
if (!ptp)
return;
- cancel_delayed_work(&pfvf->ptp->synctstamp_work);
+ cancel_delayed_work_sync(&pfvf->ptp->synctstamp_work);
ptp_clock_unregister(ptp->ptp_clock);
kfree(ptp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 9560fcba643f..ac65e3191480 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -92,6 +92,7 @@ enum {
MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL,
MLX5E_ACCEL_FS_POL_FT_LEVEL,
+ MLX5E_ACCEL_FS_POL_MISS_FT_LEVEL,
MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL,
#endif
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index ffcd0cdeb775..23703f28386a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -185,6 +185,7 @@ struct mlx5e_ipsec_rx_create_attr {
u32 family;
int prio;
int pol_level;
+ int pol_miss_level;
int sa_level;
int status_level;
enum mlx5_flow_namespace_type chains_ns;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 98b6a3a623f9..65dc3529283b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -747,6 +747,7 @@ static void ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec,
attr->family = family;
attr->prio = MLX5E_NIC_PRIO;
attr->pol_level = MLX5E_ACCEL_FS_POL_FT_LEVEL;
+ attr->pol_miss_level = MLX5E_ACCEL_FS_POL_MISS_FT_LEVEL;
attr->sa_level = MLX5E_ACCEL_FS_ESP_FT_LEVEL;
attr->status_level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL;
attr->chains_ns = MLX5_FLOW_NAMESPACE_KERNEL;
@@ -833,7 +834,7 @@ static int ipsec_rx_chains_create_miss(struct mlx5e_ipsec *ipsec,
ft_attr.max_fte = 1;
ft_attr.autogroup.max_num_groups = 1;
- ft_attr.level = attr->pol_level;
+ ft_attr.level = attr->pol_miss_level;
ft_attr.prio = attr->prio;
ft = mlx5_create_auto_grouped_flow_table(attr->ns, &ft_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e680673ffb72..15eded36b872 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -139,8 +139,6 @@ void mlx5e_update_carrier(struct mlx5e_priv *priv)
if (up) {
netdev_info(priv->netdev, "Link up\n");
netif_carrier_on(priv->netdev);
- mlx5e_port_manual_buffer_config(priv, 0, priv->netdev->mtu,
- NULL, NULL, NULL);
} else {
netdev_info(priv->netdev, "Link down\n");
netif_carrier_off(priv->netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 63a7a788fb0d..cd0242eb008c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1506,12 +1506,21 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
static int
mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev));
struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ int err;
+
+ netdev = mlx5_uplink_netdev_get(dev);
+ if (!netdev)
+ return 0;
+ priv = netdev_priv(netdev);
rpriv->netdev = priv->netdev;
- return mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile,
- rpriv);
+ err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile,
+ rpriv);
+ mlx5_uplink_netdev_put(dev, netdev);
+ return err;
}
static void
@@ -1638,8 +1647,16 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
struct net_device *netdev = rpriv->netdev;
- struct mlx5e_priv *priv = netdev_priv(netdev);
- void *ppriv = priv->ppriv;
+ struct mlx5e_priv *priv;
+ void *ppriv;
+
+ if (!netdev) {
+ ppriv = rpriv;
+ goto free_ppriv;
+ }
+
+ priv = netdev_priv(netdev);
+ ppriv = priv->ppriv;
if (rep->vport == MLX5_VPORT_UPLINK) {
mlx5e_vport_uplink_rep_unload(rpriv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 8b4977650183..5f2d6c35f1ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -1515,6 +1515,7 @@ static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
speed = lksettings.base.speed;
out:
+ mlx5_uplink_netdev_put(mdev, slave);
return speed;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index cb165085a4c1..db552c012b4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -114,9 +114,9 @@
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy,
- * {IPsec RoCE MPV,Alias table},IPsec RoCE policy
+ * IPsec policy miss, {IPsec RoCE MPV,Alias table},IPsec RoCE policy
*/
-#define KERNEL_NIC_PRIO_NUM_LEVELS 10
+#define KERNEL_NIC_PRIO_NUM_LEVELS 11
#define KERNEL_NIC_NUM_PRIOS 1
/* One more level for tc, and one more for promisc */
#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index b111ccd03b02..74ea5da58b7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -47,7 +47,20 @@ int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev)
{
- return mdev->mlx5e_res.uplink_netdev;
+ struct mlx5e_resources *mlx5e_res = &mdev->mlx5e_res;
+ struct net_device *netdev;
+
+ mutex_lock(&mlx5e_res->uplink_netdev_lock);
+ netdev = mlx5e_res->uplink_netdev;
+ netdev_hold(netdev, &mlx5e_res->tracker, GFP_KERNEL);
+ mutex_unlock(&mlx5e_res->uplink_netdev_lock);
+ return netdev;
+}
+
+static inline void mlx5_uplink_netdev_put(struct mlx5_core_dev *mdev,
+ struct net_device *netdev)
+{
+ netdev_put(netdev, &mdev->mlx5e_res.tracker);
}
struct mlx5_sd;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 2d7adf7444ba..aa9f2b0a77d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -1170,7 +1170,11 @@ const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev,
mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size,
force_legacy);
i = find_first_bit(&temp, max_size);
- if (i < max_size)
+
+ /* mlx5e_link_info has holes. Check speed
+ * is not zero as indication of one.
+ */
+ if (i < max_size && table[i].speed)
return &table[i];
return NULL;
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 56d5464222d9..cdbf82affa7b 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -820,7 +820,7 @@ static void rx_irq(struct net_device *ndev)
struct ns83820 *dev = PRIV(ndev);
struct rx_info *info = &dev->rx_info;
unsigned next_rx;
- int rx_rc, len;
+ int len;
u32 cmdsts;
__le32 *desc;
unsigned long flags;
@@ -881,8 +881,10 @@ static void rx_irq(struct net_device *ndev)
if (likely(CMDSTS_OK & cmdsts)) {
#endif
skb_put(skb, len);
- if (unlikely(!skb))
+ if (unlikely(!skb)) {
+ ndev->stats.rx_dropped++;
goto netdev_mangle_me_harder_failed;
+ }
if (cmdsts & CMDSTS_DEST_MULTI)
ndev->stats.multicast++;
ndev->stats.rx_packets++;
@@ -901,15 +903,12 @@ static void rx_irq(struct net_device *ndev)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_IPV6), tag);
}
#endif
- rx_rc = netif_rx(skb);
- if (NET_RX_DROP == rx_rc) {
-netdev_mangle_me_harder_failed:
- ndev->stats.rx_dropped++;
- }
+ netif_rx(skb);
} else {
dev_kfree_skb_irq(skb);
}
+netdev_mangle_me_harder_failed:
nr++;
next_rx = info->next_rx;
desc = info->descs + (DESC_SIZE * next_rx);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 9c3d3dd2f847..1f0cea3cae92 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -4462,10 +4462,11 @@ static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
goto out;
}
- /* Add override window info to buffer */
+ /* Add override window info to buffer, preventing buffer overflow */
override_window_dwords =
- qed_rd(p_hwfn, p_ptt, GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) *
- PROTECTION_OVERRIDE_ELEMENT_DWORDS;
+ min(qed_rd(p_hwfn, p_ptt, GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) *
+ PROTECTION_OVERRIDE_ELEMENT_DWORDS,
+ PROTECTION_OVERRIDE_DEPTH_DWORDS);
if (override_window_dwords) {
addr = BYTES_TO_DWORDS(GRC_REG_PROTECTION_OVERRIDE_WINDOW);
offset += qed_grc_dump_addr_range(p_hwfn,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c
index d912e709a92c..bb03dad4a300 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/tx.c
@@ -2092,7 +2092,7 @@ static void iwl_txq_gen1_update_byte_cnt_tbl(struct iwl_trans *trans,
break;
}
- if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_9000 &&
+ if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_7000 &&
trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
len = DIV_ROUND_UP(len, 4);
diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c
index 18fb44139de2..4d0a38e06f08 100644
--- a/drivers/platform/x86/amd/pmc/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c
@@ -239,6 +239,14 @@ static const struct dmi_system_id fwbug_list[] = {
DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"),
}
},
+ {
+ .ident = "MECHREVO Yilong15Pro Series GM5HG7A",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MECHREVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Yilong15Pro Series GM5HG7A"),
+ }
+ },
/* https://bugzilla.kernel.org/show_bug.cgi?id=220116 */
{
.ident = "PCSpecialist Lafite Pro V 14M",
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index ef988605c4da..bc544a4a5266 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -403,6 +403,7 @@ static const struct acpi_device_id amd_pmf_acpi_ids[] = {
{"AMDI0103", 0},
{"AMDI0105", 0},
{"AMDI0107", 0},
+ {"AMDI0108", 0},
{ }
};
MODULE_DEVICE_TABLE(acpi, amd_pmf_acpi_ids);
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 3a488cf9ca06..6a62bc5b02fd 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -673,6 +673,8 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code,
if (atkbd_reports_vol_keys)
*code = ASUS_WMI_KEY_IGNORE;
break;
+ case 0x5D: /* Wireless console Toggle */
+ case 0x5E: /* Wireless console Enable / Keyboard Attach, Detach */
case 0x5F: /* Wireless console Disable / Special Key */
if (quirks->key_wlan_event)
*code = quirks->key_wlan_event;
diff --git a/drivers/platform/x86/oxpec.c b/drivers/platform/x86/oxpec.c
index eb076bb4099b..54377b282ff8 100644
--- a/drivers/platform/x86/oxpec.c
+++ b/drivers/platform/x86/oxpec.c
@@ -126,6 +126,13 @@ static const struct dmi_system_id dmi_table[] = {
},
{
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "AOKZOE"),
+ DMI_EXACT_MATCH(DMI_BOARD_NAME, "AOKZOE A1X"),
+ },
+ .driver_data = (void *)oxp_fly,
+ },
+ {
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "AYANEO"),
DMI_MATCH(DMI_BOARD_NAME, "AYANEO 2"),
},
@@ -306,6 +313,13 @@ static const struct dmi_system_id dmi_table[] = {
},
.driver_data = (void *)oxp_x1,
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"),
+ DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1Pro EVA-02"),
+ },
+ .driver_data = (void *)oxp_x1,
+ },
{},
};
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 93dcebbe1141..ad2d9ecf32a5 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -1919,8 +1919,8 @@ static void bq27xxx_battery_update_unlocked(struct bq27xxx_device_info *di)
bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
- if ((cache.flags & 0xff) == 0xff)
- cache.flags = -1; /* read error */
+ if (di->chip == BQ27000 && (cache.flags & 0xff) == 0xff)
+ cache.flags = -ENODEV; /* bq27000 hdq read error */
if (cache.flags >= 0) {
cache.capacity = bq27xxx_battery_read_soc(di);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9bf282d2453c..499a9edf0ca3 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1795,7 +1795,14 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
bg1 = list_entry(a, struct btrfs_block_group, bg_list);
bg2 = list_entry(b, struct btrfs_block_group, bg_list);
- return bg1->used > bg2->used;
+ /*
+ * Some other task may be updating the ->used field concurrently, but it
+ * is not serious if we get a stale value or load/store tearing issues,
+ * as sorting the list of block groups to reclaim is not critical and an
+ * occasional imperfect order is ok. So silence KCSAN and avoid the
+ * overhead of locking or any other synchronization.
+ */
+ return data_race(bg1->used > bg2->used);
}
static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0f8d8e275143..c0c1ddd46b67 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1843,7 +1843,6 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;
struct inode *vfs_inode = &inode->vfs_inode;
@@ -1864,8 +1863,6 @@ int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
i_uid_write(vfs_inode, btrfs_stack_inode_uid(inode_item));
i_gid_write(vfs_inode, btrfs_stack_inode_gid(inode_item));
btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
- btrfs_inode_set_file_extent_range(inode, 0,
- round_up(i_size_read(vfs_inode), fs_info->sectorsize));
vfs_inode->i_mode = btrfs_stack_inode_mode(inode_item);
set_nlink(vfs_inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(vfs_inode, btrfs_stack_inode_nbytes(inode_item));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e7218e78bff4..18db1053cdf0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3885,10 +3885,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
bool filled = false;
int first_xattr_slot;
- ret = btrfs_init_file_extent_tree(inode);
- if (ret)
- goto out;
-
ret = btrfs_fill_inode(inode, &rdev);
if (!ret)
filled = true;
@@ -3920,8 +3916,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
i_uid_write(vfs_inode, btrfs_inode_uid(leaf, inode_item));
i_gid_write(vfs_inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
- btrfs_inode_set_file_extent_range(inode, 0,
- round_up(i_size_read(vfs_inode), fs_info->sectorsize));
inode_set_atime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->atime),
btrfs_timespec_nsec(leaf, &inode_item->atime));
@@ -3953,6 +3947,11 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
btrfs_set_inode_mapping_order(inode);
cache_index:
+ ret = btrfs_init_file_extent_tree(inode);
+ if (ret)
+ goto out;
+ btrfs_inode_set_file_extent_range(inode, 0,
+ round_up(i_size_read(vfs_inode), fs_info->sectorsize));
/*
* If we were modified in the current generation and evicted from memory
* and then re-read we need to do a full sync since we don't have any
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7d5d90845ca9..7a63afedd01e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1964,7 +1964,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
search_key.objectid = log_key.objectid;
search_key.type = BTRFS_INODE_EXTREF_KEY;
- search_key.offset = key->objectid;
+ search_key.offset = btrfs_extref_hash(key->objectid, name.name, name.len);
ret = backref_in_log(root->log_root, &search_key, key->objectid, &name);
if (ret < 0) {
goto out;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index ea662036f441..efc2a81f50e5 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2582,9 +2582,9 @@ again:
spin_lock(&space_info->lock);
space_info->total_bytes -= bg->length;
space_info->disk_total -= bg->length * factor;
+ space_info->disk_total -= bg->zone_unusable;
/* There is no allocation ever happened. */
ASSERT(bg->used == 0);
- ASSERT(bg->zone_unusable == 0);
/* No super block in a block group on the zoned setup. */
ASSERT(bg->bytes_super == 0);
spin_unlock(&space_info->lock);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 14868a3dd592..bc52afbfc5c7 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -1075,7 +1075,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
************************************************************************/
static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
+ struct kobj_attribute *attr, char *buf)
{
return sysfs_emit(buf, "%d.%d\n",
NILFS_CURRENT_REV, NILFS_MINOR_REV);
@@ -1087,7 +1087,7 @@ static const char features_readme_str[] =
"(1) revision\n\tshow current revision of NILFS file system driver.\n";
static ssize_t nilfs_feature_README_show(struct kobject *kobj,
- struct attribute *attr,
+ struct kobj_attribute *attr,
char *buf)
{
return sysfs_emit(buf, features_readme_str);
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
index 78a87a016928..d370cd5cce3f 100644
--- a/fs/nilfs2/sysfs.h
+++ b/fs/nilfs2/sysfs.h
@@ -50,16 +50,16 @@ struct nilfs_sysfs_dev_subgroups {
struct completion sg_segments_kobj_unregister;
};
-#define NILFS_COMMON_ATTR_STRUCT(name) \
+#define NILFS_KOBJ_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
struct attribute attr; \
- ssize_t (*show)(struct kobject *, struct attribute *, \
+ ssize_t (*show)(struct kobject *, struct kobj_attribute *, \
char *); \
- ssize_t (*store)(struct kobject *, struct attribute *, \
+ ssize_t (*store)(struct kobject *, struct kobj_attribute *, \
const char *, size_t); \
}
-NILFS_COMMON_ATTR_STRUCT(feature);
+NILFS_KOBJ_ATTR_STRUCT(feature);
#define NILFS_DEV_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 5466aa8c39b1..6550bd9f002c 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -554,7 +554,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
case SMB_DIRECT_MSG_DATA_TRANSFER: {
struct smb_direct_data_transfer *data_transfer =
(struct smb_direct_data_transfer *)recvmsg->packet;
- unsigned int data_length;
+ u32 remaining_data_length, data_offset, data_length;
int avail_recvmsg_count, receive_credits;
if (wc->byte_len <
@@ -564,15 +564,25 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
+ remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length);
data_length = le32_to_cpu(data_transfer->data_length);
- if (data_length) {
- if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
- (u64)data_length) {
- put_recvmsg(t, recvmsg);
- smb_direct_disconnect_rdma_connection(t);
- return;
- }
+ data_offset = le32_to_cpu(data_transfer->data_offset);
+ if (wc->byte_len < data_offset ||
+ wc->byte_len < (u64)data_offset + data_length) {
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
+ return;
+ }
+ if (remaining_data_length > t->max_fragmented_recv_size ||
+ data_length > t->max_fragmented_recv_size ||
+ (u64)remaining_data_length + (u64)data_length >
+ (u64)t->max_fragmented_recv_size) {
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
+ return;
+ }
+ if (data_length) {
if (t->full_packet_received)
recvmsg->first_segment = true;
@@ -1209,78 +1219,130 @@ static int smb_direct_writev(struct ksmbd_transport *t,
bool need_invalidate, unsigned int remote_key)
{
struct smb_direct_transport *st = smb_trans_direct_transfort(t);
- int remaining_data_length;
- int start, i, j;
- int max_iov_size = st->max_send_size -
+ size_t remaining_data_length;
+ size_t iov_idx;
+ size_t iov_ofs;
+ size_t max_iov_size = st->max_send_size -
sizeof(struct smb_direct_data_transfer);
int ret;
- struct kvec vec;
struct smb_direct_send_ctx send_ctx;
+ int error = 0;
if (st->status != SMB_DIRECT_CS_CONNECTED)
return -ENOTCONN;
//FIXME: skip RFC1002 header..
+ if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4))
+ return -EINVAL;
buflen -= 4;
+ iov_idx = 1;
+ iov_ofs = 0;
remaining_data_length = buflen;
ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
- start = i = 1;
- buflen = 0;
- while (true) {
- buflen += iov[i].iov_len;
- if (buflen > max_iov_size) {
- if (i > start) {
- remaining_data_length -=
- (buflen - iov[i].iov_len);
- ret = smb_direct_post_send_data(st, &send_ctx,
- &iov[start], i - start,
- remaining_data_length);
- if (ret)
+ while (remaining_data_length) {
+ struct kvec vecs[SMB_DIRECT_MAX_SEND_SGES - 1]; /* minus smbdirect hdr */
+ size_t possible_bytes = max_iov_size;
+ size_t possible_vecs;
+ size_t bytes = 0;
+ size_t nvecs = 0;
+
+ /*
+ * For the last message remaining_data_length should be
+ * have been 0 already!
+ */
+ if (WARN_ON_ONCE(iov_idx >= niovs)) {
+ error = -EINVAL;
+ goto done;
+ }
+
+ /*
+ * We have 2 factors which limit the arguments we pass
+ * to smb_direct_post_send_data():
+ *
+ * 1. The number of supported sges for the send,
+ * while one is reserved for the smbdirect header.
+ * And we currently need one SGE per page.
+ * 2. The number of negotiated payload bytes per send.
+ */
+ possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx);
+
+ while (iov_idx < niovs && possible_vecs && possible_bytes) {
+ struct kvec *v = &vecs[nvecs];
+ int page_count;
+
+ v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs;
+ v->iov_len = min_t(size_t,
+ iov[iov_idx].iov_len - iov_ofs,
+ possible_bytes);
+ page_count = get_buf_page_count(v->iov_base, v->iov_len);
+ if (page_count > possible_vecs) {
+ /*
+ * If the number of pages in the buffer
+ * is to much (because we currently require
+ * one SGE per page), we need to limit the
+ * length.
+ *
+ * We know possible_vecs is at least 1,
+ * so we always keep the first page.
+ *
+ * We need to calculate the number extra
+ * pages (epages) we can also keep.
+ *
+ * We calculate the number of bytes in the
+ * first page (fplen), this should never be
+ * larger than v->iov_len because page_count is
+ * at least 2, but adding a limitation feels
+ * better.
+ *
+ * Then we calculate the number of bytes (elen)
+ * we can keep for the extra pages.
+ */
+ size_t epages = possible_vecs - 1;
+ size_t fpofs = offset_in_page(v->iov_base);
+ size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len);
+ size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE);
+
+ v->iov_len = fplen + elen;
+ page_count = get_buf_page_count(v->iov_base, v->iov_len);
+ if (WARN_ON_ONCE(page_count > possible_vecs)) {
+ /*
+ * Something went wrong in the above
+ * logic...
+ */
+ error = -EINVAL;
goto done;
- } else {
- /* iov[start] is too big, break it */
- int nvec = (buflen + max_iov_size - 1) /
- max_iov_size;
-
- for (j = 0; j < nvec; j++) {
- vec.iov_base =
- (char *)iov[start].iov_base +
- j * max_iov_size;
- vec.iov_len =
- min_t(int, max_iov_size,
- buflen - max_iov_size * j);
- remaining_data_length -= vec.iov_len;
- ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
- remaining_data_length);
- if (ret)
- goto done;
}
- i++;
- if (i == niovs)
- break;
}
- start = i;
- buflen = 0;
- } else {
- i++;
- if (i == niovs) {
- /* send out all remaining vecs */
- remaining_data_length -= buflen;
- ret = smb_direct_post_send_data(st, &send_ctx,
- &iov[start], i - start,
- remaining_data_length);
- if (ret)
- goto done;
- break;
+ possible_vecs -= page_count;
+ nvecs += 1;
+ possible_bytes -= v->iov_len;
+ bytes += v->iov_len;
+
+ iov_ofs += v->iov_len;
+ if (iov_ofs >= iov[iov_idx].iov_len) {
+ iov_idx += 1;
+ iov_ofs = 0;
}
}
+
+ remaining_data_length -= bytes;
+
+ ret = smb_direct_post_send_data(st, &send_ctx,
+ vecs, nvecs,
+ remaining_data_length);
+ if (unlikely(ret)) {
+ error = ret;
+ goto done;
+ }
}
done:
ret = smb_direct_flush_send_list(st, &send_ctx, true);
+ if (unlikely(!ret && error))
+ ret = error;
/*
* As an optimization, we don't wait for individual I/O to finish
@@ -1744,6 +1806,11 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
return -EINVAL;
}
+ if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
+ pr_err("warning: device max_send_sge = %d too small\n",
+ device->attrs.max_send_sge);
+ return -EINVAL;
+ }
if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
pr_err("warning: device max_recv_sge = %d too small\n",
device->attrs.max_recv_sge);
@@ -1767,7 +1834,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
cap->max_send_wr = max_send_wrs;
cap->max_recv_wr = t->recv_credit_max;
- cap->max_send_sge = max_sge_per_wr;
+ cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
cap->max_inline_data = 0;
cap->max_rdma_ctxs = t->max_rw_credits;
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 404883c7af6e..4000ff16f295 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -8,8 +8,8 @@
#include <linux/bits.h>
#include <linux/kvm.h>
#include <linux/irqreturn.h>
-#include <linux/kref.h>
#include <linux/mutex.h>
+#include <linux/refcount.h>
#include <linux/spinlock.h>
#include <linux/static_key.h>
#include <linux/types.h>
@@ -139,10 +139,13 @@ struct vgic_irq {
bool pending_latch; /* The pending latch state used to calculate
* the pending state for both level
* and edge triggered IRQs. */
- bool active; /* not used for LPIs */
+ bool active;
+ bool pending_release; /* Used for LPIs only, unreferenced IRQ
+ * pending a release */
+
bool enabled;
bool hw; /* Tied to HW IRQ */
- struct kref refcount; /* Used for LPIs */
+ refcount_t refcount; /* Used for LPIs */
u32 hwintid; /* HW INTID number */
unsigned int host_irq; /* linux irq corresponding to hwintid */
union {
diff --git a/include/linux/damon.h b/include/linux/damon.h
index f13664c62ddd..9e62b2a85538 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -636,6 +636,7 @@ struct damon_operations {
* @data: Data that will be passed to @fn.
* @repeat: Repeat invocations.
* @return_code: Return code from @fn invocation.
+ * @dealloc_on_cancel: De-allocate when canceled.
*
* Control damon_call(), which requests specific kdamond to invoke a given
* function. Refer to damon_call() for more details.
@@ -645,6 +646,7 @@ struct damon_call_control {
void *data;
bool repeat;
int return_code;
+ bool dealloc_on_cancel;
/* private: internal use only */
/* informs if the kdamond finished handling of the request */
struct completion completion;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8c5fbfb85749..10fe492e1fed 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -663,6 +663,7 @@ struct mlx5e_resources {
bool tisn_valid;
} hw_objs;
struct net_device *uplink_netdev;
+ netdevice_tracker tracker;
struct mutex uplink_netdev_lock;
struct mlx5_crypto_dek_priv *dek_priv;
};
diff --git a/include/linux/rv.h b/include/linux/rv.h
index 14410a42faef..9520aab34bcb 100644
--- a/include/linux/rv.h
+++ b/include/linux/rv.h
@@ -7,16 +7,14 @@
#ifndef _LINUX_RV_H
#define _LINUX_RV_H
-#include <linux/types.h>
-#include <linux/list.h>
-
#define MAX_DA_NAME_LEN 32
#define MAX_DA_RETRY_RACING_EVENTS 3
#ifdef CONFIG_RV
+#include <linux/array_size.h>
#include <linux/bitops.h>
+#include <linux/list.h>
#include <linux/types.h>
-#include <linux/array_size.h>
/*
* Deterministic automaton per-object variables.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2fe6ed2cc3fd..7012a0f758d8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -385,6 +385,16 @@ void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
void mark_page_accessed(struct page *);
void folio_mark_accessed(struct folio *);
+static inline bool folio_may_be_lru_cached(struct folio *folio)
+{
+ /*
+ * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting.
+ * Holding small numbers of low-order mTHP folios in per-CPU LRU cache
+ * will be sensible, but nobody has implemented and tested that yet.
+ */
+ return !folio_test_large(folio);
+}
+
extern atomic_t lru_disable_count;
static inline bool lru_cache_disabled(void)
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 4160731dcb6e..1fc2fb03ce3f 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -3,6 +3,7 @@
#define __NET_DST_METADATA_H 1
#include <linux/skbuff.h>
+#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/macsec.h>
#include <net/dst.h>
@@ -220,9 +221,15 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
int md_size)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct metadata_dst *tun_dst;
+
+ tun_dst = __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
+ 0, flags, tunnel_id, md_size);
- return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
- 0, flags, tunnel_id, md_size);
+ if (tun_dst && (iph->frag_off & htons(IP_DF)))
+ __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT,
+ tun_dst->u.tun_info.key.tun_flags);
+ return tun_dst;
}
static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr,
diff --git a/include/net/sock.h b/include/net/sock.h
index fb13322a11fc..2e14283c5be1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2061,6 +2061,9 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
if (sock) {
WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
+ } else {
+ /* Note: sk_uid is unchanged. */
+ WRITE_ONCE(sk->sk_ino, 0);
}
}
@@ -2082,8 +2085,6 @@ static inline void sock_orphan(struct sock *sk)
sock_set_flag(sk, SOCK_DEAD);
sk_set_socket(sk, NULL);
sk->sk_wq = NULL;
- /* Note: sk_uid is unchanged. */
- WRITE_ONCE(sk->sk_ino, 0);
write_unlock_bh(&sk->sk_callback_lock);
}
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 67d015df8893..5fd5b4cf75ca 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -31,6 +31,8 @@
#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0)
#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1)
+#define MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 _BITUL(0)
+
#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h
index 6ac84b2f636c..7359d34da446 100644
--- a/include/uapi/linux/mptcp_pm.h
+++ b/include/uapi/linux/mptcp_pm.h
@@ -16,10 +16,10 @@
* good time to allocate memory and send ADD_ADDR if needed. Depending on the
* traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED
* is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
- * sport, dport, server-side.
+ * sport, dport, server-side, [flags].
* @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new
* subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
- * sport, dport, server-side.
+ * sport, dport, server-side, [flags].
* @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token.
* @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer.
* Attributes: token, rem_id, family, daddr4 | daddr6 [, dport].
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 312c6a8b55bb..77d02f87f3f1 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -126,8 +126,31 @@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
* of concurrent destructions. Use a separate workqueue so that cgroup
* destruction work items don't end up filling up max_active of system_wq
* which may lead to deadlock.
+ *
+ * A cgroup destruction should enqueue work sequentially to:
+ * cgroup_offline_wq: use for css offline work
+ * cgroup_release_wq: use for css release work
+ * cgroup_free_wq: use for free work
+ *
+ * Rationale for using separate workqueues:
+ * The cgroup root free work may depend on completion of other css offline
+ * operations. If all tasks were enqueued to a single workqueue, this could
+ * create a deadlock scenario where:
+ * - Free work waits for other css offline work to complete.
+ * - But other css offline work is queued after free work in the same queue.
+ *
+ * Example deadlock scenario with single workqueue (cgroup_destroy_wq):
+ * 1. umount net_prio
+ * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx)
+ * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx)
+ * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline.
+ * 5. net_prio root destruction blocks waiting for perf_event CSS A offline,
+ * which can never complete as it's behind in the same queue and
+ * workqueue's max_active is 1.
*/
-static struct workqueue_struct *cgroup_destroy_wq;
+static struct workqueue_struct *cgroup_offline_wq;
+static struct workqueue_struct *cgroup_release_wq;
+static struct workqueue_struct *cgroup_free_wq;
/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
@@ -4159,6 +4182,7 @@ static void cgroup_file_release(struct kernfs_open_file *of)
cft->release(of);
put_cgroup_ns(ctx->ns);
kfree(ctx);
+ of->priv = NULL;
}
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
@@ -5558,7 +5582,7 @@ static void css_release_work_fn(struct work_struct *work)
cgroup_unlock();
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
- queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
+ queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
}
static void css_release(struct percpu_ref *ref)
@@ -5567,7 +5591,7 @@ static void css_release(struct percpu_ref *ref)
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_release_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ queue_work(cgroup_release_wq, &css->destroy_work);
}
static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5701,7 +5725,7 @@ err_list_del:
list_del_rcu(&css->sibling);
err_free_css:
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
- queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
+ queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
return ERR_PTR(err);
}
@@ -5939,7 +5963,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
if (atomic_dec_and_test(&css->online_cnt)) {
INIT_WORK(&css->destroy_work, css_killed_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ queue_work(cgroup_offline_wq, &css->destroy_work);
}
}
@@ -6325,8 +6349,14 @@ static int __init cgroup_wq_init(void)
* We would prefer to do this in cgroup_init() above, but that
* is called before init_workqueues(): so leave this until after.
*/
- cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
- BUG_ON(!cgroup_destroy_wq);
+ cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1);
+ BUG_ON(!cgroup_offline_wq);
+
+ cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1);
+ BUG_ON(!cgroup_release_wq);
+
+ cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1);
+ BUG_ON(!cgroup_free_wq);
return 0;
}
core_initcall(cgroup_wq_init);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index be00629f0ba4..ccba6fc3c3fe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9551,7 +9551,7 @@ static unsigned long tg_weight(struct task_group *tg)
#ifdef CONFIG_FAIR_GROUP_SCHED
return scale_load_down(tg->shares);
#else
- return sched_weight_from_cgroup(tg->scx_weight);
+ return sched_weight_from_cgroup(tg->scx.weight);
#endif
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 4ae32ef179dd..088ceff38c8a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -6788,12 +6788,8 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(void)
* CPUs disagree, they use %ENQUEUE_RESTORE which is bypassed to
* the current local DSQ for running tasks and thus are not
* visible to the BPF scheduler.
- *
- * Also skip re-enqueueing tasks that can only run on this
- * CPU, as they would just be re-added to the same local
- * DSQ without any benefit.
*/
- if (p->migration_pending || is_migration_disabled(p) || p->nr_cpus_allowed == 1)
+ if (p->migration_pending)
continue;
dispatch_dequeue(rq, p);
diff --git a/kernel/trace/rv/monitors/sleep/sleep.c b/kernel/trace/rv/monitors/sleep/sleep.c
index eea447b06907..c1347da69e9d 100644
--- a/kernel/trace/rv/monitors/sleep/sleep.c
+++ b/kernel/trace/rv/monitors/sleep/sleep.c
@@ -127,7 +127,9 @@ static void handle_sys_enter(void *data, struct pt_regs *regs, long id)
mon = ltl_get_monitor(current);
switch (id) {
+#ifdef __NR_clock_nanosleep
case __NR_clock_nanosleep:
+#endif
#ifdef __NR_clock_nanosleep_time64
case __NR_clock_nanosleep_time64:
#endif
@@ -138,7 +140,9 @@ static void handle_sys_enter(void *data, struct pt_regs *regs, long id)
ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, true);
break;
+#ifdef __NR_futex
case __NR_futex:
+#endif
#ifdef __NR_futex_time64
case __NR_futex_time64:
#endif
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 1482e91c39f4..48338520376f 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -495,7 +495,7 @@ static void *available_monitors_next(struct seq_file *m, void *p, loff_t *pos)
*/
static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
{
- struct rv_monitor *mon = p;
+ struct rv_monitor *mon = container_of(p, struct rv_monitor, list);
(*pos)++;
@@ -805,7 +805,7 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
retval = create_monitor_dir(monitor, parent);
if (retval)
- return retval;
+ goto out_unlock;
/* keep children close to the parent for easier visualisation */
if (parent)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ccae62d4fb91..fa60362a3f31 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -908,6 +908,8 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
return -EINVAL;
}
buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
buf[len] = '\0';
ret = kstrtouint(buf, 0, &maxactive);
if (ret || !maxactive) {
diff --git a/mm/damon/core.c b/mm/damon/core.c
index c2e0b469fd43..08065b363972 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -2479,10 +2479,14 @@ static void kdamond_call(struct damon_ctx *ctx, bool cancel)
mutex_lock(&ctx->call_controls_lock);
list_del(&control->list);
mutex_unlock(&ctx->call_controls_lock);
- if (!control->repeat)
+ if (!control->repeat) {
complete(&control->completion);
- else
+ } else if (control->canceled && control->dealloc_on_cancel) {
+ kfree(control);
+ continue;
+ } else {
list_add(&control->list, &repeat_controls);
+ }
}
control = list_first_entry_or_null(&repeat_controls,
struct damon_call_control, list);
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 7b9254cadd5f..c96c2154128f 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1534,14 +1534,10 @@ static int damon_sysfs_repeat_call_fn(void *data)
return 0;
}
-static struct damon_call_control damon_sysfs_repeat_call_control = {
- .fn = damon_sysfs_repeat_call_fn,
- .repeat = true,
-};
-
static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
{
struct damon_ctx *ctx;
+ struct damon_call_control *repeat_call_control;
int err;
if (damon_sysfs_kdamond_running(kdamond))
@@ -1554,18 +1550,29 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
damon_destroy_ctx(kdamond->damon_ctx);
kdamond->damon_ctx = NULL;
+ repeat_call_control = kmalloc(sizeof(*repeat_call_control),
+ GFP_KERNEL);
+ if (!repeat_call_control)
+ return -ENOMEM;
+
ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]);
- if (IS_ERR(ctx))
+ if (IS_ERR(ctx)) {
+ kfree(repeat_call_control);
return PTR_ERR(ctx);
+ }
err = damon_start(&ctx, 1, false);
if (err) {
+ kfree(repeat_call_control);
damon_destroy_ctx(ctx);
return err;
}
kdamond->damon_ctx = ctx;
- damon_sysfs_repeat_call_control.data = kdamond;
- damon_call(ctx, &damon_sysfs_repeat_call_control);
+ repeat_call_control->fn = damon_sysfs_repeat_call_fn;
+ repeat_call_control->data = kdamond;
+ repeat_call_control->repeat = true;
+ repeat_call_control->dealloc_on_cancel = true;
+ damon_call(ctx, repeat_call_control);
return err;
}
diff --git a/mm/gup.c b/mm/gup.c
index adffe663594d..0bc4d140fc07 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2287,8 +2287,8 @@ static unsigned long collect_longterm_unpinnable_folios(
struct pages_or_folios *pofs)
{
unsigned long collected = 0;
- bool drain_allow = true;
struct folio *folio;
+ int drained = 0;
long i = 0;
for (folio = pofs_get_folio(pofs, i); folio;
@@ -2307,9 +2307,17 @@ static unsigned long collect_longterm_unpinnable_folios(
continue;
}
- if (!folio_test_lru(folio) && drain_allow) {
+ if (drained == 0 && folio_may_be_lru_cached(folio) &&
+ folio_ref_count(folio) !=
+ folio_expected_ref_count(folio) + 1) {
+ lru_add_drain();
+ drained = 1;
+ }
+ if (drained == 1 && folio_may_be_lru_cached(folio) &&
+ folio_ref_count(folio) !=
+ folio_expected_ref_count(folio) + 1) {
lru_add_drain_all();
- drain_allow = false;
+ drained = 2;
}
if (!folio_isolate_lru(folio))
diff --git a/mm/mlock.c b/mm/mlock.c
index a1d93ad33c6d..bb0776f5ef7c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -255,7 +255,7 @@ void mlock_folio(struct folio *folio)
folio_get(folio);
if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
@@ -278,7 +278,7 @@ void mlock_new_folio(struct folio *folio)
folio_get(folio);
if (!folio_batch_add(fbatch, mlock_new(folio)) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
@@ -299,7 +299,7 @@ void munlock_folio(struct folio *folio)
*/
folio_get(folio);
if (!folio_batch_add(fbatch, folio) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
diff --git a/mm/swap.c b/mm/swap.c
index 3632dd061beb..b74ebe865dd9 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -164,6 +164,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
for (i = 0; i < folio_batch_count(fbatch); i++) {
struct folio *folio = fbatch->folios[i];
+ /* block memcg migration while the folio moves between lru */
+ if (move_fn != lru_add && !folio_test_clear_lru(folio))
+ continue;
+
folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
move_fn(lruvec, folio);
@@ -176,14 +180,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
}
static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
- struct folio *folio, move_fn_t move_fn,
- bool on_lru, bool disable_irq)
+ struct folio *folio, move_fn_t move_fn, bool disable_irq)
{
unsigned long flags;
- if (on_lru && !folio_test_clear_lru(folio))
- return;
-
folio_get(folio);
if (disable_irq)
@@ -191,8 +191,8 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
else
local_lock(&cpu_fbatches.lock);
- if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || folio_test_large(folio) ||
- lru_cache_disabled())
+ if (!folio_batch_add(this_cpu_ptr(fbatch), folio) ||
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
if (disable_irq)
@@ -201,13 +201,13 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
local_unlock(&cpu_fbatches.lock);
}
-#define folio_batch_add_and_move(folio, op, on_lru) \
- __folio_batch_add_and_move( \
- &cpu_fbatches.op, \
- folio, \
- op, \
- on_lru, \
- offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq) \
+#define folio_batch_add_and_move(folio, op) \
+ __folio_batch_add_and_move( \
+ &cpu_fbatches.op, \
+ folio, \
+ op, \
+ offsetof(struct cpu_fbatches, op) >= \
+ offsetof(struct cpu_fbatches, lock_irq) \
)
static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
@@ -231,10 +231,10 @@ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
void folio_rotate_reclaimable(struct folio *folio)
{
if (folio_test_locked(folio) || folio_test_dirty(folio) ||
- folio_test_unevictable(folio))
+ folio_test_unevictable(folio) || !folio_test_lru(folio))
return;
- folio_batch_add_and_move(folio, lru_move_tail, true);
+ folio_batch_add_and_move(folio, lru_move_tail);
}
void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
@@ -328,10 +328,11 @@ static void folio_activate_drain(int cpu)
void folio_activate(struct folio *folio)
{
- if (folio_test_active(folio) || folio_test_unevictable(folio))
+ if (folio_test_active(folio) || folio_test_unevictable(folio) ||
+ !folio_test_lru(folio))
return;
- folio_batch_add_and_move(folio, lru_activate, true);
+ folio_batch_add_and_move(folio, lru_activate);
}
#else
@@ -507,7 +508,7 @@ void folio_add_lru(struct folio *folio)
lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
folio_set_active(folio);
- folio_batch_add_and_move(folio, lru_add, false);
+ folio_batch_add_and_move(folio, lru_add);
}
EXPORT_SYMBOL(folio_add_lru);
@@ -685,13 +686,13 @@ void lru_add_drain_cpu(int cpu)
void deactivate_file_folio(struct folio *folio)
{
/* Deactivating an unevictable folio will not accelerate reclaim */
- if (folio_test_unevictable(folio))
+ if (folio_test_unevictable(folio) || !folio_test_lru(folio))
return;
if (lru_gen_enabled() && lru_gen_clear_refs(folio))
return;
- folio_batch_add_and_move(folio, lru_deactivate_file, true);
+ folio_batch_add_and_move(folio, lru_deactivate_file);
}
/*
@@ -704,13 +705,13 @@ void deactivate_file_folio(struct folio *folio)
*/
void folio_deactivate(struct folio *folio)
{
- if (folio_test_unevictable(folio))
+ if (folio_test_unevictable(folio) || !folio_test_lru(folio))
return;
if (lru_gen_enabled() ? lru_gen_clear_refs(folio) : !folio_test_active(folio))
return;
- folio_batch_add_and_move(folio, lru_deactivate, true);
+ folio_batch_add_and_move(folio, lru_deactivate);
}
/**
@@ -723,10 +724,11 @@ void folio_deactivate(struct folio *folio)
void folio_mark_lazyfree(struct folio *folio)
{
if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
+ !folio_test_lru(folio) ||
folio_test_swapcache(folio) || folio_test_unevictable(folio))
return;
- folio_batch_add_and_move(folio, lru_lazyfree, true);
+ folio_batch_add_and_move(folio, lru_lazyfree);
}
void lru_add_drain(void)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a48aec8bfd92..674999999cd0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4507,7 +4507,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
}
/* ineligible */
- if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
+ if (zone > sc->reclaim_idx) {
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
diff --git a/net/core/dev.c b/net/core/dev.c
index 93a25d87b86b..8d49b2198d07 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6965,7 +6965,7 @@ static void napi_stop_kthread(struct napi_struct *napi)
* the kthread.
*/
while (true) {
- if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state))
+ if (!test_bit(NAPI_STATE_SCHED_THREADED, &napi->state))
break;
msleep(20);
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 110b3fa8a0b1..264fb82cba19 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -34,7 +34,7 @@ devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
static struct devlink_rate *
devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
{
- static struct devlink_rate *devlink_rate;
+ struct devlink_rate *devlink_rate;
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
if (devlink_rate_is_node(devlink_rate) &&
@@ -819,8 +819,8 @@ EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
*/
void devl_rate_nodes_destroy(struct devlink *devlink)
{
- static struct devlink_rate *devlink_rate, *tmp;
const struct devlink_ops *ops = devlink->ops;
+ struct devlink_rate *devlink_rate, *tmp;
devl_assert_locked(devlink);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 4f58648a27ad..92e6a681c797 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -905,7 +905,7 @@ int ethtool_net_get_ts_info_by_phc(struct net_device *dev,
int err;
if (!ops->get_ts_info)
- return -ENODEV;
+ return -EOPNOTSUPP;
/* Does ptp comes from netdev */
ethtool_init_tsinfo(info);
@@ -973,7 +973,7 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev,
int err;
err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc);
- if (err == -ENODEV) {
+ if (err == -ENODEV || err == -EOPNOTSUPP) {
struct phy_device *phy;
phy = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71a956fbfc55..ad76556800f2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3327,6 +3327,7 @@ int tcp_disconnect(struct sock *sk, int flags)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int old_state = sk->sk_state;
+ struct request_sock *req;
u32 seq;
if (old_state != TCP_CLOSE)
@@ -3442,6 +3443,10 @@ int tcp_disconnect(struct sock *sk, int flags)
/* Clean up fastopen related fields */
+ req = rcu_dereference_protected(tp->fastopen_rsk,
+ lockdep_sock_is_held(sk));
+ if (req)
+ reqsk_fastopen_remove(sk, req, false);
tcp_free_fastopen_req(tp);
inet_clear_bit(DEFER_CONNECT, sk);
tp->fastopen_client_fail = 0;
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index bbb8d5f0eae7..3338b6cc85c4 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -1178,7 +1178,9 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb)
if (!ao)
return;
- WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq);
+ /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */
+ if (skb)
+ WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq);
ao->rcv_sne = 0;
hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk))
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 2a8ea28442b2..1103b3341a70 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -985,13 +985,13 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}
- if (mp_opt->deny_join_id0)
- WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
-
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
set_fully_established:
+ if (mp_opt->deny_join_id0)
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+
mptcp_data_lock((struct sock *)msk);
__mptcp_subflow_fully_established(msk, subflow, mp_opt);
mptcp_data_unlock((struct sock *)msk);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 50aaf259959a..ce7d42d3bd00 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -408,6 +408,7 @@ static int mptcp_event_created(struct sk_buff *skb,
const struct sock *ssk)
{
int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
+ u16 flags = 0;
if (err)
return err;
@@ -415,6 +416,12 @@ static int mptcp_event_created(struct sk_buff *skb,
if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
return -EMSGSIZE;
+ if (READ_ONCE(msk->pm.remote_deny_join_id0))
+ flags |= MPTCP_PM_EV_FLAG_DENY_JOIN_ID0;
+
+ if (flags && nla_put_u16(skb, MPTCP_ATTR_FLAGS, flags))
+ return -EMSGSIZE;
+
return mptcp_event_add_subflow(skb, ssk);
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e6fd97b21e9e..5e497a83e967 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -371,6 +371,20 @@ static void mptcp_close_wake_up(struct sock *sk)
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
}
+static void mptcp_shutdown_subflows(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
+
+ slow = lock_sock_fast(ssk);
+ tcp_shutdown(ssk, SEND_SHUTDOWN);
+ unlock_sock_fast(ssk, slow);
+ }
+}
+
/* called under the msk socket lock */
static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
@@ -395,6 +409,7 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
break;
case TCP_CLOSING:
case TCP_LAST_ACK:
+ mptcp_shutdown_subflows(msk);
mptcp_set_state(sk, TCP_CLOSE);
break;
}
@@ -563,6 +578,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
mptcp_set_state(sk, TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
+ mptcp_shutdown_subflows(msk);
mptcp_set_state(sk, TCP_CLOSE);
break;
default:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 3f1b62a9fe88..f31a3a79531a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -883,6 +883,10 @@ create_child:
ctx->subflow_id = 1;
owner = mptcp_sk(ctx->conn);
+
+ if (mp_opt.deny_join_id0)
+ WRITE_ONCE(owner->pm.remote_deny_join_id0, true);
+
mptcp_pm_new_connection(owner, child, 1);
/* with OoO packets we can reach here without ingress
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 28c1b0022178..bd861191157b 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -133,12 +133,15 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
&off, PAGE_SIZE);
- if (unlikely(ret != ibmr->sg_dma_len))
- return ret < 0 ? ret : -EINVAL;
+ if (unlikely(ret != ibmr->sg_dma_len)) {
+ ret = ret < 0 ? ret : -EINVAL;
+ goto out_inc;
+ }
- if (cmpxchg(&frmr->fr_state,
- FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
- return -EBUSY;
+ if (cmpxchg(&frmr->fr_state, FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE) {
+ ret = -EBUSY;
+ goto out_inc;
+ }
atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
@@ -166,11 +169,10 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
/* Failure here can be because of -ENOMEM as well */
rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
- atomic_inc(&ibmr->ic->i_fastreg_wrs);
if (printk_ratelimit())
pr_warn("RDS/IB: %s returned error(%d)\n",
__func__, ret);
- goto out;
+ goto out_inc;
}
/* Wait for the registration to complete in order to prevent an invalid
@@ -179,8 +181,10 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
*/
wait_event(frmr->fr_reg_done, !frmr->fr_reg);
-out:
+ return ret;
+out_inc:
+ atomic_inc(&ibmr->ic->i_fastreg_wrs);
return ret;
}
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 41e657e97761..cf2dcec6ce5a 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -94,10 +94,10 @@ static const struct dmi_system_id rfkill_gpio_deny_table[] = {
static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill;
- struct gpio_desc *gpio;
+ const char *type_name = NULL;
const char *name_property;
const char *type_property;
- const char *type_name;
+ struct gpio_desc *gpio;
int ret;
if (dmi_check_system(rfkill_gpio_deny_table))
diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c
index 1e19c605bcc8..dce5a3d8a964 100644
--- a/net/rxrpc/rxgk.c
+++ b/net/rxrpc/rxgk.c
@@ -475,7 +475,7 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call,
struct krb5_buffer metadata;
unsigned int offset = sp->offset, len = sp->len;
size_t data_offset = 0, data_len = len;
- u32 ac;
+ u32 ac = 0;
int ret = -ENOMEM;
_enter("");
@@ -499,9 +499,10 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call,
ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata,
skb, &offset, &len, &ac);
kfree(hdr);
- if (ret == -EPROTO) {
- rxrpc_abort_eproto(call, skb, ac,
- rxgk_abort_1_verify_mic_eproto);
+ if (ret < 0) {
+ if (ret != -ENOMEM)
+ rxrpc_abort_eproto(call, skb, ac,
+ rxgk_abort_1_verify_mic_eproto);
} else {
sp->offset = offset;
sp->len = len;
@@ -524,15 +525,16 @@ static int rxgk_verify_packet_encrypted(struct rxrpc_call *call,
struct rxgk_header hdr;
unsigned int offset = sp->offset, len = sp->len;
int ret;
- u32 ac;
+ u32 ac = 0;
_enter("");
ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac);
- if (ret == -EPROTO)
- rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto);
- if (ret < 0)
+ if (ret < 0) {
+ if (ret != -ENOMEM)
+ rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto);
goto error;
+ }
if (len < sizeof(hdr)) {
ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c
index b94b77a1c317..30275cb5ba3e 100644
--- a/net/rxrpc/rxgk_app.c
+++ b/net/rxrpc/rxgk_app.c
@@ -54,6 +54,10 @@ int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
_enter("");
+ if (ticket_len < 10 * sizeof(__be32))
+ return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
+ rxgk_abort_resp_short_yfs_tkt);
+
/* Get the session key length */
ret = skb_copy_bits(skb, ticket_offset, tmp, sizeof(tmp));
if (ret < 0)
@@ -187,7 +191,7 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
struct key *server_key;
unsigned int ticket_offset, ticket_len;
u32 kvno, enctype;
- int ret, ec;
+ int ret, ec = 0;
struct {
__be32 kvno;
@@ -195,22 +199,23 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
__be32 token_len;
} container;
+ if (token_len < sizeof(container))
+ goto short_packet;
+
/* Decode the RXGK_TokenContainer object. This tells us which server
* key we should be using. We can then fetch the key, get the secret
* and set up the crypto to extract the token.
*/
if (skb_copy_bits(skb, token_offset, &container, sizeof(container)) < 0)
- return rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
- rxgk_abort_resp_tok_short);
+ goto short_packet;
kvno = ntohl(container.kvno);
enctype = ntohl(container.enctype);
ticket_len = ntohl(container.token_len);
ticket_offset = token_offset + sizeof(container);
- if (xdr_round_up(ticket_len) > token_len - 3 * 4)
- return rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
- rxgk_abort_resp_tok_short);
+ if (xdr_round_up(ticket_len) > token_len - sizeof(container))
+ goto short_packet;
_debug("KVNO %u", kvno);
_debug("ENC %u", enctype);
@@ -236,9 +241,11 @@ int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
&ticket_offset, &ticket_len, &ec);
crypto_free_aead(token_enc);
token_enc = NULL;
- if (ret < 0)
- return rxrpc_abort_conn(conn, skb, ec, ret,
- rxgk_abort_resp_tok_dec);
+ if (ret < 0) {
+ if (ret != -ENOMEM)
+ return rxrpc_abort_conn(conn, skb, ec, ret,
+ rxgk_abort_resp_tok_dec);
+ }
ret = conn->security->default_decode_ticket(conn, skb, ticket_offset,
ticket_len, _key);
@@ -283,4 +290,8 @@ temporary_error:
* also come out this way if the ticket decryption fails.
*/
return ret;
+
+short_packet:
+ return rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
+ rxgk_abort_resp_tok_short);
}
diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h
index 7370a5655985..80164d89e19c 100644
--- a/net/rxrpc/rxgk_common.h
+++ b/net/rxrpc/rxgk_common.h
@@ -88,11 +88,16 @@ int rxgk_decrypt_skb(const struct krb5_enctype *krb5,
*_offset += offset;
*_len = len;
break;
+ case -EBADMSG: /* Checksum mismatch. */
case -EPROTO:
- case -EBADMSG:
*_error_code = RXGK_SEALEDINCON;
break;
+ case -EMSGSIZE:
+ *_error_code = RXGK_PACKETSHORT;
+ break;
+ case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */
default:
+ *_error_code = RXGK_INCONSISTENCY;
break;
}
@@ -127,11 +132,16 @@ int rxgk_verify_mic_skb(const struct krb5_enctype *krb5,
*_offset += offset;
*_len = len;
break;
+ case -EBADMSG: /* Checksum mismatch */
case -EPROTO:
- case -EBADMSG:
*_error_code = RXGK_SEALEDINCON;
break;
+ case -EMSGSIZE:
+ *_error_code = RXGK_PACKETSHORT;
+ break;
+ case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */
default:
+ *_error_code = RXGK_INCONSISTENCY;
break;
}
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 4e077068e6d9..e4c42731ce39 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -141,6 +141,7 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx);
int wait_on_pending_writer(struct sock *sk, long *timeo);
void tls_err_abort(struct sock *sk, int err);
+void tls_strp_abort_strp(struct tls_strparser *strp, int err);
int init_prot_info(struct tls_prot_info *prot,
const struct tls_crypto_info *crypto_info,
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index d71643b494a1..98e12f0ff57e 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -13,7 +13,7 @@
static struct workqueue_struct *tls_strp_wq;
-static void tls_strp_abort_strp(struct tls_strparser *strp, int err)
+void tls_strp_abort_strp(struct tls_strparser *strp, int err)
{
if (strp->stopped)
return;
@@ -211,11 +211,17 @@ static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
struct sk_buff *in_skb, unsigned int offset,
size_t in_len)
{
+ unsigned int nfrag = skb->len / PAGE_SIZE;
size_t len, chunk;
skb_frag_t *frag;
int sz;
- frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
+ if (unlikely(nfrag >= skb_shinfo(skb)->nr_frags)) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return -EMSGSIZE;
+ }
+
+ frag = &skb_shinfo(skb)->frags[nfrag];
len = in_len;
/* First make sure we got the header */
@@ -520,10 +526,8 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
tls_strp_load_anchor_with_queue(strp, inq);
if (!strp->stm.full_len) {
sz = tls_rx_msg_size(strp, strp->anchor);
- if (sz < 0) {
- tls_strp_abort_strp(strp, sz);
+ if (sz < 0)
return sz;
- }
strp->stm.full_len = sz;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index bac65d0d4e3e..daac9fd4be7e 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2474,8 +2474,7 @@ int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb)
return data_len + TLS_HEADER_SIZE;
read_failure:
- tls_err_abort(strp->sk, ret);
-
+ tls_strp_abort_strp(strp, ret);
return ret;
}
diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c
index 7ebd352138e4..beaf36657dea 100644
--- a/samples/damon/mtier.c
+++ b/samples/damon/mtier.c
@@ -208,6 +208,9 @@ static int damon_sample_mtier_enable_store(
if (enabled == is_enabled)
return 0;
+ if (!init_called)
+ return 0;
+
if (enabled) {
err = damon_sample_mtier_start();
if (err)
diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c
index 1b839c06a612..0226652f94d5 100644
--- a/samples/damon/prcl.c
+++ b/samples/damon/prcl.c
@@ -137,6 +137,9 @@ static int damon_sample_prcl_enable_store(
if (enabled == is_enabled)
return 0;
+ if (!init_called)
+ return 0;
+
if (enabled) {
err = damon_sample_prcl_start();
if (err)
diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c
index da052023b099..21eaf15f987d 100644
--- a/samples/damon/wsse.c
+++ b/samples/damon/wsse.c
@@ -118,6 +118,9 @@ static int damon_sample_wsse_enable_store(
return 0;
if (enabled) {
+ if (!init_called)
+ return 0;
+
err = damon_sample_wsse_start();
if (err)
enabled = false;
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 9ef569492560..ddaeb4eb3e24 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -75,6 +75,9 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
size_t ci, cj, ei;
int cmp;
+ if (!excludes->cnt)
+ return;
+
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fd49703021fd..078634461df2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -2009,6 +2009,7 @@ static int __cmd_contention(int argc, const char **argv)
.owner = show_lock_owner,
.cgroups = RB_ROOT,
};
+ struct perf_env host_env;
lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
if (!lockhash_table)
@@ -2024,7 +2025,10 @@ static int __cmd_contention(int argc, const char **argv)
eops.mmap = perf_event__process_mmap;
eops.tracing_data = perf_event__process_tracing_data;
- session = perf_session__new(use_bpf ? NULL : &data, &eops);
+ perf_env__init(&host_env);
+ session = __perf_session__new(use_bpf ? NULL : &data, &eops,
+ /*trace_event_repipe=*/false, &host_env);
+
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
err = PTR_ERR(session);
@@ -2142,6 +2146,7 @@ out_delete:
evlist__delete(con.evlist);
lock_contention_finish(&con);
perf_session__delete(session);
+ perf_env__exit(&host_env);
zfree(&lockhash_table);
return err;
}
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 85b2a93a59ac..779f6230130a 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -477,6 +477,7 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
/* Insert the value at the end. */
maps_by_address[nr_maps] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name)
maps_by_name[nr_maps] = map__get(new);
@@ -502,8 +503,6 @@ static int __maps__insert(struct maps *maps, struct map *new)
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
- map__set_kmap_maps(new, maps);
-
return 0;
}
@@ -891,6 +890,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
if (before) {
map__put(maps_by_address[i]);
maps_by_address[i] = before;
+ map__set_kmap_maps(before, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -918,6 +918,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -942,14 +943,13 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
maps_by_name[ni] = map__get(new);
}
- map__set_kmap_maps(new, maps);
-
check_invariants(maps);
return err;
}
@@ -1019,6 +1019,7 @@ int maps__copy_from(struct maps *dest, struct maps *parent)
err = unwind__prepare_access(dest, new, NULL);
if (!err) {
dest_maps_by_address[i] = new;
+ map__set_kmap_maps(new, dest);
if (dest_maps_by_name)
dest_maps_by_name[i] = map__get(new);
RC_CHK_ACCESS(dest)->nr_maps = i + 1;
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 7bc148889ca7..187b478d0ddf 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -7,6 +7,8 @@ ALL_TESTS="
prio
arp_validate
num_grat_arp
+ fail_over_mac
+ vlan_over_bond
"
lib_dir=$(dirname "$0")
@@ -352,8 +354,8 @@ garp_test()
exp_num=$(echo "${param}" | cut -f6 -d ' ')
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
- slowwait_for_counter $((exp_num + 5)) $exp_num \
- tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}"
+ slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \
+ "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null
# check result
real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
@@ -376,6 +378,197 @@ num_grat_arp()
done
}
+check_all_mac_same()
+{
+ RET=0
+ # all slaves should have same mac address (with the first port's mac)
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]')
+ local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \
+ [ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_first()
+{
+ RET=0
+ # bond mac address should be same with the first added slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_active()
+{
+ RET=0
+ # bond mac address should be same with active slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "$active_slave_mac" ]; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_not_change()
+{
+ RET=0
+ # backup slave's mac address is not changed
+ if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[]
+ | select(.linkinfo.info_slave_data.state=="BACKUP")
+ | select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_inherit()
+{
+ local backup_mac
+ RET=0
+
+ # backup slaves should use mac[1] or mac[2]
+ local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \
+ jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address')
+ for backup_mac in $backup_macs; do
+ if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then
+ RET=1
+ fi
+ done
+}
+
+check_first_slave_random_mac()
+{
+ RET=0
+ # remove the first added slave and added it back
+ ip -n "$s_ns" link set eth0 nomaster
+ ip -n "$s_ns" link set eth0 master bond0
+
+ # the first slave should use random mac address
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" = "${mac[0]}" ] && RET=1
+ log_test "bond fail_over_mac follow" "random first slave mac"
+
+ # remove the first slave, the permanent MAC address should be restored back
+ ip -n "$s_ns" link set eth0 nomaster
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" != "${mac[0]}" ] && RET=1
+}
+
+do_active_backup_failover()
+{
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ ip -n ${s_ns} link set ${active_slave} down
+ slowwait 2 active_slave_changed $active_slave
+ ip -n ${s_ns} link set ${active_slave} up
+}
+
+fail_over_mac()
+{
+ # Bring down the first interface on the switch to force the bond to
+ # select another active interface instead of the first one that joined.
+ ip -n "$g_ns" link set s0 down
+
+ # fail_over_mac none
+ bond_reset "mode active-backup miimon 100 fail_over_mac 0"
+ check_all_mac_same
+ log_test "fail_over_mac 0" "all slaves have same mac"
+ do_active_backup_failover
+ check_all_mac_same
+ log_test "fail_over_mac 0" "failover: all slaves have same mac"
+
+ # fail_over_mac active
+ bond_reset "mode active-backup miimon 100 fail_over_mac 1"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "backup slave mac is not changed"
+ do_active_backup_failover
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "failover: backup slave mac is not changed"
+
+ # fail_over_mac follow
+ bond_reset "mode active-backup miimon 100 fail_over_mac 2"
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "backup slave mac inherit"
+ do_active_backup_failover
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "failover: backup slave mac inherit"
+ check_first_slave_random_mac
+ log_test "fail_over_mac 2" "first slave mac random"
+}
+
+vlan_over_bond_arp()
+{
+ local mode="$1"
+ RET=0
+
+ bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond arp" "$mode"
+}
+
+vlan_over_bond_ns()
+{
+ local mode="$1"
+ RET=0
+
+ if skip_ns; then
+ log_test_skip "vlan_over_bond ns" "$mode"
+ return 0
+ fi
+
+ bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond ns" "$mode"
+}
+
+vlan_over_bond()
+{
+ # add vlan 3 for client
+ ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3
+ ip -n "${c_ns}" link set eth0.3 up
+ ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3
+ ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3
+
+ # Add tc rule to check the vlan pkts
+ tc -n "${c_ns}" qdisc add dev eth0.3 clsact
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \
+ handle 101 flower skip_hw arp_op request \
+ arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \
+ handle 102 flower skip_hw ip_proto icmpv6 \
+ type 135 src_ip 2001:db8::3:1 action pass
+
+ vlan_over_bond_arp "active-backup"
+ vlan_over_bond_ns "active-backup"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
index 195ef83cfbf1..167aa4a4a12a 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -39,6 +39,8 @@ g_ip4="192.0.2.254"
s_ip6="2001:db8::1"
c_ip6="2001:db8::10"
g_ip6="2001:db8::254"
+mac[0]="00:0a:0b:0c:0d:01"
+mac[1]="00:0a:0b:0c:0d:02"
gateway_create()
{
@@ -62,6 +64,7 @@ server_create()
for i in $(seq 0 1); do
ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
ip -n ${g_ns} link set s${i} up
ip -n ${g_ns} link set s${i} master br0
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
index 3a1333d9a85b..23a2932301cc 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -26,6 +26,7 @@
# +-------------------------------------+
source bond_topo_2d1c.sh
+mac[2]="00:0a:0b:0c:0d:03"
setup_prepare()
{
@@ -36,6 +37,7 @@ setup_prepare()
# Add the extra device as we use 3 down links for bond0
local i=2
ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
ip -n ${g_ns} link set s${i} up
ip -n ${g_ns} link set s${i} master br0
ip -n ${s_ns} link set eth${i} master bond0
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index 4d16a69ffc65..832fa1caeb66 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -10,3 +10,4 @@ CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_SCH_INGRESS=y
CONFIG_NLMON=y
CONFIG_VETH=y
+CONFIG_VLAN_8021Q=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 4f07ac9fa207..b148cadb96d0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1093,6 +1093,7 @@ int main_loop_s(int listensock)
struct pollfd polls;
socklen_t salen;
int remotesock;
+ int err = 0;
int fd = 0;
again:
@@ -1125,7 +1126,7 @@ again:
SOCK_TEST_TCPULP(remotesock, 0);
memset(&winfo, 0, sizeof(winfo));
- copyfd_io(fd, remotesock, 1, true, &winfo);
+ err = copyfd_io(fd, remotesock, 1, true, &winfo);
} else {
perror("accept");
return 1;
@@ -1134,10 +1135,10 @@ again:
if (cfg_input)
close(fd);
- if (--cfg_repeat > 0)
+ if (!err && --cfg_repeat > 0)
goto again;
- return 0;
+ return err;
}
static void init_rng(void)
@@ -1247,7 +1248,7 @@ void xdisconnect(int fd)
else
xerror("bad family");
- strcpy(cmd, "ss -M | grep -q ");
+ strcpy(cmd, "ss -Mnt | grep -q ");
cmdlen = strlen(cmd);
if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
sizeof(cmd) - cmdlen))
@@ -1257,7 +1258,7 @@ void xdisconnect(int fd)
/*
* wait until the pending data is completely flushed and all
- * the MPTCP sockets reached the closed status.
+ * the sockets reached the closed status.
* disconnect will bypass/ignore/drop any pending data.
*/
for (i = 0; ; i += msec_sleep) {
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index c2ab9f7f0d21..47ecb5b3836e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -211,6 +211,11 @@ if $checksum; then
done
fi
+if $capture; then
+ rndh="${ns1:4}"
+ mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-"
+fi
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -361,7 +366,6 @@ do_transfer()
if $capture; then
local capuser
- local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 09cd24b2ae46..d62e653d48b0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -384,7 +384,7 @@ mptcp_lib_make_file() {
mptcp_lib_print_file_err() {
ls -l "${1}" 1>&2
echo "Trailing bytes are: "
- tail -c 27 "${1}"
+ tail -c 32 "${1}" | od -x | head -n2
}
# $1: input file ; $2: output file ; $3: what kind of file
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index e934dd26a59d..112c07c4c37a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -667,22 +667,26 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
- xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+ xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64,
+ s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1));
/* be nice when running on top of older kernel */
if (s.pkt_stats_avail) {
if (s.last_sample.mptcpi_bytes_sent != ret2)
- xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_sent, ret2,
s.last_sample.mptcpi_bytes_sent - ret2);
if (s.last_sample.mptcpi_bytes_received != ret)
- xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_received, ret,
s.last_sample.mptcpi_bytes_received - ret);
if (s.last_sample.mptcpi_bytes_acked != ret)
- xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
- s.last_sample.mptcpi_bytes_acked, ret2,
- s.last_sample.mptcpi_bytes_acked - ret2);
+ xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
+ s.last_sample.mptcpi_bytes_acked, ret,
+ s.last_sample.mptcpi_bytes_acked - ret);
}
close(fd);
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 994a556f46c1..93fea3442216 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -188,6 +188,13 @@ static int capture_events(int fd, int event_group)
fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_FLAGS) {
+ __u16 flags = *(__u16 *)RTA_DATA(attrs);
+
+ /* only print when present, easier */
+ if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0)
+ fprintf(stderr, ",deny_join_id0:1");
+ }
attrs = RTA_NEXT(attrs, msg_len);
}
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 970c329735ff..3d45991f24ed 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -201,6 +201,9 @@ make_connection()
is_v6="v4"
fi
+ # set this on the client side only: will not affect the rest
+ ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0
+
:>"$client_evts"
:>"$server_evts"
@@ -223,23 +226,28 @@ make_connection()
local client_token
local client_port
local client_serverside
+ local client_nojoin
local server_token
local server_serverside
+ local server_nojoin
client_token=$(mptcp_lib_evts_get_info token "$client_evts")
client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+ client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts")
server_token=$(mptcp_lib_evts_get_info token "$server_evts")
server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+ server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts")
print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
- if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
- [ "$server_serverside" = 1 ]
+ if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] &&
+ [ "${client_serverside}" = 0 ] && [ "${server_serverside}" = 1 ] &&
+ [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ]
then
test_pass
print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
else
- test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+ test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})"
mptcp_lib_result_print_all_tap
exit ${KSFT_FAIL}
fi
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 3c8d3455d8e7..b327d3061ed5 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -25,6 +25,7 @@ tests="
nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
+ tunnel_metadata ovs: test extraction of tunnel metadata
drop_reason drop: test drop reasons are emitted
psample psample: Sampling packets with psample"
@@ -113,13 +114,13 @@ ovs_add_dp () {
}
ovs_add_if () {
- info "Adding IF to DP: br:$2 if:$3"
- if [ "$4" != "-u" ]; then
- ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
- || return 1
+ info "Adding IF to DP: br:$3 if:$4 ($2)"
+ if [ "$5" != "-u" ]; then
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \
+ -t "$2" "$3" "$4" || return 1
else
python3 $ovs_base/ovs-dpctl.py add-if \
- -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+ -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err &
pid=$!
on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
fi
@@ -166,9 +167,9 @@ ovs_add_netns_and_veths () {
fi
if [ "$7" != "-u" ]; then
- ovs_add_if "$1" "$2" "$4" || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" || return 1
else
- ovs_add_if "$1" "$2" "$4" -u || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1
fi
if [ $TRACING -eq 1 ]; then
@@ -756,6 +757,79 @@ test_upcall_interfaces() {
return 0
}
+ovs_add_kernel_tunnel() {
+ local sbxname=$1; shift
+ local ns=$1; shift
+ local tnl_type=$1; shift
+ local name=$1; shift
+ local addr=$1; shift
+
+ info "setting up kernel ${tnl_type} tunnel ${name}"
+ ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1
+ on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1"
+ ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1
+ ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1
+}
+
+test_tunnel_metadata() {
+ which arping >/dev/null 2>&1 || return $ksft_skip
+
+ sbxname="test_tunnel_metadata"
+ sbx_add "${sbxname}" || return 1
+
+ info "setting up new DP"
+ ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1
+
+ ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \
+ 172.31.110.1/24 || return 1
+
+ info "removing veth interface from openvswitch and setting IP"
+ ovs_del_if "${sbxname}" tdp0 left0 || return 1
+ ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1
+ ovs_sbx "${sbxname}" ip link set left0 up || return 1
+
+ info "setting up tunnel port in openvswitch"
+ ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1
+ on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0"
+ ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1
+ ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1
+
+ configs=$(echo '
+ 1 172.31.221.1/24 1155332 32 set udpcsum flags\(df\|csum\)
+ 2 172.31.222.1/24 1234567 45 set noudpcsum flags\(df\)
+ 3 172.31.223.1/24 1020304 23 unset udpcsum flags\(csum\)
+ 4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d')
+
+ while read -r i addr id ttl df csum flags; do
+ ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \
+ remote 172.31.110.2 id ${id} dstport 4789 \
+ ttl ${ttl} df ${df} ${csum} || return 1
+ done <<< "${configs}"
+
+ ovs_wait grep -q 'listening on upcall packet handler' \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+
+ info "sending arping"
+ for i in 1 2 3 4; do
+ ovs_sbx "${sbxname}" ip netns exec tns \
+ arping -I vxlan${i} 172.31.22${i}.2 -c 1 \
+ >${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr
+ done
+
+ info "checking that received decapsulated packets carry correct metadata"
+ while read -r i addr id ttl df csum flags; do
+ arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha="
+ addrs="src=172.31.110.1,dst=172.31.110.2"
+ ports="tp_src=[0-9]*,tp_dst=4789"
+ tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)"
+
+ ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+ done <<< "${configs}"
+
+ return 0
+}
+
run_test() {
(
tname="$1"
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt
new file mode 100644
index 000000000000..26794e7ddfd5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x602 /proc/sys/net/ipv4/tcp_timestamps=0`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK>
+ +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK>
+
+// sk->sk_state is TCP_SYN_RECV
+ +.1 accept(3, ..., ...) = 4
+
+// tcp_disconnect() sets sk->sk_state to TCP_CLOSE
+ +0 connect(4, AF_UNSPEC, ...) = 0
+ +0 > R. 1:1(0) ack 11 win 65535
+
+// connect() sets sk->sk_state to TCP_SYN_SENT
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
+ +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// tp->fastopen_rsk must be NULL
+ +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0f5640d8dc7f..dd093f9df6f1 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -2770,6 +2770,22 @@ TEST_F(tls_err, poll_partial_rec_async)
}
}
+/* Use OOB+large send to trigger copy mode due to memory pressure.
+ * OOB causes a short read.
+ */
+TEST_F(tls_err, oob_pressure)
+{
+ char buf[1<<16];
+ int i;
+
+ memrnd(buf, sizeof(buf));
+
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+ EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+ for (i = 0; i < 64; i++)
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;