summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/arm/boot/dts/da850-lcdk.dts4
-rw-r--r--arch/arm/boot/dts/kirkwood-openblocks_a7.dts10
-rw-r--r--arch/arm/boot/dts/sun4i-a10.dtsi2
-rw-r--r--arch/arm/boot/dts/sun7i-a20.dtsi2
-rw-r--r--arch/arm/configs/sunxi_defconfig2
-rw-r--r--arch/arm/net/bpf_jit_32.c225
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi1
-rw-r--r--arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi13
-rw-r--r--arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi9
-rw-r--r--arch/arm64/net/bpf_jit_comp.c22
-rw-r--r--arch/ia64/include/asm/atomic.h37
-rw-r--r--arch/mips/net/bpf_jit.c2
-rw-r--r--arch/mips/net/ebpf_jit.c2
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/hvcall.h1
-rw-r--r--arch/powerpc/kernel/setup-common.c11
-rw-r--r--arch/powerpc/kernel/setup_64.c38
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c2
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c2
-rw-r--r--arch/powerpc/xmon/xmon.c26
-rw-r--r--arch/s390/net/bpf_jit_comp.c2
-rw-r--r--arch/sparc/net/bpf_jit_comp_32.c2
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c2
-rw-r--r--arch/x86/entry/entry_32.S11
-rw-r--r--arch/x86/entry/entry_64.S11
-rw-r--r--arch/x86/events/intel/rapl.c4
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/mem_encrypt.h4
-rw-r--r--arch/x86/include/asm/nospec-branch.h6
-rw-r--r--arch/x86/kernel/apic/apic.c49
-rw-r--r--arch/x86/kernel/apic/vector.c7
-rw-r--r--arch/x86/kernel/cpu/bugs.c36
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c8
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/head64.c4
-rw-r--r--arch/x86/kernel/idt.c12
-rw-r--r--arch/x86/kernel/irqinit.c3
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/tsc.c9
-rw-r--r--arch/x86/mm/fault.c7
-rw-r--r--arch/x86/mm/kasan_init_64.c24
-rw-r--r--arch/x86/mm/mem_encrypt.c356
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S80
-rw-r--r--arch/x86/net/bpf_jit_comp.c37
-rw-r--r--drivers/ata/libata-core.c1
-rw-r--r--drivers/bcma/Kconfig2
-rw-r--r--drivers/gpio/gpio-mmio.c30
-rw-r--r--drivers/gpu/drm/i915/intel_display.c303
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h2
-rw-r--r--drivers/gpu/drm/i915/intel_sprite.c83
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c16
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c9
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c4
-rw-r--r--drivers/i2c/i2c-core-base.c6
-rw-r--r--drivers/i2c/i2c-core-smbus.c13
-rw-r--r--drivers/input/misc/twl4030-vibra.c6
-rw-r--r--drivers/input/misc/twl6040-vibra.c3
-rw-r--r--drivers/input/mouse/alps.c23
-rw-r--r--drivers/input/mouse/alps.h10
-rw-r--r--drivers/input/mouse/synaptics.c1
-rw-r--r--drivers/input/rmi4/rmi_driver.c4
-rw-r--r--drivers/input/touchscreen/88pm860x-ts.c16
-rw-r--r--drivers/input/touchscreen/of_touchscreen.c4
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c14
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb_fd.c21
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c6
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1_atu.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1_vtu.c2
-rw-r--r--drivers/net/ethernet/cortina/Kconfig1
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c16
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet.h1
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c24
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c20
-rw-r--r--drivers/net/ethernet/netronome/nfp/Makefile1
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/cmsg.c9
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c12
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.c29
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.c47
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.c14
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.h6
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_devlink.c12
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c17
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h3
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c62
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c135
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h99
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c19
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_main.c76
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_repr.c73
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_repr.h15
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c42
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c38
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c4
-rw-r--r--drivers/net/ethernet/ti/netcp_core.c2
-rw-r--r--drivers/net/netdevsim/bpf.c246
-rw-r--r--drivers/net/netdevsim/netdevsim.h3
-rw-r--r--drivers/net/tun.c18
-rw-r--r--drivers/net/usb/r8152.c13
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c9
-rw-r--r--drivers/nvme/host/pci.c28
-rw-r--r--drivers/phy/phy-core.c4
-rw-r--r--drivers/ssb/Kconfig2
-rw-r--r--fs/proc/array.c7
-rw-r--r--include/linux/bpf.h4
-rw-r--r--include/linux/compiler-gcc.h2
-rw-r--r--include/linux/delayacct.h8
-rw-r--r--include/linux/vermagic.h8
-rw-r--r--include/net/netfilter/nf_tables.h48
-rw-r--r--include/net/netns/nftables.h8
-rw-r--r--include/net/pkt_cls.h10
-rw-r--r--include/net/sch_generic.h7
-rw-r--r--include/net/tcp.h1
-rw-r--r--include/uapi/linux/bpf.h5
-rw-r--r--include/uapi/linux/bpf_common.h7
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h10
-rw-r--r--include/uapi/linux/netfilter_ipv4.h1
-rw-r--r--include/uapi/linux/netfilter_ipv6.h1
-rw-r--r--include/uapi/linux/netfilter_ipv6/ip6t_srh.h57
-rw-r--r--kernel/bpf/arraymap.c49
-rw-r--r--kernel/bpf/core.c23
-rw-r--r--kernel/bpf/lpm_trie.c95
-rw-r--r--kernel/bpf/offload.c81
-rw-r--r--kernel/bpf/syscall.c39
-rw-r--r--kernel/bpf/verifier.c146
-rw-r--r--kernel/cgroup/cgroup.c1
-rw-r--r--kernel/delayacct.c42
-rw-r--r--kernel/futex.c86
-rw-r--r--kernel/locking/rtmutex.c26
-rw-r--r--kernel/locking/rtmutex_common.h1
-rw-r--r--kernel/sched/core.c6
-rw-r--r--kernel/time/timer.c2
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/ring_buffer.c3
-rw-r--r--kernel/trace/trace_events.c16
-rw-r--r--kernel/workqueue.c13
-rw-r--r--lib/test_bpf.c104
-rw-r--r--mm/memory.c10
-rw-r--r--mm/page_owner.c1
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c54
-rw-r--r--net/can/af_can.c36
-rw-r--r--net/core/filter.c11
-rw-r--r--net/core/flow_dissector.c3
-rw-r--r--net/core/sysctl_net_core.c60
-rw-r--r--net/ipv4/netfilter/Kconfig5
-rw-r--r--net/ipv4/netfilter/Makefile6
-rw-r--r--net/ipv4/netfilter/arp_tables.c1
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/iptable_raw.c31
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.asn1177
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c1286
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic_main.c235
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c49
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c49
-rw-r--r--net/ipv4/tcp_bbr.c3
-rw-r--r--net/ipv4/tcp_input.c24
-rw-r--r--net/ipv6/ip6_fib.c10
-rw-r--r--net/ipv6/ip6_gre.c14
-rw-r--r--net/ipv6/netfilter/Kconfig12
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c161
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c31
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c15
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c3
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c1
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c49
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/nf_conncount.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_conntrack_expect.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_log.c1
-rw-r--r--net/netfilter/nf_queue.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/nf_tables_api.c932
-rw-r--r--net/netfilter/nf_tables_inet.c49
-rw-r--r--net/netfilter/nf_tables_netdev.c70
-rw-r--r--net/netfilter/nfnetlink.c4
-rw-r--r--net/netfilter/nfnetlink_acct.c2
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nfnetlink_log.c1
-rw-r--r--net/netfilter/nfnetlink_queue.c1
-rw-r--r--net/netfilter/nft_compat.c18
-rw-r--r--net/netfilter/nft_ct.c16
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_flow_offload.c4
-rw-r--r--net/netfilter/nft_log.c4
-rw-r--r--net/netfilter/nft_masq.c2
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_nat.c2
-rw-r--r--net/netfilter/nft_redir.c2
-rw-r--r--net/netfilter/x_tables.c7
-rw-r--r--net/netfilter/xt_hashlimit.c5
-rw-r--r--net/netfilter/xt_ipcomp.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/sched/cls_api.c64
-rw-r--r--net/sched/cls_basic.c14
-rw-r--r--net/sched/cls_bpf.c22
-rw-r--r--net/sched/cls_cgroup.c9
-rw-r--r--net/sched/cls_flow.c8
-rw-r--r--net/sched/cls_flower.c20
-rw-r--r--net/sched/cls_fw.c17
-rw-r--r--net/sched/cls_matchall.c15
-rw-r--r--net/sched/cls_route.c12
-rw-r--r--net/sched/cls_rsvp.h7
-rw-r--r--net/sched/cls_tcindex.c14
-rw-r--r--net/sched/cls_u32.c76
-rw-r--r--net/socket.c9
-rw-r--r--net/tls/tls_main.c17
-rw-r--r--net/tls/tls_sw.c12
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/wext-compat.c3
-rw-r--r--samples/bpf/xdp2skb_meta_kern.c8
-rw-r--r--samples/bpf/xdp_monitor_kern.c94
-rw-r--r--samples/bpf/xdp_monitor_user.c416
-rw-r--r--scripts/Makefile.build14
-rwxr-xr-xscripts/decodecode8
-rw-r--r--scripts/gdb/linux/tasks.py2
-rw-r--r--tools/bpf/bpf_jit_disasm.c7
-rw-r--r--tools/bpf/bpftool/common.c72
-rw-r--r--tools/bpf/bpftool/jit_disasm.c16
-rw-r--r--tools/bpf/bpftool/main.h5
-rw-r--r--tools/bpf/bpftool/map.c8
-rw-r--r--tools/bpf/bpftool/prog.c12
-rw-r--r--tools/include/uapi/linux/bpf.h15
-rw-r--r--tools/objtool/elf.c4
-rw-r--r--tools/testing/selftests/bpf/.gitignore7
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/sample_map_ret0.c34
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c122
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py206
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c289
256 files changed, 5509 insertions, 3535 deletions
diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts
index eed89e659143..a1f4d6d5a569 100644
--- a/arch/arm/boot/dts/da850-lcdk.dts
+++ b/arch/arm/boot/dts/da850-lcdk.dts
@@ -293,12 +293,12 @@
label = "u-boot env";
reg = <0 0x020000>;
};
- partition@0x020000 {
+ partition@20000 {
/* The LCDK defaults to booting from this partition */
label = "u-boot";
reg = <0x020000 0x080000>;
};
- partition@0x0a0000 {
+ partition@a0000 {
label = "free space";
reg = <0x0a0000 0>;
};
diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
index cf2f5240e176..27cc913ca0f5 100644
--- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
+++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
@@ -53,7 +53,8 @@
};
pinctrl: pin-controller@10000 {
- pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>;
+ pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header
+ &pmx_gpio_header_gpo>;
pinctrl-names = "default";
pmx_uart0: pmx-uart0 {
@@ -85,11 +86,16 @@
* ground.
*/
pmx_gpio_header: pmx-gpio-header {
- marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28",
+ marvell,pins = "mpp17", "mpp29", "mpp28",
"mpp35", "mpp34", "mpp40";
marvell,function = "gpio";
};
+ pmx_gpio_header_gpo: pxm-gpio-header-gpo {
+ marvell,pins = "mpp7";
+ marvell,function = "gpo";
+ };
+
pmx_gpio_init: pmx-init {
marvell,pins = "mpp38";
marvell,function = "gpio";
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 5840f5c75c3b..4f2f2eea0755 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -1104,7 +1104,7 @@
be1_out_tcon0: endpoint@0 {
reg = <0>;
- remote-endpoint = <&tcon1_in_be0>;
+ remote-endpoint = <&tcon0_in_be1>;
};
be1_out_tcon1: endpoint@1 {
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 59655e42e4b0..bd0cd3204273 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -1354,7 +1354,7 @@
be1_out_tcon0: endpoint@0 {
reg = <0>;
- remote-endpoint = <&tcon1_in_be0>;
+ remote-endpoint = <&tcon0_in_be1>;
};
be1_out_tcon1: endpoint@1 {
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 5caaf971fb50..df433abfcb02 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -10,6 +10,7 @@ CONFIG_SMP=y
CONFIG_NR_CPUS=8
CONFIG_AEABI=y
CONFIG_HIGHMEM=y
+CONFIG_CMA=y
CONFIG_ARM_APPENDED_DTB=y
CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_CPU_FREQ=y
@@ -33,6 +34,7 @@ CONFIG_CAN_SUN4I=y
# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
CONFIG_BLK_DEV_SD=y
CONFIG_ATA=y
CONFIG_AHCI_SUNXI=y
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 4425189bb24c..41e2feb0cf4f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -25,16 +25,58 @@
#include "bpf_jit_32.h"
-int bpf_jit_enable __read_mostly;
+/*
+ * eBPF prog stack layout:
+ *
+ * high
+ * original ARM_SP => +-----+
+ * | | callee saved registers
+ * +-----+ <= (BPF_FP + SCRATCH_SIZE)
+ * | ... | eBPF JIT scratch space
+ * eBPF fp register => +-----+
+ * (BPF_FP) | ... | eBPF prog stack
+ * +-----+
+ * |RSVD | JIT scratchpad
+ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
+ *
+ * The callee saved registers depends on whether frame pointers are enabled.
+ * With frame pointers (to be compliant with the ABI):
+ *
+ * high
+ * original ARM_SP => +------------------+ \
+ * | pc | |
+ * current ARM_FP => +------------------+ } callee saved registers
+ * |r4-r8,r10,fp,ip,lr| |
+ * +------------------+ /
+ * low
+ *
+ * Without frame pointers:
+ *
+ * high
+ * original ARM_SP => +------------------+
+ * | r4-r8,r10,fp,lr | callee saved registers
+ * current ARM_FP => +------------------+
+ * low
+ *
+ * When popping registers off the stack at the end of a BPF function, we
+ * reference them via the current ARM_FP register.
+ */
+#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
+ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
+ 1 << ARM_FP)
+#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
+#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC)
#define STACK_OFFSET(k) (k)
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
-/* Flags used for JIT optimization */
-#define SEEN_CALL (1 << 0)
-
#define FLAG_IMM_OVERFLOW (1 << 0)
/*
@@ -95,7 +137,6 @@ static const u8 bpf2a32[][2] = {
* idx : index of current last JITed instruction.
* prologue_bytes : bytes used in prologue.
* epilogue_offset : offset of epilogue starting.
- * seen : bit mask used for JIT optimization.
* offsets : array of eBPF instruction offsets in
* JITed code.
* target : final JITed code.
@@ -110,7 +151,6 @@ struct jit_ctx {
unsigned int idx;
unsigned int prologue_bytes;
unsigned int epilogue_offset;
- u32 seen;
u32 flags;
u32 *offsets;
u32 *target;
@@ -179,8 +219,13 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}
-/* Stack must be multiples of 16 Bytes */
-#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+/* EABI requires the stack to be aligned to 64-bit boundaries */
+#define STACK_ALIGNMENT 8
+#else
+/* Stack must be aligned to 32-bit boundaries */
+#define STACK_ALIGNMENT 4
+#endif
/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
* BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
@@ -194,7 +239,7 @@ static void jit_fill_hole(void *area, unsigned int size)
+ SCRATCH_SIZE + \
+ 4 /* extra for skb_copy_bits buffer */)
-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (STACK_SIZE-off-4)
@@ -285,16 +330,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
emit_mov_i_no8m(rd, val, ctx);
}
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
- ctx->seen |= SEEN_CALL;
-#if __LINUX_ARM_ARCH__ < 5
- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
-
if (elf_hwcap & HWCAP_THUMB)
emit(ARM_BX(tgt_reg), ctx);
else
emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+}
+
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+{
+#if __LINUX_ARM_ARCH__ < 5
+ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+ emit_bx_r(tgt_reg, ctx);
#else
emit(ARM_BLX_R(tgt_reg), ctx);
#endif
@@ -354,7 +402,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
}
/* Call appropriate function */
- ctx->seen |= SEEN_CALL;
emit_mov_i(ARM_IP, op == BPF_DIV ?
(u32)jit_udiv32 : (u32)jit_mod32, ctx);
emit_blx_r(ARM_IP, ctx);
@@ -620,8 +667,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do LSH operation */
emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
@@ -656,8 +701,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do the ARSH operation */
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
_emit(ARM_COND_MI, ARM_B(0), ctx);
@@ -692,8 +735,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do LSH operation */
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
@@ -828,8 +869,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do Multiplication */
emit(ARM_MUL(ARM_IP, rd, rn), ctx);
emit(ARM_MUL(ARM_LR, rm, rt), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
@@ -872,33 +911,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
}
/* dst = *(size*)(src + off) */
-static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
- const s32 off, struct jit_ctx *ctx, const u8 sz){
+static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
+ s32 off, struct jit_ctx *ctx, const u8 sz){
const u8 *tmp = bpf2a32[TMP_REG_1];
- u8 rd = dstk ? tmp[1] : dst;
+ const u8 *rd = dstk ? tmp : dst;
u8 rm = src;
+ s32 off_max;
- if (off) {
+ if (sz == BPF_H)
+ off_max = 0xff;
+ else
+ off_max = 0xfff;
+
+ if (off < 0 || off > off_max) {
emit_a32_mov_i(tmp[0], off, false, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
rm = tmp[0];
+ off = 0;
+ } else if (rd[1] == rm) {
+ emit(ARM_MOV_R(tmp[0], rm), ctx);
+ rm = tmp[0];
}
switch (sz) {
- case BPF_W:
- /* Load a Word */
- emit(ARM_LDR_I(rd, rm, 0), ctx);
+ case BPF_B:
+ /* Load a Byte */
+ emit(ARM_LDRB_I(rd[1], rm, off), ctx);
+ emit_a32_mov_i(dst[0], 0, dstk, ctx);
break;
case BPF_H:
/* Load a HalfWord */
- emit(ARM_LDRH_I(rd, rm, 0), ctx);
+ emit(ARM_LDRH_I(rd[1], rm, off), ctx);
+ emit_a32_mov_i(dst[0], 0, dstk, ctx);
break;
- case BPF_B:
- /* Load a Byte */
- emit(ARM_LDRB_I(rd, rm, 0), ctx);
+ case BPF_W:
+ /* Load a Word */
+ emit(ARM_LDR_I(rd[1], rm, off), ctx);
+ emit_a32_mov_i(dst[0], 0, dstk, ctx);
+ break;
+ case BPF_DW:
+ /* Load a Double Word */
+ emit(ARM_LDR_I(rd[1], rm, off), ctx);
+ emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
break;
}
if (dstk)
- emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+ emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
+ if (dstk && sz == BPF_DW)
+ emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
}
/* Arithmatic Operation */
@@ -906,7 +965,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
const u8 rn, struct jit_ctx *ctx, u8 op) {
switch (op) {
case BPF_JSET:
- ctx->seen |= SEEN_CALL;
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
@@ -945,7 +1003,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
const u8 *tcc = bpf2a32[TCALL_CNT];
const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+#define jmp_offset (out_offset - (cur_offset) - 2)
u32 off, lo, hi;
/* if (index >= array->map.max_entries)
@@ -956,7 +1014,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit_a32_mov_i(tmp[1], off, false, ctx);
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
- /* index (64 bit) */
+ /* index is 32-bit for arrays */
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
/* index >= array->map.max_entries */
emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
@@ -997,7 +1055,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit_a32_mov_i(tmp2[1], off, false, ctx);
emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
- emit(ARM_BX(tmp[1]), ctx);
+ emit_bx_r(tmp[1], ctx);
/* out: */
if (out_offset == -1)
@@ -1070,54 +1128,22 @@ static void build_prologue(struct jit_ctx *ctx)
const u8 r2 = bpf2a32[BPF_REG_1][1];
const u8 r3 = bpf2a32[BPF_REG_1][0];
const u8 r4 = bpf2a32[BPF_REG_6][1];
- const u8 r5 = bpf2a32[BPF_REG_6][0];
- const u8 r6 = bpf2a32[TMP_REG_1][1];
- const u8 r7 = bpf2a32[TMP_REG_1][0];
- const u8 r8 = bpf2a32[TMP_REG_2][1];
- const u8 r10 = bpf2a32[TMP_REG_2][0];
const u8 fplo = bpf2a32[BPF_REG_FP][1];
const u8 fphi = bpf2a32[BPF_REG_FP][0];
- const u8 sp = ARM_SP;
const u8 *tcc = bpf2a32[TCALL_CNT];
- u16 reg_set = 0;
-
- /*
- * eBPF prog stack layout
- *
- * high
- * original ARM_SP => +-----+ eBPF prologue
- * |FP/LR|
- * current ARM_FP => +-----+
- * | ... | callee saved registers
- * eBPF fp register => +-----+ <= (BPF_FP)
- * | ... | eBPF JIT scratch space
- * | | eBPF prog stack
- * +-----+
- * |RSVD | JIT scratchpad
- * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
- * | |
- * | ... | Function call stack
- * | |
- * +-----+
- * low
- */
-
/* Save callee saved registers. */
- reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
#ifdef CONFIG_FRAME_POINTER
- reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
- emit(ARM_MOV_R(ARM_IP, sp), ctx);
+ u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
+ emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
emit(ARM_PUSH(reg_set), ctx);
emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
- /* Check if call instruction exists in BPF body */
- if (ctx->seen & SEEN_CALL)
- reg_set |= (1<<ARM_LR);
- emit(ARM_PUSH(reg_set), ctx);
+ emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
+ emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
#endif
/* Save frame pointer for later */
- emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
+ emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
ctx->stack_size = imm8m(STACK_SIZE);
@@ -1140,33 +1166,19 @@ static void build_prologue(struct jit_ctx *ctx)
/* end of prologue */
}
+/* restore callee saved registers. */
static void build_epilogue(struct jit_ctx *ctx)
{
- const u8 r4 = bpf2a32[BPF_REG_6][1];
- const u8 r5 = bpf2a32[BPF_REG_6][0];
- const u8 r6 = bpf2a32[TMP_REG_1][1];
- const u8 r7 = bpf2a32[TMP_REG_1][0];
- const u8 r8 = bpf2a32[TMP_REG_2][1];
- const u8 r10 = bpf2a32[TMP_REG_2][0];
- u16 reg_set = 0;
-
- /* unwind function call stack */
- emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
-
- /* restore callee saved registers. */
- reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
#ifdef CONFIG_FRAME_POINTER
- /* the first instruction of the prologue was: mov ip, sp */
- reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
+ /* When using frame pointers, some additional registers need to
+ * be loaded. */
+ u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
+ emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
- if (ctx->seen & SEEN_CALL)
- reg_set |= (1<<ARM_PC);
/* Restore callee saved registers. */
- emit(ARM_POP(reg_set), ctx);
- /* Return back to the callee function */
- if (!(ctx->seen & SEEN_CALL))
- emit(ARM_BX(ARM_LR), ctx);
+ emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
+ emit(ARM_POP(CALLEE_POP_MASK), ctx);
#endif
}
@@ -1394,8 +1406,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_rev32(rt, rt, ctx);
goto emit_bswap_uxt;
case 64:
- /* Because of the usage of ARM_LR */
- ctx->seen |= SEEN_CALL;
emit_rev32(ARM_LR, rt, ctx);
emit_rev32(rt, rd, ctx);
emit(ARM_MOV_R(rd, ARM_LR), ctx);
@@ -1448,22 +1458,7 @@ exit:
rn = sstk ? tmp2[1] : src_lo;
if (sstk)
emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
- switch (BPF_SIZE(code)) {
- case BPF_W:
- /* Load a Word */
- case BPF_H:
- /* Load a Half-Word */
- case BPF_B:
- /* Load a Byte */
- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
- emit_a32_mov_i(dst_hi, 0, dstk, ctx);
- break;
- case BPF_DW:
- /* Load a double word */
- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
- emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
- break;
- }
+ emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
break;
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
case BPF_LD | BPF_ABS | BPF_W:
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 7c9bdc7ab50b..9db19314c60c 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -66,6 +66,7 @@
<&cpu1>,
<&cpu2>,
<&cpu3>;
+ interrupt-parent = <&intc>;
};
psci {
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
index e3b64d03fbd8..9c7724e82aff 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
@@ -63,8 +63,10 @@
cpm_ethernet: ethernet@0 {
compatible = "marvell,armada-7k-pp22";
reg = <0x0 0x100000>, <0x129000 0xb000>;
- clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>;
- clock-names = "pp_clk", "gop_clk", "mg_clk";
+ clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>,
+ <&cpm_clk 1 5>, <&cpm_clk 1 18>;
+ clock-names = "pp_clk", "gop_clk",
+ "mg_clk","axi_clk";
marvell,system-controller = <&cpm_syscon0>;
status = "disabled";
dma-coherent;
@@ -155,7 +157,8 @@
#size-cells = <0>;
compatible = "marvell,orion-mdio";
reg = <0x12a200 0x10>;
- clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>;
+ clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>,
+ <&cpm_clk 1 6>, <&cpm_clk 1 18>;
status = "disabled";
};
@@ -338,8 +341,8 @@
compatible = "marvell,armada-cp110-sdhci";
reg = <0x780000 0x300>;
interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>;
- clock-names = "core";
- clocks = <&cpm_clk 1 4>;
+ clock-names = "core","axi";
+ clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>;
dma-coherent;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index 0d51096c69f8..87ac68b2cf37 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -63,8 +63,10 @@
cps_ethernet: ethernet@0 {
compatible = "marvell,armada-7k-pp22";
reg = <0x0 0x100000>, <0x129000 0xb000>;
- clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>;
- clock-names = "pp_clk", "gop_clk", "mg_clk";
+ clocks = <&cps_clk 1 3>, <&cps_clk 1 9>,
+ <&cps_clk 1 5>, <&cps_clk 1 18>;
+ clock-names = "pp_clk", "gop_clk",
+ "mg_clk", "axi_clk";
marvell,system-controller = <&cps_syscon0>;
status = "disabled";
dma-coherent;
@@ -155,7 +157,8 @@
#size-cells = <0>;
compatible = "marvell,orion-mdio";
reg = <0x12a200 0x10>;
- clocks = <&cps_clk 1 9>, <&cps_clk 1 5>;
+ clocks = <&cps_clk 1 9>, <&cps_clk 1 5>,
+ <&cps_clk 1 6>, <&cps_clk 1 18>;
status = "disabled";
};
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index acaa935ed977..0775d5ab8ee9 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -31,8 +31,6 @@
#include "bpf_jit.h"
-int bpf_jit_enable __read_mostly;
-
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
@@ -162,7 +160,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Stack must be multiples of 16B */
#define STACK_ALIGN(sz) (((sz) + 15) & ~15)
-#define PROLOGUE_OFFSET 8
+/* Tail call offset to jump into */
+#define PROLOGUE_OFFSET 7
static int build_prologue(struct jit_ctx *ctx)
{
@@ -214,19 +213,19 @@ static int build_prologue(struct jit_ctx *ctx)
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
- /* 4 byte extra for skb_copy_bits buffer */
- ctx->stack_size = prog->aux->stack_depth + 4;
- ctx->stack_size = STACK_ALIGN(ctx->stack_size);
-
- /* Set up function call stack */
- emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
-
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET);
return -1;
}
+
+ /* 4 byte extra for skb_copy_bits buffer */
+ ctx->stack_size = prog->aux->stack_depth + 4;
+ ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+
+ /* Set up function call stack */
+ emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
return 0;
}
@@ -274,11 +273,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_LDR64(prg, tmp, prg), ctx);
emit(A64_CBZ(1, prg, jmp_offset), ctx);
- /* goto *(prog->bpf_func + prologue_size); */
+ /* goto *(prog->bpf_func + prologue_offset); */
off = offsetof(struct bpf_prog, bpf_func);
emit_a64_mov_i64(tmp, off, ctx);
emit(A64_LDR64(tmp, prg, tmp), ctx);
emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
+ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
emit(A64_BR(tmp), ctx);
/* out: */
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 28e02c99be6d..762eeb0fcc1d 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v) \
ATOMIC_OPS(add, +)
ATOMIC_OPS(sub, -)
-#define atomic_add_return(i,v) \
+#ifdef __OPTIMIZE__
+#define __ia64_atomic_const(i) __builtin_constant_p(i) ? \
+ ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \
+ (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0
+
+#define atomic_add_return(i, v) \
({ \
- int __ia64_aar_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
- || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
- || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
- || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
- ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \
- : ia64_atomic_add(__ia64_aar_i, v); \
+ int __i = (i); \
+ static const int __ia64_atomic_p = __ia64_atomic_const(i); \
+ __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) : \
+ ia64_atomic_add(__i, v); \
})
-#define atomic_sub_return(i,v) \
+#define atomic_sub_return(i, v) \
({ \
- int __ia64_asr_i = (i); \
- (__builtin_constant_p(i) \
- && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
- || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
- || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
- || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
- ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \
- : ia64_atomic_sub(__ia64_asr_i, v); \
+ int __i = (i); \
+ static const int __ia64_atomic_p = __ia64_atomic_const(i); \
+ __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) : \
+ ia64_atomic_sub(__i, v); \
})
+#else
+#define atomic_add_return(i, v) ia64_atomic_add(i, v)
+#define atomic_sub_return(i, v) ia64_atomic_sub(i, v)
+#endif
#define atomic_fetch_add(i,v) \
({ \
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 44b925005dd3..4d8cb9bb8365 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1207,8 +1207,6 @@ jmp_cmp:
return 0;
}
-int bpf_jit_enable __read_mostly;
-
void bpf_jit_compile(struct bpf_prog *fp)
{
struct jit_ctx ctx;
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 97069a1b6f43..4e347030ed2c 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -177,8 +177,6 @@ static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
(ctx->idx * 4) - 4;
}
-int bpf_jit_enable __read_mostly;
-
enum which_ebpf_reg {
src_reg,
src_reg_no_fp,
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c51e6ce42e7a..2ed525a44734 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -166,6 +166,7 @@ config PPC
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index f0461618bf7b..eca3f9c68907 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -353,6 +353,7 @@
#define PROC_TABLE_GTSE 0x01
#ifndef __ASSEMBLY__
+#include <linux/types.h>
/**
* plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 9d213542a48b..8fd3a70047f1 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
unsigned short maj;
unsigned short min;
- /* We only show online cpus: disable preempt (overzealous, I
- * knew) to prevent cpu going down. */
- preempt_disable();
- if (!cpu_online(cpu_id)) {
- preempt_enable();
- return 0;
- }
-
#ifdef CONFIG_SMP
pvr = per_cpu(cpu_pvr, cpu_id);
#else
@@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#ifdef CONFIG_SMP
seq_printf(m, "\n");
#endif
-
- preempt_enable();
-
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
show_cpuinfo_summary(m);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 491be4179ddd..e67413f4a8f0 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -38,6 +38,7 @@
#include <linux/memory.h>
#include <linux/nmi.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
@@ -901,4 +902,41 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
if (!no_rfi_flush)
rfi_flush_enable(enable);
}
+
+#ifdef CONFIG_DEBUG_FS
+static int rfi_flush_set(void *data, u64 val)
+{
+ if (val == 1)
+ rfi_flush_enable(true);
+ else if (val == 0)
+ rfi_flush_enable(false);
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+ *val = rfi_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+ debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (rfi_flush)
+ return sprintf(buf, "Mitigation: RFI Flush\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index f9941b3b5770..872d1f6dd11e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -18,8 +18,6 @@
#include "bpf_jit32.h"
-int bpf_jit_enable __read_mostly;
-
static inline void bpf_flush_icache(void *start, void *end)
{
smp_wmb();
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 6771c63b2bec..217a78e84865 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -21,8 +21,6 @@
#include "bpf_jit64.h"
-int bpf_jit_enable __read_mostly;
-
static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
memset32(area, BREAKPOINT_INSTRUCTION, size/4);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index cab24f549e7c..0ddc7ac6c5f1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu)
DUMP(p, kernel_toc, "lx");
DUMP(p, kernelbase, "lx");
DUMP(p, kernel_msr, "lx");
- DUMP(p, emergency_sp, "p");
+ DUMP(p, emergency_sp, "px");
#ifdef CONFIG_PPC_BOOK3S_64
- DUMP(p, nmi_emergency_sp, "p");
- DUMP(p, mc_emergency_sp, "p");
+ DUMP(p, nmi_emergency_sp, "px");
+ DUMP(p, mc_emergency_sp, "px");
DUMP(p, in_nmi, "x");
DUMP(p, in_mce, "x");
DUMP(p, hmi_event_available, "x");
@@ -2375,17 +2375,21 @@ static void dump_one_paca(int cpu)
DUMP(p, slb_cache_ptr, "x");
for (i = 0; i < SLB_CACHE_ENTRIES; i++)
printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]);
+
+ DUMP(p, rfi_flush_fallback_area, "px");
+ DUMP(p, l1d_flush_congruence, "llx");
+ DUMP(p, l1d_flush_sets, "llx");
#endif
DUMP(p, dscr_default, "llx");
#ifdef CONFIG_PPC_BOOK3E
- DUMP(p, pgd, "p");
- DUMP(p, kernel_pgd, "p");
- DUMP(p, tcd_ptr, "p");
- DUMP(p, mc_kstack, "p");
- DUMP(p, crit_kstack, "p");
- DUMP(p, dbg_kstack, "p");
+ DUMP(p, pgd, "px");
+ DUMP(p, kernel_pgd, "px");
+ DUMP(p, tcd_ptr, "px");
+ DUMP(p, mc_kstack, "px");
+ DUMP(p, crit_kstack, "px");
+ DUMP(p, dbg_kstack, "px");
#endif
- DUMP(p, __current, "p");
+ DUMP(p, __current, "px");
DUMP(p, kstack, "lx");
printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
DUMP(p, stab_rr, "lx");
@@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu)
#endif
#ifdef CONFIG_PPC_POWERNV
- DUMP(p, core_idle_state_ptr, "p");
+ DUMP(p, core_idle_state_ptr, "px");
DUMP(p, thread_idle_state, "x");
DUMP(p, thread_mask, "x");
DUMP(p, subcore_sibling_mask, "x");
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 1dfadbd126f3..e50188773ff3 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -28,8 +28,6 @@
#include <asm/set_memory.h>
#include "bpf_jit.h"
-int bpf_jit_enable __read_mostly;
-
struct bpf_jit {
u32 seen; /* Flags to remember seen eBPF instructions */
u32 seen_reg[16]; /* Array to remember which registers are used */
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index 09e318eb34ee..3bd8ca95e521 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -11,8 +11,6 @@
#include "bpf_jit_32.h"
-int bpf_jit_enable __read_mostly;
-
static inline bool is_simm13(unsigned int value)
{
return value + 0x1000 < 0x2000;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 635fdefd4ae2..50a24d7bd4c5 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -12,8 +12,6 @@
#include "bpf_jit_64.h"
-int bpf_jit_enable __read_mostly;
-
static inline bool is_simm13(unsigned int value)
{
return value + 0x1000 < 0x2000;
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a54f23a..60c4c342316c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl %esi
popl %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4f8e1d35a97c..aa15b4c0e3d1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -491,6 +491,17 @@ ENTRY(__switch_to_asm)
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
#endif
+#ifdef CONFIG_RETPOLINE
+ /*
+ * When switching from a shallower to a deeper call stack
+ * the RSB may either underflow or use entries populated
+ * with userspace addresses. On CPUs where those concerns
+ * exist, overwrite the RSB with entries which capture
+ * speculative execution to prevent attack.
+ */
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq %r15
popq %r14
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 005908ee9333..a2efb490f743 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -755,14 +755,14 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a9e57f08bfa6..98722773391d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
extern void apic_intr_mode_init(void);
extern void setup_local_APIC(void);
extern void init_apic_mappings(void);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447862f4..25b9375c1484 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -206,11 +206,11 @@
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -245,6 +245,7 @@
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4c3c68..22c5f3e6f820 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }
static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 402a11c803c3..7b45d8424150 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,7 +11,7 @@
* Fill the CPU return stack buffer.
*
* Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; jmp' loop to capture speculative execution.
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
*
* This is required in various cases for retpoline and IBRS-based
* mitigations for the Spectre variant 2 vulnerability. Sometimes to
@@ -38,11 +38,13 @@
call 772f; \
773: /* speculation trap */ \
pause; \
+ lfence; \
jmp 773b; \
772: \
call 774f; \
775: /* speculation trap */ \
pause; \
+ lfence; \
jmp 775b; \
774: \
dec reg; \
@@ -73,6 +75,7 @@
call .Ldo_rop_\@
.Lspec_trap_\@:
pause
+ lfence
jmp .Lspec_trap_\@
.Ldo_rop_\@:
mov \reg, (%_ASM_SP)
@@ -165,6 +168,7 @@
" .align 16\n" \
"901: call 903f;\n" \
"902: pause;\n" \
+ " lfence;\n" \
" jmp 902b;\n" \
" .align 16\n" \
"903: addl $4, %%esp;\n" \
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 880441f24146..25ddf02598d2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
return APIC_SYMMETRIC_IO;
}
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+ unsigned int value;
+
+ /*
+ * Don't do the setup now if we have a SMP BIOS as the
+ * through-I/O-APIC virtual wire mode might be active.
+ */
+ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
+ return;
+
+ /*
+ * Do not trust the local APIC being empty at bootup.
+ */
+ clear_local_APIC();
+
+ /*
+ * Enable APIC.
+ */
+ value = apic_read(APIC_SPIV);
+ value &= ~APIC_VECTOR_MASK;
+ value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+ /* This bit is reserved on P4/Xeon and should be cleared */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ (boot_cpu_data.x86 == 15))
+ value &= ~APIC_SPIV_FOCUS_DISABLED;
+ else
+#endif
+ value |= APIC_SPIV_FOCUS_DISABLED;
+ value |= SPURIOUS_APIC_VECTOR;
+ apic_write(APIC_SPIV, value);
+
+ /*
+ * Set up the virtual wire mode.
+ */
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
+ value = APIC_DM_NMI;
+ if (!lapic_is_integrated()) /* 82489DX */
+ value |= APIC_LVT_LEVEL_TRIGGER;
+ if (apic_extnmi == APIC_EXTNMI_NONE)
+ value |= APIC_LVT_MASKED;
+ apic_write(APIC_LVT1, value);
+}
+
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f8b03bb8e725..3cc471beb50b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
err = assign_irq_vector_policy(irqd, info);
trace_vector_setup(virq + i, false, err);
- if (err)
+ if (err) {
+ irqd->chip_data = NULL;
+ free_apic_chip_data(apicd);
goto error;
+ }
}
return 0;
error:
- x86_vector_free_irqs(domain, virq, i + 1);
+ x86_vector_free_irqs(domain, virq, i);
return err;
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc26185aa7..390b3dc3d438 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
#include <asm/alternative.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
+#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
@@ -155,6 +156,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
}
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+ boot_cpu_data.x86 == 6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
+ return true;
+ }
+ }
+ return false;
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +231,24 @@ retpoline_auto:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
+
+ /*
+ * If neither SMEP nor KPTI is available, there is a risk of
+ * hitting userspace addresses in the RSB after a context switch
+ * from a shallow call stack to a deeper one. To prevent this fill
+ * the entire RSB, even when using IBRS.
+ *
+ * Skylake era CPUs have a separate issue with *underflow* of the
+ * RSB, when they will predict 'ret' targets from the generic BTB.
+ * The proper mitigation for this is IBRS. If IBRS is not supported
+ * or deactivated in favour of retpolines the RSB fill on context
+ * switch is required.
+ */
+ if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+ !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+ pr_info("Filling RSB on context switch\n");
+ }
}
#undef pr_fmt
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 88dcf8479013..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
*/
if (static_branch_unlikely(&rdt_mon_enable_key))
rmdir_mondata_subdir_allrdtgrp(r, d->id);
- kfree(d->ctrl_val);
- kfree(d->rmid_busy_llc);
- kfree(d->mbm_total);
- kfree(d->mbm_local);
list_del(&d->list);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
+ kfree(d->ctrl_val);
+ kfree(d->rmid_busy_llc);
+ kfree(d->mbm_total);
+ kfree(d->mbm_local);
kfree(d);
return;
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad3db46..d0e69769abfd 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,7 +21,6 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
{ X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757b9cfd..7ba5d819ebe3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
p = fixup_pointer(&phys_base, physaddr);
*p += load_delta - sme_get_me_mask();
- /* Encrypt the kernel (if SME is active) */
- sme_encrypt_kernel();
+ /* Encrypt the kernel and related data (if SME is active) */
+ sme_encrypt_kernel(bp);
/*
* Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..56d99be3706a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
INTG(X86_TRAP_DB, debug),
SYSG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
* the traps which use them are reinitialized with IST after cpu_init() has
* set up TSS.
*/
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
INTG(X86_TRAP_DE, divide_error),
INTG(X86_TRAP_NMI, nmi),
INTG(X86_TRAP_BR, bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
/*
* The APIC and SMP idt entries
*/
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP
INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
* Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init().
*/
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF, page_fault),
};
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
* Override for the debug_idt. Same as the default, but with interrupt
* stack set to DEFAULT_STACK (0). Required for NMI trap handling.
*/
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
INTG(X86_TRAP_DB, debug),
INTG(X86_TRAP_BP, int3),
};
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* The exceptions which use Interrupt stacks. They are setup after
* cpu_init() when the TSS has been initialized.
*/
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 8da3e909e967..a539410c4ea9 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
struct irq_chip *chip = legacy_pic->chip;
int i;
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+ init_bsp_APIC();
+#endif
legacy_pic->init(0);
for (i = 0; i < nr_legacy_irqs(); i++)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 145810b0edf6..68d7ab81c62f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
!ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */
- /*
- * If SME is active, this memory will be marked encrypted by the
- * kernel when it is accessed (including relocation). However, the
- * ramdisk image was loaded decrypted by the bootloader, so make
- * sure that it is encrypted before accessing it. For SEV the
- * ramdisk will already be encrypted, so only do this for SME.
- */
- if (sme_active())
- sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8ea117f8142e..e169e85db434 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */
break;
- case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_DENVERTON:
crystal_khz = 25000; /* 25.0 MHz */
break;
@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
}
}
+ if (crystal_khz == 0)
+ return 0;
/*
* TSC frequency determined by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
+ if (cpu_khz != tsc_khz) {
+ pr_info("Detected %lu.%03lu MHz TSC",
+ (unsigned long)tsc_khz / 1000,
+ (unsigned long)tsc_khz % 1000);
+ }
+
/* Sanitize TSC ADJUST before cyc2ns gets initialized */
tsc_store_and_check_tsc_adjust(true);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 06fe3d51d385..b3e40773dce0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+ u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return;
/* Fault not from Protection Keys: nothing to do */
- if (si_code != SEGV_PKUERR)
+ if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
return;
/*
* force_sig_info_fault() is called from a number of
@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, pkey);
+ fill_sig_info_pkey(si_signo, si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 47388f0c0e59..af6f2f9c6a26 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -21,10 +21,14 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
-static __init void *early_alloc(size_t size, int nid)
+static __init void *early_alloc(size_t size, int nid, bool panic)
{
- return memblock_virt_alloc_try_nid_nopanic(size, size,
- __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+ if (panic)
+ return memblock_virt_alloc_try_nid(size, size,
+ __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+ else
+ return memblock_virt_alloc_try_nid_nopanic(size, size,
+ __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
}
static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
@@ -38,14 +42,14 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
if (boot_cpu_has(X86_FEATURE_PSE) &&
((end - addr) == PMD_SIZE) &&
IS_ALIGNED(addr, PMD_SIZE)) {
- p = early_alloc(PMD_SIZE, nid);
+ p = early_alloc(PMD_SIZE, nid, false);
if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
return;
else if (p)
memblock_free(__pa(p), PMD_SIZE);
}
- p = early_alloc(PAGE_SIZE, nid);
+ p = early_alloc(PAGE_SIZE, nid, true);
pmd_populate_kernel(&init_mm, pmd, p);
}
@@ -57,7 +61,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
if (!pte_none(*pte))
continue;
- p = early_alloc(PAGE_SIZE, nid);
+ p = early_alloc(PAGE_SIZE, nid, true);
entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
set_pte_at(&init_mm, addr, pte, entry);
} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -75,14 +79,14 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
((end - addr) == PUD_SIZE) &&
IS_ALIGNED(addr, PUD_SIZE)) {
- p = early_alloc(PUD_SIZE, nid);
+ p = early_alloc(PUD_SIZE, nid, false);
if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
return;
else if (p)
memblock_free(__pa(p), PUD_SIZE);
}
- p = early_alloc(PAGE_SIZE, nid);
+ p = early_alloc(PAGE_SIZE, nid, true);
pud_populate(&init_mm, pud, p);
}
@@ -101,7 +105,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
unsigned long next;
if (p4d_none(*p4d)) {
- void *p = early_alloc(PAGE_SIZE, nid);
+ void *p = early_alloc(PAGE_SIZE, nid, true);
p4d_populate(&init_mm, p4d, p);
}
@@ -122,7 +126,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
unsigned long next;
if (pgd_none(*pgd)) {
- p = early_alloc(PAGE_SIZE, nid);
+ p = early_alloc(PAGE_SIZE, nid, true);
pgd_populate(&init_mm, pgd, p);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 391b13402e40..3ef362f598e3 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}
-static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
- unsigned long end)
+struct sme_populate_pgd_data {
+ void *pgtable_area;
+ pgd_t *pgd;
+
+ pmdval_t pmd_flags;
+ pteval_t pte_flags;
+ unsigned long paddr;
+
+ unsigned long vaddr;
+ unsigned long vaddr_end;
+};
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
- pgd_start = start & PGDIR_MASK;
- pgd_end = end & PGDIR_MASK;
+ pgd_start = ppd->vaddr & PGDIR_MASK;
+ pgd_end = ppd->vaddr_end & PGDIR_MASK;
- pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
- pgd_size *= sizeof(pgd_t);
+ pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
- pgd_p = pgd_base + pgd_index(start);
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
memset(pgd_p, 0, pgd_size);
}
-#define PGD_FLAGS _KERNPG_TABLE_NOENC
-#define P4D_FLAGS _KERNPG_TABLE_NOENC
-#define PUD_FLAGS _KERNPG_TABLE_NOENC
-#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+#define PGD_FLAGS _KERNPG_TABLE_NOENC
+#define P4D_FLAGS _KERNPG_TABLE_NOENC
+#define PUD_FLAGS _KERNPG_TABLE_NOENC
+#define PMD_FLAGS _KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC PTE_FLAGS
+#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
- unsigned long vaddr, pmdval_t pmd_val)
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd_p;
p4d_t *p4d_p;
pud_t *pud_p;
pmd_t *pmd_p;
- pgd_p = pgd_base + pgd_index(vaddr);
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
if (native_pgd_val(*pgd_p)) {
if (IS_ENABLED(CONFIG_X86_5LEVEL))
p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
pgd_t pgd;
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p = pgtable_area;
+ p4d_p = ppd->pgtable_area;
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
- pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+ ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
} else {
- pud_p = pgtable_area;
+ pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+ ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
}
@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
}
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p += p4d_index(vaddr);
+ p4d_p += p4d_index(ppd->vaddr);
if (native_p4d_val(*p4d_p)) {
pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
} else {
p4d_t p4d;
- pud_p = pgtable_area;
+ pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+ ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
native_set_p4d(p4d_p, p4d);
}
}
- pud_p += pud_index(vaddr);
+ pud_p += pud_index(ppd->vaddr);
if (native_pud_val(*pud_p)) {
if (native_pud_val(*pud_p) & _PAGE_PSE)
- goto out;
+ return NULL;
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
} else {
pud_t pud;
- pmd_p = pgtable_area;
+ pmd_p = ppd->pgtable_area;
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
- pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+ ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
native_set_pud(pud_p, pud);
}
- pmd_p += pmd_index(vaddr);
+ return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+ pmd_t *pmd_p;
+
+ pmd_p = sme_prepare_pgd(ppd);
+ if (!pmd_p)
+ return;
+
+ pmd_p += pmd_index(ppd->vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
- native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+ native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
+}
-out:
- return pgtable_area;
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+ pmd_t *pmd_p;
+ pte_t *pte_p;
+
+ pmd_p = sme_prepare_pgd(ppd);
+ if (!pmd_p)
+ return;
+
+ pmd_p += pmd_index(ppd->vaddr);
+ if (native_pmd_val(*pmd_p)) {
+ if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+ return;
+
+ pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+ } else {
+ pmd_t pmd;
+
+ pte_p = ppd->pgtable_area;
+ memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+ ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+ pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+ native_set_pmd(pmd_p, pmd);
+ }
+
+ pte_p += pte_index(ppd->vaddr);
+ if (!native_pte_val(*pte_p))
+ native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd_large(ppd);
+
+ ppd->vaddr += PMD_PAGE_SIZE;
+ ppd->paddr += PMD_PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd(ppd);
+
+ ppd->vaddr += PAGE_SIZE;
+ ppd->paddr += PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+ pmdval_t pmd_flags, pteval_t pte_flags)
+{
+ unsigned long vaddr_end;
+
+ ppd->pmd_flags = pmd_flags;
+ ppd->pte_flags = pte_flags;
+
+ /* Save original end value since we modify the struct value */
+ vaddr_end = ppd->vaddr_end;
+
+ /* If start is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+ __sme_map_range_pte(ppd);
+
+ /* Create PMD entries */
+ ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+ __sme_map_range_pmd(ppd);
+
+ /* If end is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = vaddr_end;
+ __sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}
static unsigned long __init sme_pgtable_calc(unsigned long len)
{
- unsigned long p4d_size, pud_size, pmd_size;
+ unsigned long p4d_size, pud_size, pmd_size, pte_size;
unsigned long total;
/*
* Perform a relatively simplistic calculation of the pagetable
- * entries that are needed. That mappings will be covered by 2MB
- * PMD entries so we can conservatively calculate the required
+ * entries that are needed. Those mappings will be covered mostly
+ * by 2MB PMD entries so we can conservatively calculate the required
* number of P4D, PUD and PMD structures needed to perform the
- * mappings. Incrementing the count for each covers the case where
- * the addresses cross entries.
+ * mappings. For mappings that are not 2MB aligned, PTE mappings
+ * would be needed for the start and end portion of the address range
+ * that fall outside of the 2MB alignment. This results in, at most,
+ * two extra pages to hold PTE entries for each range that is mapped.
+ * Incrementing the count for each covers the case where the addresses
+ * cross entries.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
}
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+ pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
- total = p4d_size + pud_size + pmd_size;
+ total = p4d_size + pud_size + pmd_size + pte_size;
/*
* Now calculate the added pagetable structures needed to populate
@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return total;
}
-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len;
+ unsigned long initrd_start, initrd_end, initrd_len;
+ struct sme_populate_pgd_data ppd;
unsigned long pgtable_area_len;
- unsigned long paddr, pmd_flags;
unsigned long decrypted_base;
- void *pgtable_area;
- pgd_t *pgd;
if (!sme_active())
return;
/*
- * Prepare for encrypting the kernel by building new pagetables with
- * the necessary attributes needed to encrypt the kernel in place.
+ * Prepare for encrypting the kernel and initrd by building new
+ * pagetables with the necessary attributes needed to encrypt the
+ * kernel in place.
*
* One range of virtual addresses will map the memory occupied
- * by the kernel as encrypted.
+ * by the kernel and initrd as encrypted.
*
* Another range of virtual addresses will map the memory occupied
- * by the kernel as decrypted and write-protected.
+ * by the kernel and initrd as decrypted and write-protected.
*
* The use of write-protect attribute will prevent any of the
* memory from being cached.
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void)
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
kernel_len = kernel_end - kernel_start;
+ initrd_start = 0;
+ initrd_end = 0;
+ initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+ ((unsigned long)bp->ext_ramdisk_size << 32);
+ if (initrd_len) {
+ initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+ ((unsigned long)bp->ext_ramdisk_image << 32);
+ initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+ initrd_len = initrd_end - initrd_start;
+ }
+#endif
+
/* Set the encryption workarea to be immediately after the kernel */
workarea_start = kernel_end;
@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void)
*/
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+ if (initrd_len)
+ pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
/* PUDs and PMDs needed in the current pagetables for the workarea */
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
/*
* The total workarea includes the executable encryption area and
- * the pagetable area.
+ * the pagetable area. The start of the workarea is already 2MB
+ * aligned, align the end of the workarea on a 2MB boundary so that
+ * we don't try to create/allocate PTE entries from the workarea
+ * before it is mapped.
*/
workarea_len = execute_len + pgtable_area_len;
- workarea_end = workarea_start + workarea_len;
+ workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
/*
* Set the address to the start of where newly created pagetable
@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void)
* pagetables and when the new encrypted and decrypted kernel
* mappings are populated.
*/
- pgtable_area = (void *)execute_end;
+ ppd.pgtable_area = (void *)execute_end;
/*
* Make sure the current pagetable structure has entries for
* addressing the workarea.
*/
- pgd = (pgd_t *)native_read_cr3_pa();
- paddr = workarea_start;
- while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.pgd = (pgd_t *)native_read_cr3_pa();
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
/*
* A new pagetable structure is being built to allow for the kernel
- * to be encrypted. It starts with an empty PGD that will then be
- * populated with new PUDs and PMDs as the encrypted and decrypted
- * kernel mappings are created.
+ * and initrd to be encrypted. It starts with an empty PGD that will
+ * then be populated with new PUDs and PMDs as the encrypted and
+ * decrypted kernel mappings are created.
*/
- pgd = pgtable_area;
- memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
- pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
-
- /* Add encrypted kernel (identity) mappings */
- pmd_flags = PMD_FLAGS | _PAGE_ENC;
- paddr = kernel_start;
- while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + pmd_flags);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.pgd = ppd.pgtable_area;
+ memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+ ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
/*
* A different PGD index/entry must be used to get different
@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void)
* the base of the mapping.
*/
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+ if (initrd_len) {
+ unsigned long check_base;
+
+ check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+ decrypted_base = max(decrypted_base, check_base);
+ }
decrypted_base <<= PGDIR_SHIFT;
+ /* Add encrypted kernel (identity) mappings */
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start;
+ ppd.vaddr_end = kernel_end;
+ sme_map_range_encrypted(&ppd);
+
/* Add decrypted, write-protected kernel (non-identity) mappings */
- pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
- paddr = kernel_start;
- while (paddr < kernel_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + pmd_flags);
-
- paddr += PMD_PAGE_SIZE;
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
+
+ if (initrd_len) {
+ /* Add encrypted initrd (identity) mappings */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start;
+ ppd.vaddr_end = initrd_end;
+ sme_map_range_encrypted(&ppd);
+ /*
+ * Add decrypted, write-protected initrd (non-identity) mappings
+ */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
}
/* Add decrypted workarea mappings to both kernel mappings */
- paddr = workarea_start;
- while (paddr < workarea_end) {
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr,
- paddr + PMD_FLAGS);
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
- pgtable_area = sme_populate_pgd(pgd, pgtable_area,
- paddr + decrypted_base,
- paddr + PMD_FLAGS);
-
- paddr += PMD_PAGE_SIZE;
- }
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_map_range_decrypted(&ppd);
/* Perform the encryption */
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
- kernel_len, workarea_start, (unsigned long)pgd);
+ kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+ if (initrd_len)
+ sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+ initrd_len, workarea_start,
+ (unsigned long)ppd.pgd);
/*
* At this point we are running encrypted. Remove the mappings for
* the decrypted areas - all that is needed for this is to remove
* the PGD entry/entries.
*/
- sme_clear_pgd(pgd, kernel_start + decrypted_base,
- kernel_end + decrypted_base);
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+
+ if (initrd_len) {
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+ }
- sme_clear_pgd(pgd, workarea_start + decrypted_base,
- workarea_end + decrypted_base);
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_clear_pgd(&ppd);
/* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3());
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d541df1..01f682cf77a8 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
/*
* Entry parameters:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
- * RDX - length of kernel
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
+ * RDX - length to encrypt
* RCX - virtual address of the encryption workarea, including:
* - stack page (PAGE_SIZE)
* - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
addq $PAGE_SIZE, %rax /* Workarea encryption routine */
push %r12
- movq %rdi, %r10 /* Encrypted kernel */
- movq %rsi, %r11 /* Decrypted kernel */
- movq %rdx, %r12 /* Kernel length */
+ movq %rdi, %r10 /* Encrypted area */
+ movq %rsi, %r11 /* Decrypted area */
+ movq %rdx, %r12 /* Area length */
/* Copy encryption routine into the workarea */
movq %rax, %rdi /* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
rep movsb
/* Setup registers for call */
- movq %r10, %rdi /* Encrypted kernel */
- movq %r11, %rsi /* Decrypted kernel */
+ movq %r10, %rdi /* Encrypted area */
+ movq %r11, %rsi /* Decrypted area */
movq %r8, %rdx /* Pagetables used for encryption */
- movq %r12, %rcx /* Kernel length */
+ movq %r12, %rcx /* Area length */
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
ENTRY(__enc_copy)
/*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
* the kernel will be encrypted during the process. So this
* routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
* during execution.
*
* On entry the registers must be:
- * RDI - virtual address for the encrypted kernel mapping
- * RSI - virtual address for the decrypted kernel mapping
+ * RDI - virtual address for the encrypted mapping
+ * RSI - virtual address for the decrypted mapping
* RDX - address of the pagetables to use for encryption
- * RCX - length of kernel
+ * RCX - length of area
* R8 - intermediate copy buffer
*
* RAX - points to this routine
*
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
*/
/* Enable the new page tables */
mov %rdx, %cr3
@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
orq $X86_CR4_PGE, %rdx
mov %rdx, %cr4
+ push %r15
+ push %r12
+
+ movq %rcx, %r9 /* Save area length */
+ movq %rdi, %r10 /* Save encrypted area address */
+ movq %rsi, %r11 /* Save decrypted area address */
+
/* Set the PAT register PA5 entry to write-protect */
- push %rcx
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
- push %rdx /* Save original PAT value */
+ mov %rdx, %r15 /* Save original PAT value */
andl $0xffff00ff, %edx /* Clear PA5 */
orl $0x00000500, %edx /* Set PA5 to WP */
wrmsr
- pop %rdx /* RDX contains original PAT value */
- pop %rcx
-
- movq %rcx, %r9 /* Save kernel length */
- movq %rdi, %r10 /* Save encrypted kernel address */
- movq %rsi, %r11 /* Save decrypted kernel address */
wbinvd /* Invalidate any cache entries */
- /* Copy/encrypt 2MB at a time */
+ /* Copy/encrypt up to 2MB at a time */
+ movq $PMD_PAGE_SIZE, %r12
1:
- movq %r11, %rsi /* Source - decrypted kernel */
+ cmpq %r12, %r9
+ jnb 2f
+ movq %r9, %r12
+
+2:
+ movq %r11, %rsi /* Source - decrypted area */
movq %r8, %rdi /* Dest - intermediate copy buffer */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r12, %rcx
rep movsb
movq %r8, %rsi /* Source - intermediate copy buffer */
- movq %r10, %rdi /* Dest - encrypted kernel */
- movq $PMD_PAGE_SIZE, %rcx /* 2MB length */
+ movq %r10, %rdi /* Dest - encrypted area */
+ movq %r12, %rcx
rep movsb
- addq $PMD_PAGE_SIZE, %r11
- addq $PMD_PAGE_SIZE, %r10
- subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */
+ addq %r12, %r11
+ addq %r12, %r10
+ subq %r12, %r9 /* Area length decrement */
jnz 1b /* Area length not zero? */
/* Restore PAT register */
- push %rdx /* Save original PAT value */
movl $MSR_IA32_CR_PAT, %ecx
rdmsr
- pop %rdx /* Restore original PAT value */
+ mov %r15, %rdx /* Restore original PAT value */
wrmsr
+ pop %r12
+ pop %r15
+
ret
.L__enc_copy_end:
ENDPROC(__enc_copy)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 87f214fbe66e..5acee5139e28 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -15,8 +15,6 @@
#include <asm/set_memory.h>
#include <linux/bpf.h>
-int bpf_jit_enable __read_mostly;
-
/*
* assembly code in arch/x86/net/bpf_jit.S
*/
@@ -154,6 +152,11 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_AX));
}
+static bool is_axreg(u32 reg)
+{
+ return reg == BPF_REG_0;
+}
+
/* add modifiers if 'reg' maps to x64 registers r8..r15 */
static u8 add_1mod(u8 byte, u32 reg)
{
@@ -447,16 +450,36 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
+ /* b3 holds 'normal' opcode, b2 short form only valid
+ * in case dst is eax/rax.
+ */
switch (BPF_OP(insn->code)) {
- case BPF_ADD: b3 = 0xC0; break;
- case BPF_SUB: b3 = 0xE8; break;
- case BPF_AND: b3 = 0xE0; break;
- case BPF_OR: b3 = 0xC8; break;
- case BPF_XOR: b3 = 0xF0; break;
+ case BPF_ADD:
+ b3 = 0xC0;
+ b2 = 0x05;
+ break;
+ case BPF_SUB:
+ b3 = 0xE8;
+ b2 = 0x2D;
+ break;
+ case BPF_AND:
+ b3 = 0xE0;
+ b2 = 0x25;
+ break;
+ case BPF_OR:
+ b3 = 0xC8;
+ b2 = 0x0D;
+ break;
+ case BPF_XOR:
+ b3 = 0xF0;
+ b2 = 0x35;
+ break;
}
if (is_imm8(imm32))
EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
+ else if (is_axreg(dst_reg))
+ EMIT1_off32(b2, imm32);
else
EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
break;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8193b38a1cae..3c09122bf038 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4449,6 +4449,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
* https://bugzilla.kernel.org/show_bug.cgi?id=121671
*/
{ "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 },
+ { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 },
/* Devices we expect to fail diagnostics */
diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index 02d78f6cecbb..ba8acca036df 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -55,7 +55,7 @@ config BCMA_DRIVER_PCI
config BCMA_DRIVER_PCI_HOSTMODE
bool "Driver for PCI core working in hostmode"
- depends on MIPS && BCMA_DRIVER_PCI
+ depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY
help
PCI core hostmode operation (external PCI bus).
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index f9042bcc27a4..7b14d6280e44 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -152,14 +152,13 @@ static int bgpio_get_set_multiple(struct gpio_chip *gc, unsigned long *mask,
{
unsigned long get_mask = 0;
unsigned long set_mask = 0;
- int bit = 0;
- while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) {
- if (gc->bgpio_dir & BIT(bit))
- set_mask |= BIT(bit);
- else
- get_mask |= BIT(bit);
- }
+ /* Make sure we first clear any bits that are zero when we read the register */
+ *bits &= ~*mask;
+
+ /* Exploit the fact that we know which directions are set */
+ set_mask = *mask & gc->bgpio_dir;
+ get_mask = *mask & ~gc->bgpio_dir;
if (set_mask)
*bits |= gc->read_reg(gc->reg_set) & set_mask;
@@ -176,13 +175,13 @@ static int bgpio_get(struct gpio_chip *gc, unsigned int gpio)
/*
* This only works if the bits in the GPIO register are in native endianness.
- * It is dirt simple and fast in this case. (Also the most common case.)
*/
static int bgpio_get_multiple(struct gpio_chip *gc, unsigned long *mask,
unsigned long *bits)
{
-
- *bits = gc->read_reg(gc->reg_dat) & *mask;
+ /* Make sure we first clear any bits that are zero when we read the register */
+ *bits &= ~*mask;
+ *bits |= gc->read_reg(gc->reg_dat) & *mask;
return 0;
}
@@ -196,9 +195,12 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask,
unsigned long val;
int bit;
+ /* Make sure we first clear any bits that are zero when we read the register */
+ *bits &= ~*mask;
+
/* Create a mirrored mask */
- bit = 0;
- while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio)
+ bit = -1;
+ while ((bit = find_next_bit(mask, gc->ngpio, bit + 1)) < gc->ngpio)
readmask |= bgpio_line2mask(gc, bit);
/* Read the register */
@@ -208,8 +210,8 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask,
* Mirror the result into the "bits" result, this will give line 0
* in bit 0 ... line 31 in bit 31 for a 32bit register.
*/
- bit = 0;
- while ((bit = find_next_bit(&val, gc->ngpio, bit)) != gc->ngpio)
+ bit = -1;
+ while ((bit = find_next_bit(&val, gc->ngpio, bit + 1)) < gc->ngpio)
*bits |= bgpio_line2mask(gc, bit);
return 0;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 123585eeb87d..50f8443641b8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1211,23 +1211,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe)
pipe_name(pipe));
}
-static void assert_cursor(struct drm_i915_private *dev_priv,
- enum pipe pipe, bool state)
-{
- bool cur_state;
-
- if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
- cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
- else
- cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
-
- I915_STATE_WARN(cur_state != state,
- "cursor on pipe %c assertion failure (expected %s, current %s)\n",
- pipe_name(pipe), onoff(state), onoff(cur_state));
-}
-#define assert_cursor_enabled(d, p) assert_cursor(d, p, true)
-#define assert_cursor_disabled(d, p) assert_cursor(d, p, false)
-
void assert_pipe(struct drm_i915_private *dev_priv,
enum pipe pipe, bool state)
{
@@ -1255,77 +1238,25 @@ void assert_pipe(struct drm_i915_private *dev_priv,
pipe_name(pipe), onoff(state), onoff(cur_state));
}
-static void assert_plane(struct drm_i915_private *dev_priv,
- enum plane plane, bool state)
+static void assert_plane(struct intel_plane *plane, bool state)
{
- u32 val;
- bool cur_state;
+ bool cur_state = plane->get_hw_state(plane);
- val = I915_READ(DSPCNTR(plane));
- cur_state = !!(val & DISPLAY_PLANE_ENABLE);
I915_STATE_WARN(cur_state != state,
- "plane %c assertion failure (expected %s, current %s)\n",
- plane_name(plane), onoff(state), onoff(cur_state));
+ "%s assertion failure (expected %s, current %s)\n",
+ plane->base.name, onoff(state), onoff(cur_state));
}
-#define assert_plane_enabled(d, p) assert_plane(d, p, true)
-#define assert_plane_disabled(d, p) assert_plane(d, p, false)
-
-static void assert_planes_disabled(struct drm_i915_private *dev_priv,
- enum pipe pipe)
-{
- int i;
-
- /* Primary planes are fixed to pipes on gen4+ */
- if (INTEL_GEN(dev_priv) >= 4) {
- u32 val = I915_READ(DSPCNTR(pipe));
- I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE,
- "plane %c assertion failure, should be disabled but not\n",
- plane_name(pipe));
- return;
- }
+#define assert_plane_enabled(p) assert_plane(p, true)
+#define assert_plane_disabled(p) assert_plane(p, false)
- /* Need to check both planes against the pipe */
- for_each_pipe(dev_priv, i) {
- u32 val = I915_READ(DSPCNTR(i));
- enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >>
- DISPPLANE_SEL_PIPE_SHIFT;
- I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe,
- "plane %c assertion failure, should be off on pipe %c but is still active\n",
- plane_name(i), pipe_name(pipe));
- }
-}
-
-static void assert_sprites_disabled(struct drm_i915_private *dev_priv,
- enum pipe pipe)
+static void assert_planes_disabled(struct intel_crtc *crtc)
{
- int sprite;
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ struct intel_plane *plane;
- if (INTEL_GEN(dev_priv) >= 9) {
- for_each_sprite(dev_priv, pipe, sprite) {
- u32 val = I915_READ(PLANE_CTL(pipe, sprite));
- I915_STATE_WARN(val & PLANE_CTL_ENABLE,
- "plane %d assertion failure, should be off on pipe %c but is still active\n",
- sprite, pipe_name(pipe));
- }
- } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
- for_each_sprite(dev_priv, pipe, sprite) {
- u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite));
- I915_STATE_WARN(val & SP_ENABLE,
- "sprite %c assertion failure, should be off on pipe %c but is still active\n",
- sprite_name(pipe, sprite), pipe_name(pipe));
- }
- } else if (INTEL_GEN(dev_priv) >= 7) {
- u32 val = I915_READ(SPRCTL(pipe));
- I915_STATE_WARN(val & SPRITE_ENABLE,
- "sprite %c assertion failure, should be off on pipe %c but is still active\n",
- plane_name(pipe), pipe_name(pipe));
- } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
- u32 val = I915_READ(DVSCNTR(pipe));
- I915_STATE_WARN(val & DVS_ENABLE,
- "sprite %c assertion failure, should be off on pipe %c but is still active\n",
- plane_name(pipe), pipe_name(pipe));
- }
+ for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane)
+ assert_plane_disabled(plane);
}
static void assert_vblank_disabled(struct drm_crtc *crtc)
@@ -1918,9 +1849,7 @@ static void intel_enable_pipe(struct intel_crtc *crtc)
DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe));
- assert_planes_disabled(dev_priv, pipe);
- assert_cursor_disabled(dev_priv, pipe);
- assert_sprites_disabled(dev_priv, pipe);
+ assert_planes_disabled(crtc);
/*
* A pipe without a PLL won't actually be able to drive bits from
@@ -1989,9 +1918,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc)
* Make sure planes won't keep trying to pump pixels to us,
* or we might hang the display.
*/
- assert_planes_disabled(dev_priv, pipe);
- assert_cursor_disabled(dev_priv, pipe);
- assert_sprites_disabled(dev_priv, pipe);
+ assert_planes_disabled(crtc);
reg = PIPECONF(cpu_transcoder);
val = I915_READ(reg);
@@ -2820,6 +2747,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state,
crtc_state->active_planes);
}
+static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
+ struct intel_plane *plane)
+{
+ struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
+ struct intel_plane_state *plane_state =
+ to_intel_plane_state(plane->base.state);
+
+ intel_set_plane_visible(crtc_state, plane_state, false);
+
+ if (plane->id == PLANE_PRIMARY)
+ intel_pre_disable_primary_noatomic(&crtc->base);
+
+ trace_intel_disable_plane(&plane->base, crtc);
+ plane->disable_plane(plane, crtc);
+}
+
static void
intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
struct intel_initial_plane_config *plane_config)
@@ -2877,12 +2821,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
* simplest solution is to just disable the primary plane now and
* pretend the BIOS never had it enabled.
*/
- intel_set_plane_visible(to_intel_crtc_state(crtc_state),
- to_intel_plane_state(plane_state),
- false);
- intel_pre_disable_primary_noatomic(&intel_crtc->base);
- trace_intel_disable_plane(primary, intel_crtc);
- intel_plane->disable_plane(intel_plane, intel_crtc);
+ intel_plane_disable_noatomic(intel_crtc, intel_plane);
return;
@@ -3385,6 +3324,31 @@ static void i9xx_disable_primary_plane(struct intel_plane *primary,
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
+static bool i9xx_plane_get_hw_state(struct intel_plane *primary)
+{
+
+ struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
+ enum intel_display_power_domain power_domain;
+ enum plane plane = primary->plane;
+ enum pipe pipe = primary->pipe;
+ bool ret;
+
+ /*
+ * Not 100% correct for planes that can move between pipes,
+ * but that's only the case for gen2-4 which don't have any
+ * display power wells.
+ */
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ return false;
+
+ ret = I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE;
+
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
+}
+
static u32
intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane)
{
@@ -4866,7 +4830,8 @@ void hsw_enable_ips(struct intel_crtc *crtc)
* a vblank wait.
*/
- assert_plane_enabled(dev_priv, crtc->plane);
+ assert_plane_enabled(to_intel_plane(crtc->base.primary));
+
if (IS_BROADWELL(dev_priv)) {
mutex_lock(&dev_priv->pcu_lock);
WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
@@ -4899,7 +4864,8 @@ void hsw_disable_ips(struct intel_crtc *crtc)
if (!crtc->config->ips_enabled)
return;
- assert_plane_enabled(dev_priv, crtc->plane);
+ assert_plane_enabled(to_intel_plane(crtc->base.primary));
+
if (IS_BROADWELL(dev_priv)) {
mutex_lock(&dev_priv->pcu_lock);
WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
@@ -5899,6 +5865,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
struct drm_i915_private *dev_priv = to_i915(crtc->dev);
enum intel_display_power_domain domain;
+ struct intel_plane *plane;
u64 domains;
struct drm_atomic_state *state;
struct intel_crtc_state *crtc_state;
@@ -5907,11 +5874,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
if (!intel_crtc->active)
return;
- if (crtc->primary->state->visible) {
- intel_pre_disable_primary_noatomic(crtc);
+ for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) {
+ const struct intel_plane_state *plane_state =
+ to_intel_plane_state(plane->base.state);
- intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary));
- crtc->primary->state->visible = false;
+ if (plane_state->base.visible)
+ intel_plane_disable_noatomic(intel_crtc, plane);
}
state = drm_atomic_state_alloc(crtc->dev);
@@ -9477,6 +9445,23 @@ static void i845_disable_cursor(struct intel_plane *plane,
i845_update_cursor(plane, NULL, NULL);
}
+static bool i845_cursor_get_hw_state(struct intel_plane *plane)
+{
+ struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+ enum intel_display_power_domain power_domain;
+ bool ret;
+
+ power_domain = POWER_DOMAIN_PIPE(PIPE_A);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ return false;
+
+ ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
+
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
+}
+
static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state)
{
@@ -9670,6 +9655,28 @@ static void i9xx_disable_cursor(struct intel_plane *plane,
i9xx_update_cursor(plane, NULL, NULL);
}
+static bool i9xx_cursor_get_hw_state(struct intel_plane *plane)
+{
+ struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+ enum intel_display_power_domain power_domain;
+ enum pipe pipe = plane->pipe;
+ bool ret;
+
+ /*
+ * Not 100% correct for planes that can move between pipes,
+ * but that's only the case for gen2-3 which don't have any
+ * display power wells.
+ */
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ return false;
+
+ ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
+
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
+}
/* VESA 640x480x72Hz mode to set on the pipe */
static const struct drm_display_mode load_detect_mode = {
@@ -13205,6 +13212,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
primary->update_plane = skl_update_plane;
primary->disable_plane = skl_disable_plane;
+ primary->get_hw_state = skl_plane_get_hw_state;
} else if (INTEL_GEN(dev_priv) >= 9) {
intel_primary_formats = skl_primary_formats;
num_formats = ARRAY_SIZE(skl_primary_formats);
@@ -13215,6 +13223,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
primary->update_plane = skl_update_plane;
primary->disable_plane = skl_disable_plane;
+ primary->get_hw_state = skl_plane_get_hw_state;
} else if (INTEL_GEN(dev_priv) >= 4) {
intel_primary_formats = i965_primary_formats;
num_formats = ARRAY_SIZE(i965_primary_formats);
@@ -13222,6 +13231,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
primary->update_plane = i9xx_update_primary_plane;
primary->disable_plane = i9xx_disable_primary_plane;
+ primary->get_hw_state = i9xx_plane_get_hw_state;
} else {
intel_primary_formats = i8xx_primary_formats;
num_formats = ARRAY_SIZE(i8xx_primary_formats);
@@ -13229,6 +13239,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
primary->update_plane = i9xx_update_primary_plane;
primary->disable_plane = i9xx_disable_primary_plane;
+ primary->get_hw_state = i9xx_plane_get_hw_state;
}
if (INTEL_GEN(dev_priv) >= 9)
@@ -13318,10 +13329,12 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv,
if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
cursor->update_plane = i845_update_cursor;
cursor->disable_plane = i845_disable_cursor;
+ cursor->get_hw_state = i845_cursor_get_hw_state;
cursor->check_plane = i845_check_cursor;
} else {
cursor->update_plane = i9xx_update_cursor;
cursor->disable_plane = i9xx_disable_cursor;
+ cursor->get_hw_state = i9xx_cursor_get_hw_state;
cursor->check_plane = i9xx_check_cursor;
}
@@ -14671,8 +14684,11 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n",
pipe_name(pipe));
- assert_plane_disabled(dev_priv, PLANE_A);
- assert_plane_disabled(dev_priv, PLANE_B);
+ WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE);
+ WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE);
+ WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE);
+ WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE);
+ WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE);
I915_WRITE(PIPECONF(pipe), 0);
POSTING_READ(PIPECONF(pipe));
@@ -14683,22 +14699,36 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
POSTING_READ(DPLL(pipe));
}
-static bool
-intel_check_plane_mapping(struct intel_crtc *crtc)
+static bool intel_plane_mapping_ok(struct intel_crtc *crtc,
+ struct intel_plane *primary)
{
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- u32 val;
+ enum plane plane = primary->plane;
+ u32 val = I915_READ(DSPCNTR(plane));
- if (INTEL_INFO(dev_priv)->num_pipes == 1)
- return true;
+ return (val & DISPLAY_PLANE_ENABLE) == 0 ||
+ (val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe);
+}
- val = I915_READ(DSPCNTR(!crtc->plane));
+static void
+intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv)
+{
+ struct intel_crtc *crtc;
- if ((val & DISPLAY_PLANE_ENABLE) &&
- (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe))
- return false;
+ if (INTEL_GEN(dev_priv) >= 4)
+ return;
- return true;
+ for_each_intel_crtc(&dev_priv->drm, crtc) {
+ struct intel_plane *plane =
+ to_intel_plane(crtc->base.primary);
+
+ if (intel_plane_mapping_ok(crtc, plane))
+ continue;
+
+ DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n",
+ plane->base.name);
+ intel_plane_disable_noatomic(crtc, plane);
+ }
}
static bool intel_crtc_has_encoders(struct intel_crtc *crtc)
@@ -14754,33 +14784,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
/* Disable everything but the primary plane */
for_each_intel_plane_on_crtc(dev, crtc, plane) {
- if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
- continue;
+ const struct intel_plane_state *plane_state =
+ to_intel_plane_state(plane->base.state);
- trace_intel_disable_plane(&plane->base, crtc);
- plane->disable_plane(plane, crtc);
+ if (plane_state->base.visible &&
+ plane->base.type != DRM_PLANE_TYPE_PRIMARY)
+ intel_plane_disable_noatomic(crtc, plane);
}
}
- /* We need to sanitize the plane -> pipe mapping first because this will
- * disable the crtc (and hence change the state) if it is wrong. Note
- * that gen4+ has a fixed plane -> pipe mapping. */
- if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) {
- bool plane;
-
- DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n",
- crtc->base.base.id, crtc->base.name);
-
- /* Pipe has the wrong plane attached and the plane is active.
- * Temporarily change the plane mapping and disable everything
- * ... */
- plane = crtc->plane;
- crtc->base.primary->state->visible = true;
- crtc->plane = !plane;
- intel_crtc_disable_noatomic(&crtc->base, ctx);
- crtc->plane = plane;
- }
-
/* Adjust the state of the output pipe according to whether we
* have active connectors/encoders. */
if (crtc->active && !intel_crtc_has_encoders(crtc))
@@ -14885,24 +14897,21 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv)
intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
}
-static bool primary_get_hw_state(struct intel_plane *plane)
-{
- struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-
- return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE;
-}
-
/* FIXME read out full plane state for all planes */
static void readout_plane_state(struct intel_crtc *crtc)
{
- struct intel_plane *primary = to_intel_plane(crtc->base.primary);
- bool visible;
+ struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+ struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
+ struct intel_plane *plane;
- visible = crtc->active && primary_get_hw_state(primary);
+ for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+ struct intel_plane_state *plane_state =
+ to_intel_plane_state(plane->base.state);
+ bool visible = plane->get_hw_state(plane);
- intel_set_plane_visible(to_intel_crtc_state(crtc->base.state),
- to_intel_plane_state(primary->base.state),
- visible);
+ intel_set_plane_visible(crtc_state, plane_state, visible);
+ }
}
static void intel_modeset_readout_hw_state(struct drm_device *dev)
@@ -15100,6 +15109,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
/* HW state is read out, now we need to sanitize this mess. */
get_encoder_power_domains(dev_priv);
+ intel_sanitize_plane_mapping(dev_priv);
+
for_each_intel_encoder(dev, encoder) {
intel_sanitize_encoder(encoder);
}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 6c7f8bca574e..5d77f75a9f9c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -862,6 +862,7 @@ struct intel_plane {
const struct intel_plane_state *plane_state);
void (*disable_plane)(struct intel_plane *plane,
struct intel_crtc *crtc);
+ bool (*get_hw_state)(struct intel_plane *plane);
int (*check_plane)(struct intel_plane *plane,
struct intel_crtc_state *crtc_state,
struct intel_plane_state *state);
@@ -1924,6 +1925,7 @@ void skl_update_plane(struct intel_plane *plane,
const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state);
void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc);
+bool skl_plane_get_hw_state(struct intel_plane *plane);
/* intel_tv.c */
void intel_tv_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 4fcf80ca91dd..4a8a5d918a83 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -329,6 +329,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
+bool
+skl_plane_get_hw_state(struct intel_plane *plane)
+{
+ struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+ enum intel_display_power_domain power_domain;
+ enum plane_id plane_id = plane->id;
+ enum pipe pipe = plane->pipe;
+ bool ret;
+
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ return false;
+
+ ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE;
+
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
+}
+
static void
chv_update_csc(struct intel_plane *plane, uint32_t format)
{
@@ -506,6 +526,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
+static bool
+vlv_plane_get_hw_state(struct intel_plane *plane)
+{
+ struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+ enum intel_display_power_domain power_domain;
+ enum plane_id plane_id = plane->id;
+ enum pipe pipe = plane->pipe;
+ bool ret;
+
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ return false;
+
+ ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE;
+
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
+}
+
static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state)
{
@@ -646,6 +686,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
+static bool
+ivb_plane_get_hw_state(struct intel_plane *plane)
+{
+ struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+ enum intel_display_power_domain power_domain;
+ enum pipe pipe = plane->pipe;
+ bool ret;
+
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ return false;
+
+ ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE;
+
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
+}
+
static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state)
{
@@ -777,6 +836,25 @@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
}
+static bool
+g4x_plane_get_hw_state(struct intel_plane *plane)
+{
+ struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+ enum intel_display_power_domain power_domain;
+ enum pipe pipe = plane->pipe;
+ bool ret;
+
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ return false;
+
+ ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE;
+
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
+}
+
static int
intel_check_sprite_plane(struct intel_plane *plane,
struct intel_crtc_state *crtc_state,
@@ -1232,6 +1310,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
intel_plane->update_plane = skl_update_plane;
intel_plane->disable_plane = skl_disable_plane;
+ intel_plane->get_hw_state = skl_plane_get_hw_state;
plane_formats = skl_plane_formats;
num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1242,6 +1321,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
intel_plane->update_plane = skl_update_plane;
intel_plane->disable_plane = skl_disable_plane;
+ intel_plane->get_hw_state = skl_plane_get_hw_state;
plane_formats = skl_plane_formats;
num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1252,6 +1332,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
intel_plane->update_plane = vlv_update_plane;
intel_plane->disable_plane = vlv_disable_plane;
+ intel_plane->get_hw_state = vlv_plane_get_hw_state;
plane_formats = vlv_plane_formats;
num_plane_formats = ARRAY_SIZE(vlv_plane_formats);
@@ -1267,6 +1348,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
intel_plane->update_plane = ivb_update_plane;
intel_plane->disable_plane = ivb_disable_plane;
+ intel_plane->get_hw_state = ivb_plane_get_hw_state;
plane_formats = snb_plane_formats;
num_plane_formats = ARRAY_SIZE(snb_plane_formats);
@@ -1277,6 +1359,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
intel_plane->update_plane = g4x_update_plane;
intel_plane->disable_plane = g4x_disable_plane;
+ intel_plane->get_hw_state = g4x_plane_get_hw_state;
modifiers = i9xx_plane_format_modifiers;
if (IS_GEN6(dev_priv)) {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 0760b93e9d1f..baab93398e54 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -121,6 +121,7 @@ int nv41_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
int nv44_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
int nv50_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
int g84_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
+int mcp77_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
int gf100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
int gk104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
int gk20a_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 435ff8662cfa..ef687414969e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1447,11 +1447,13 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
args.nv50.ro = 0;
args.nv50.kind = mem->kind;
args.nv50.comp = mem->comp;
+ argc = sizeof(args.nv50);
break;
case NVIF_CLASS_MEM_GF100:
args.gf100.version = 0;
args.gf100.ro = 0;
args.gf100.kind = mem->kind;
+ argc = sizeof(args.gf100);
break;
default:
WARN_ON(1);
@@ -1459,7 +1461,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
}
ret = nvif_object_map_handle(&mem->mem.object,
- &argc, argc,
+ &args, argc,
&handle, &length);
if (ret != 1)
return ret ? ret : -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 00eeaaffeae5..08e77cd55e6e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1251,7 +1251,7 @@ nvaa_chipset = {
.i2c = g94_i2c_new,
.imem = nv50_instmem_new,
.mc = g98_mc_new,
- .mmu = g84_mmu_new,
+ .mmu = mcp77_mmu_new,
.mxm = nv50_mxm_new,
.pci = g94_pci_new,
.therm = g84_therm_new,
@@ -1283,7 +1283,7 @@ nvac_chipset = {
.i2c = g94_i2c_new,
.imem = nv50_instmem_new,
.mc = g98_mc_new,
- .mmu = g84_mmu_new,
+ .mmu = mcp77_mmu_new,
.mxm = nv50_mxm_new,
.pci = g94_pci_new,
.therm = g84_therm_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
index 9646adec57cb..243f0a5c8a62 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
@@ -73,7 +73,8 @@ static int
nvkm_bar_fini(struct nvkm_subdev *subdev, bool suspend)
{
struct nvkm_bar *bar = nvkm_bar(subdev);
- bar->func->bar1.fini(bar);
+ if (bar->func->bar1.fini)
+ bar->func->bar1.fini(bar);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c
index b10077d38839..35878fb538f2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c
@@ -26,7 +26,6 @@ gk20a_bar_func = {
.dtor = gf100_bar_dtor,
.oneinit = gf100_bar_oneinit,
.bar1.init = gf100_bar_bar1_init,
- .bar1.fini = gf100_bar_bar1_fini,
.bar1.wait = gf100_bar_bar1_wait,
.bar1.vmm = gf100_bar_bar1_vmm,
.flush = g84_bar_flush,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
index 352a65f9371c..67ee983bb026 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
@@ -4,6 +4,7 @@ nvkm-y += nvkm/subdev/mmu/nv41.o
nvkm-y += nvkm/subdev/mmu/nv44.o
nvkm-y += nvkm/subdev/mmu/nv50.o
nvkm-y += nvkm/subdev/mmu/g84.o
+nvkm-y += nvkm/subdev/mmu/mcp77.o
nvkm-y += nvkm/subdev/mmu/gf100.o
nvkm-y += nvkm/subdev/mmu/gk104.o
nvkm-y += nvkm/subdev/mmu/gk20a.o
@@ -22,6 +23,7 @@ nvkm-y += nvkm/subdev/mmu/vmmnv04.o
nvkm-y += nvkm/subdev/mmu/vmmnv41.o
nvkm-y += nvkm/subdev/mmu/vmmnv44.o
nvkm-y += nvkm/subdev/mmu/vmmnv50.o
+nvkm-y += nvkm/subdev/mmu/vmmmcp77.o
nvkm-y += nvkm/subdev/mmu/vmmgf100.o
nvkm-y += nvkm/subdev/mmu/vmmgk104.o
nvkm-y += nvkm/subdev/mmu/vmmgk20a.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c
new file mode 100644
index 000000000000..0527b50730d9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "mem.h"
+#include "vmm.h"
+
+#include <nvif/class.h>
+
+static const struct nvkm_mmu_func
+mcp77_mmu = {
+ .dma_bits = 40,
+ .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}},
+ .mem = {{ -1, 0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map },
+ .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, mcp77_vmm_new, false, 0x0200 },
+ .kind = nv50_mmu_kind,
+ .kind_sys = true,
+};
+
+int
+mcp77_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu)
+{
+ return nvkm_mmu_new_(&mcp77_mmu, device, index, pmmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 6d8f61ea467a..da06e64d8a7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -95,6 +95,9 @@ struct nvkm_vmm_desc {
const struct nvkm_vmm_desc_func *func;
};
+extern const struct nvkm_vmm_desc nv50_vmm_desc_12[];
+extern const struct nvkm_vmm_desc nv50_vmm_desc_16[];
+
extern const struct nvkm_vmm_desc gk104_vmm_desc_16_12[];
extern const struct nvkm_vmm_desc gk104_vmm_desc_16_16[];
extern const struct nvkm_vmm_desc gk104_vmm_desc_17_12[];
@@ -169,6 +172,11 @@ int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32,
const char *, struct nvkm_vmm **);
int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *);
+int nv50_vmm_join(struct nvkm_vmm *, struct nvkm_memory *);
+void nv50_vmm_part(struct nvkm_vmm *, struct nvkm_memory *);
+int nv50_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *);
+void nv50_vmm_flush(struct nvkm_vmm *, int);
+
int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *,
struct nvkm_mmu *, u64, u64, void *, u32,
struct lock_class_key *, const char *, struct nvkm_vmm **);
@@ -200,6 +208,8 @@ int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
struct lock_class_key *, const char *, struct nvkm_vmm **);
int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
struct lock_class_key *, const char *, struct nvkm_vmm **);
+int mcp77_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
+ struct lock_class_key *, const char *, struct nvkm_vmm **);
int g84_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
struct lock_class_key *, const char *, struct nvkm_vmm **);
int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c
new file mode 100644
index 000000000000..e63d984cbfd4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "vmm.h"
+
+static const struct nvkm_vmm_func
+mcp77_vmm = {
+ .join = nv50_vmm_join,
+ .part = nv50_vmm_part,
+ .valid = nv50_vmm_valid,
+ .flush = nv50_vmm_flush,
+ .page_block = 1 << 29,
+ .page = {
+ { 16, &nv50_vmm_desc_16[0], NVKM_VMM_PAGE_xVxx },
+ { 12, &nv50_vmm_desc_12[0], NVKM_VMM_PAGE_xVHx },
+ {}
+ }
+};
+
+int
+mcp77_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc,
+ struct lock_class_key *key, const char *name,
+ struct nvkm_vmm **pvmm)
+{
+ return nv04_vmm_new_(&mcp77_vmm, mmu, 0, addr, size,
+ argv, argc, key, name, pvmm);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
index 863a2edd9861..64f75d906202 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
@@ -32,7 +32,7 @@ static inline void
nv50_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr)
{
- u64 next = addr | map->type, data;
+ u64 next = addr + map->type, data;
u32 pten;
int log2blk;
@@ -69,7 +69,7 @@ nv50_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes);
nvkm_kmap(pt->memory);
while (ptes--) {
- const u64 data = *map->dma++ | map->type;
+ const u64 data = *map->dma++ + map->type;
VMM_WO064(pt, vmm, ptei++ * 8, data);
map->type += map->ctag;
}
@@ -163,21 +163,21 @@ nv50_vmm_pgd = {
.pde = nv50_vmm_pgd_pde,
};
-static const struct nvkm_vmm_desc
+const struct nvkm_vmm_desc
nv50_vmm_desc_12[] = {
{ PGT, 17, 8, 0x1000, &nv50_vmm_pgt },
{ PGD, 11, 0, 0x0000, &nv50_vmm_pgd },
{}
};
-static const struct nvkm_vmm_desc
+const struct nvkm_vmm_desc
nv50_vmm_desc_16[] = {
{ PGT, 13, 8, 0x1000, &nv50_vmm_pgt },
{ PGD, 11, 0, 0x0000, &nv50_vmm_pgd },
{}
};
-static void
+void
nv50_vmm_flush(struct nvkm_vmm *vmm, int level)
{
struct nvkm_subdev *subdev = &vmm->mmu->subdev;
@@ -223,7 +223,7 @@ nv50_vmm_flush(struct nvkm_vmm *vmm, int level)
mutex_unlock(&subdev->mutex);
}
-static int
+int
nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
struct nvkm_vmm_map *map)
{
@@ -321,7 +321,7 @@ nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
return 0;
}
-static void
+void
nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
{
struct nvkm_vmm_join *join;
@@ -335,7 +335,7 @@ nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
}
}
-static int
+int
nv50_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
{
const u32 pd_offset = vmm->mmu->func->vmm.pd_offset;
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c
index dc332ea56f6c..3ecffa52c814 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c
@@ -102,10 +102,13 @@ static int sun4i_tmds_determine_rate(struct clk_hw *hw,
goto out;
}
- if (abs(rate - rounded / i) <
- abs(rate - best_parent / best_div)) {
+ if (!best_parent ||
+ abs(rate - rounded / i / j) <
+ abs(rate - best_parent / best_half /
+ best_div)) {
best_parent = rounded;
- best_div = i;
+ best_half = i;
+ best_div = j;
}
}
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 641294aef165..fcd58145d0da 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1863,7 +1863,7 @@ u32 vmw_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
*/
int vmw_enable_vblank(struct drm_device *dev, unsigned int pipe)
{
- return -ENOSYS;
+ return -EINVAL;
}
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index b8a09807c5de..3824595fece1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -266,8 +266,8 @@ static const struct drm_connector_funcs vmw_legacy_connector_funcs = {
.set_property = vmw_du_connector_set_property,
.destroy = vmw_ldu_connector_destroy,
.reset = vmw_du_connector_reset,
- .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
- .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+ .atomic_duplicate_state = vmw_du_connector_duplicate_state,
+ .atomic_destroy_state = vmw_du_connector_destroy_state,
.atomic_set_property = vmw_du_connector_atomic_set_property,
.atomic_get_property = vmw_du_connector_atomic_get_property,
};
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index bc5f6026573d..63a4cd794b73 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -420,8 +420,8 @@ static const struct drm_connector_funcs vmw_sou_connector_funcs = {
.set_property = vmw_du_connector_set_property,
.destroy = vmw_sou_connector_destroy,
.reset = vmw_du_connector_reset,
- .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
- .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+ .atomic_duplicate_state = vmw_du_connector_duplicate_state,
+ .atomic_destroy_state = vmw_du_connector_destroy_state,
.atomic_set_property = vmw_du_connector_atomic_set_property,
.atomic_get_property = vmw_du_connector_atomic_get_property,
};
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 706164b4c5be..f7829a74140c 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -821,8 +821,12 @@ void i2c_unregister_device(struct i2c_client *client)
{
if (!client)
return;
- if (client->dev.of_node)
+
+ if (client->dev.of_node) {
of_node_clear_flag(client->dev.of_node, OF_POPULATED);
+ of_node_put(client->dev.of_node);
+ }
+
if (ACPI_COMPANION(&client->dev))
acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev));
device_unregister(&client->dev);
diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 4bb9927afd01..a1082c04ac5c 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -397,16 +397,17 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
the underlying bus driver */
break;
case I2C_SMBUS_I2C_BLOCK_DATA:
+ if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
+ dev_err(&adapter->dev, "Invalid block %s size %d\n",
+ read_write == I2C_SMBUS_READ ? "read" : "write",
+ data->block[0]);
+ return -EINVAL;
+ }
+
if (read_write == I2C_SMBUS_READ) {
msg[1].len = data->block[0];
} else {
msg[0].len = data->block[0] + 1;
- if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) {
- dev_err(&adapter->dev,
- "Invalid block write size %d\n",
- data->block[0]);
- return -EINVAL;
- }
for (i = 1; i <= data->block[0]; i++)
msgbuf0[i] = data->block[i];
}
diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c
index 6c51d404874b..c37aea9ac272 100644
--- a/drivers/input/misc/twl4030-vibra.c
+++ b/drivers/input/misc/twl4030-vibra.c
@@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops,
twl4030_vibra_suspend, twl4030_vibra_resume);
static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata,
- struct device_node *node)
+ struct device_node *parent)
{
+ struct device_node *node;
+
if (pdata && pdata->coexist)
return true;
- node = of_find_node_by_name(node, "codec");
+ node = of_get_child_by_name(parent, "codec");
if (node) {
of_node_put(node);
return true;
diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index 5690eb7ff954..15e0d352c4cc 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c
@@ -248,8 +248,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev)
int vddvibr_uV = 0;
int error;
- of_node_get(twl6040_core_dev->of_node);
- twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node,
+ twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node,
"vibra");
if (!twl6040_core_node) {
dev_err(&pdev->dev, "parent of node is missing?\n");
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 579b899add26..dbe57da8c1a1 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1250,29 +1250,32 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
case SS4_PACKET_ID_MULTI:
if (priv->flags & ALPS_BUTTONPAD) {
if (IS_SS4PLUS_DEV(priv->dev_id)) {
- f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
- f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+ f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
+ f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+ no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL;
} else {
f->mt[2].x = SS4_BTL_MF_X_V2(p, 0);
f->mt[3].x = SS4_BTL_MF_X_V2(p, 1);
+ no_data_x = SS4_MFPACKET_NO_AX_BL;
}
+ no_data_y = SS4_MFPACKET_NO_AY_BL;
f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0);
f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1);
- no_data_x = SS4_MFPACKET_NO_AX_BL;
- no_data_y = SS4_MFPACKET_NO_AY_BL;
} else {
if (IS_SS4PLUS_DEV(priv->dev_id)) {
- f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0);
- f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+ f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0);
+ f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+ no_data_x = SS4_PLUS_MFPACKET_NO_AX;
} else {
- f->mt[0].x = SS4_STD_MF_X_V2(p, 0);
- f->mt[1].x = SS4_STD_MF_X_V2(p, 1);
+ f->mt[2].x = SS4_STD_MF_X_V2(p, 0);
+ f->mt[3].x = SS4_STD_MF_X_V2(p, 1);
+ no_data_x = SS4_MFPACKET_NO_AX;
}
+ no_data_y = SS4_MFPACKET_NO_AY;
+
f->mt[2].y = SS4_STD_MF_Y_V2(p, 0);
f->mt[3].y = SS4_STD_MF_Y_V2(p, 1);
- no_data_x = SS4_MFPACKET_NO_AX;
- no_data_y = SS4_MFPACKET_NO_AY;
}
f->first_mp = 0;
diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index c80a7c76cb76..79b6d69d1486 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -141,10 +141,12 @@ enum SS4_PACKET_ID {
#define SS4_TS_Z_V2(_b) (s8)(_b[4] & 0x7F)
-#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */
-#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */
-#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coordinate value */
-#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coordinate value */
+#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */
+#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */
+#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coord value */
+#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coord value */
+#define SS4_PLUS_MFPACKET_NO_AX 4080 /* SS4 PLUS, X */
+#define SS4_PLUS_MFPACKET_NO_AX_BL 4088 /* Buttonless SS4 PLUS, X */
/*
* enum V7_PACKET_ID - defines the packet type for V7
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index ee5466a374bf..cd9f61cb3fc6 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -173,6 +173,7 @@ static const char * const smbus_pnp_ids[] = {
"LEN0046", /* X250 */
"LEN004a", /* W541 */
"LEN200f", /* T450s */
+ "LEN2018", /* T460p */
NULL
};
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 4f2bb5947a4e..141ea228aac6 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -230,8 +230,10 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev,
"Failed to process interrupt request: %d\n", ret);
- if (count)
+ if (count) {
kfree(attn_data.data);
+ attn_data.data = NULL;
+ }
if (!kfifo_is_empty(&drvdata->attn_fifo))
return rmi_irq_fn(irq, dev_id);
diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
index 7ed828a51f4c..3486d9403805 100644
--- a/drivers/input/touchscreen/88pm860x-ts.c
+++ b/drivers/input/touchscreen/88pm860x-ts.c
@@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
int data, n, ret;
if (!np)
return -ENODEV;
- np = of_find_node_by_name(np, "touch");
+ np = of_get_child_by_name(np, "touch");
if (!np) {
dev_err(&pdev->dev, "Can't find touch node\n");
return -EINVAL;
@@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
if (data) {
ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
/* set tsi prebias time */
if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) {
ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
/* set prebias & prechg time of pen detect */
data = 0;
@@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
if (data) {
ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data);
if (ret < 0)
- return -EINVAL;
+ goto err_put_node;
}
of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x);
+
+ of_node_put(np);
+
return 0;
+
+err_put_node:
+ of_node_put(np);
+
+ return -EINVAL;
}
#else
#define pm860x_touch_dt_init(x, y, z) (-1)
diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c
index 8d7f9c8f2771..9642f103b726 100644
--- a/drivers/input/touchscreen/of_touchscreen.c
+++ b/drivers/input/touchscreen/of_touchscreen.c
@@ -13,6 +13,7 @@
#include <linux/input.h>
#include <linux/input/mt.h>
#include <linux/input/touchscreen.h>
+#include <linux/module.h>
static bool touchscreen_get_prop_u32(struct device *dev,
const char *property,
@@ -185,3 +186,6 @@ void touchscreen_report_pos(struct input_dev *input,
input_report_abs(input, multitouch ? ABS_MT_POSITION_Y : ABS_Y, y);
}
EXPORT_SYMBOL(touchscreen_report_pos);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Device-tree helpers functions for touchscreen devices");
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 85140c9af581..8b941f814472 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -687,6 +687,20 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host,
return;
}
+ /* For i.MX53 eSDHCv3, SYSCTL.SDCLKFS may not be set to 0. */
+ if (is_imx53_esdhc(imx_data)) {
+ /*
+ * According to the i.MX53 reference manual, if DLLCTRL[10] can
+ * be set, then the controller is eSDHCv3, else it is eSDHCv2.
+ */
+ val = readl(host->ioaddr + ESDHC_DLL_CTRL);
+ writel(val | BIT(10), host->ioaddr + ESDHC_DLL_CTRL);
+ temp = readl(host->ioaddr + ESDHC_DLL_CTRL);
+ writel(val, host->ioaddr + ESDHC_DLL_CTRL);
+ if (temp & BIT(10))
+ pre_div = 2;
+ }
+
temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL);
temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN
| ESDHC_CLOCK_MASK);
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 18ff127020c0..dd161c5eea8e 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
void *cmd_head = pcan_usb_fd_cmd_buffer(dev);
int err = 0;
u8 *packet_ptr;
- int i, n = 1, packet_len;
+ int packet_len;
ptrdiff_t cmd_len;
/* usb device unregistered? */
@@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
}
packet_ptr = cmd_head;
+ packet_len = cmd_len;
/* firmware is not able to re-assemble 512 bytes buffer in full-speed */
- if ((dev->udev->speed != USB_SPEED_HIGH) &&
- (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) {
- packet_len = PCAN_UFD_LOSPD_PKT_SIZE;
- n += cmd_len / packet_len;
- } else {
- packet_len = cmd_len;
- }
+ if (unlikely(dev->udev->speed != USB_SPEED_HIGH))
+ packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE);
- for (i = 0; i < n; i++) {
+ do {
err = usb_bulk_msg(dev->udev,
usb_sndbulkpipe(dev->udev,
PCAN_USBPRO_EP_CMDOUT),
@@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
}
packet_ptr += packet_len;
- }
+ cmd_len -= packet_len;
+
+ if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE)
+ packet_len = cmd_len;
+
+ } while (packet_len > 0);
return err;
}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 54cb00a27408..eb328bade225 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3999,9 +3999,11 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
out_mdio:
mv88e6xxx_mdios_unregister(chip);
out_g1_vtu_prob_irq:
- mv88e6xxx_g1_vtu_prob_irq_free(chip);
+ if (chip->irq > 0)
+ mv88e6xxx_g1_vtu_prob_irq_free(chip);
out_g1_atu_prob_irq:
- mv88e6xxx_g1_atu_prob_irq_free(chip);
+ if (chip->irq > 0)
+ mv88e6xxx_g1_atu_prob_irq_free(chip);
out_g2_irq:
if (chip->info->g2_irqs > 0 && chip->irq > 0)
mv88e6xxx_g2_irq_free(chip);
diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
index b97de9d36337..20d941f4273b 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_atu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c
@@ -377,7 +377,7 @@ int mv88e6xxx_g1_atu_prob_irq_setup(struct mv88e6xxx_chip *chip)
chip->atu_prob_irq = irq_find_mapping(chip->g1_irq.domain,
MV88E6XXX_G1_STS_IRQ_ATU_PROB);
if (chip->atu_prob_irq < 0)
- return chip->device_irq;
+ return chip->atu_prob_irq;
err = request_threaded_irq(chip->atu_prob_irq, NULL,
mv88e6xxx_g1_atu_prob_irq_thread_fn,
diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index 53d58a01484a..7997961647de 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -570,7 +570,7 @@ int mv88e6xxx_g1_vtu_prob_irq_setup(struct mv88e6xxx_chip *chip)
chip->vtu_prob_irq = irq_find_mapping(chip->g1_irq.domain,
MV88E6XXX_G1_STS_IRQ_VTU_PROB);
if (chip->vtu_prob_irq < 0)
- return chip->device_irq;
+ return chip->vtu_prob_irq;
err = request_threaded_irq(chip->vtu_prob_irq, NULL,
mv88e6xxx_g1_vtu_prob_irq_thread_fn,
diff --git a/drivers/net/ethernet/cortina/Kconfig b/drivers/net/ethernet/cortina/Kconfig
index 0df743ea51f1..89bc4579724d 100644
--- a/drivers/net/ethernet/cortina/Kconfig
+++ b/drivers/net/ethernet/cortina/Kconfig
@@ -14,6 +14,7 @@ if NET_VENDOR_CORTINA
config GEMINI_ETHERNET
tristate "Gemini Gigabit Ethernet support"
depends on OF
+ depends on HAS_IOMEM
select PHYLIB
select CRC32
---help---
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 7892f2f0c6b5..2c2976a2dda6 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -613,9 +613,11 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static void fs_timeout(struct net_device *dev)
+static void fs_timeout_work(struct work_struct *work)
{
- struct fs_enet_private *fep = netdev_priv(dev);
+ struct fs_enet_private *fep = container_of(work, struct fs_enet_private,
+ timeout_work);
+ struct net_device *dev = fep->ndev;
unsigned long flags;
int wake = 0;
@@ -627,7 +629,6 @@ static void fs_timeout(struct net_device *dev)
phy_stop(dev->phydev);
(*fep->ops->stop)(dev);
(*fep->ops->restart)(dev);
- phy_start(dev->phydev);
}
phy_start(dev->phydev);
@@ -639,6 +640,13 @@ static void fs_timeout(struct net_device *dev)
netif_wake_queue(dev);
}
+static void fs_timeout(struct net_device *dev)
+{
+ struct fs_enet_private *fep = netdev_priv(dev);
+
+ schedule_work(&fep->timeout_work);
+}
+
/*-----------------------------------------------------------------------------
* generic link-change handler - should be sufficient for most cases
*-----------------------------------------------------------------------------*/
@@ -759,6 +767,7 @@ static int fs_enet_close(struct net_device *dev)
netif_stop_queue(dev);
netif_carrier_off(dev);
napi_disable(&fep->napi);
+ cancel_work_sync(&fep->timeout_work);
phy_stop(dev->phydev);
spin_lock_irqsave(&fep->lock, flags);
@@ -1019,6 +1028,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
ndev->netdev_ops = &fs_enet_netdev_ops;
ndev->watchdog_timeo = 2 * HZ;
+ INIT_WORK(&fep->timeout_work, fs_timeout_work);
netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight);
ndev->ethtool_ops = &fs_ethtool_ops;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index 92e06b37a199..195fae6aec4a 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -125,6 +125,7 @@ struct fs_enet_private {
spinlock_t lock; /* during all ops except TX pckt processing */
spinlock_t tx_lock; /* during fs_start_xmit and fs_tx */
struct fs_platform_info *fpi;
+ struct work_struct timeout_work;
const struct fs_ops *ops;
int rx_ring, tx_ring;
dma_addr_t ring_mem_addr;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 736df59c16f5..be2ce8dece4a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1280,6 +1280,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
unsigned char *dst;
u64 *handle_array;
int index = 0;
+ u8 proto = 0;
int ret = 0;
if (adapter->resetting) {
@@ -1368,17 +1369,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
}
if (skb->protocol == htons(ETH_P_IP)) {
- if (ip_hdr(skb)->version == 4)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
- else if (ip_hdr(skb)->version == 6)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
-
- if (ip_hdr(skb)->protocol == IPPROTO_TCP)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
- else if (ip_hdr(skb)->protocol != IPPROTO_TCP)
- tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
+ proto = ip_hdr(skb)->protocol;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
+ proto = ipv6_hdr(skb)->nexthdr;
}
+ if (proto == IPPROTO_TCP)
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
+ else if (proto == IPPROTO_UDP)
+ tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
+
if (skb->ip_summed == CHECKSUM_PARTIAL) {
tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD;
hdrs += 2;
@@ -3357,7 +3359,11 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
return;
}
+ adapter->ip_offload_ctrl.len =
+ cpu_to_be32(sizeof(adapter->ip_offload_ctrl));
adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB);
+ adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum;
+ adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum;
adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum;
adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum;
adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 7f605221a686..a434fecfdfeb 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -2463,7 +2463,6 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
return err;
}
-#ifdef CONFIG_PM
/**
* fm10k_resume - Generic PM resume hook
* @dev: generic device structure
@@ -2472,7 +2471,7 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
* suspend or hibernation. This function does not need to handle lower PCIe
* device state as the stack takes care of that for us.
**/
-static int fm10k_resume(struct device *dev)
+static int __maybe_unused fm10k_resume(struct device *dev)
{
struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
struct net_device *netdev = interface->netdev;
@@ -2499,7 +2498,7 @@ static int fm10k_resume(struct device *dev)
* system suspend or hibernation. This function does not need to handle lower
* PCIe device state as the stack takes care of that for us.
**/
-static int fm10k_suspend(struct device *dev)
+static int __maybe_unused fm10k_suspend(struct device *dev)
{
struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
struct net_device *netdev = interface->netdev;
@@ -2511,8 +2510,6 @@ static int fm10k_suspend(struct device *dev)
return 0;
}
-#endif /* CONFIG_PM */
-
/**
* fm10k_io_error_detected - called when PCI error is detected
* @pdev: Pointer to PCI device
@@ -2643,11 +2640,9 @@ static struct pci_driver fm10k_driver = {
.id_table = fm10k_pci_tbl,
.probe = fm10k_probe,
.remove = fm10k_remove,
-#ifdef CONFIG_PM
.driver = {
.pm = &fm10k_pm_ops,
},
-#endif /* CONFIG_PM */
.sriov_configure = fm10k_iov_configure,
.err_handler = &fm10k_err_handler
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bbe48917dcad..7e2b552c2237 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -76,12 +76,7 @@
#define MLXSW_FWREV_MAJOR 13
#define MLXSW_FWREV_MINOR 1530
#define MLXSW_FWREV_SUBMINOR 152
-
-static const struct mlxsw_fw_rev mlxsw_sp_supported_fw_rev = {
- .major = MLXSW_FWREV_MAJOR,
- .minor = MLXSW_FWREV_MINOR,
- .subminor = MLXSW_FWREV_SUBMINOR
-};
+#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
#define MLXSW_SP_FW_FILENAME \
"mellanox/mlxsw_spectrum-" __stringify(MLXSW_FWREV_MAJOR) \
@@ -339,28 +334,25 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
return mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware);
}
-static bool mlxsw_sp_fw_rev_ge(const struct mlxsw_fw_rev *a,
- const struct mlxsw_fw_rev *b)
-{
- if (a->major != b->major)
- return a->major > b->major;
- if (a->minor != b->minor)
- return a->minor > b->minor;
- return a->subminor >= b->subminor;
-}
-
static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
{
const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev;
const struct firmware *firmware;
int err;
- if (mlxsw_sp_fw_rev_ge(rev, &mlxsw_sp_supported_fw_rev))
+ /* Validate driver & FW are compatible */
+ if (rev->major != MLXSW_FWREV_MAJOR) {
+ WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n",
+ rev->major, MLXSW_FWREV_MAJOR);
+ return -EINVAL;
+ }
+ if (MLXSW_FWREV_MINOR_TO_BRANCH(rev->minor) ==
+ MLXSW_FWREV_MINOR_TO_BRANCH(MLXSW_FWREV_MINOR))
return 0;
- dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is out of date\n",
+ dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
rev->major, rev->minor, rev->subminor);
- dev_info(mlxsw_sp->bus_info->dev, "Upgrading firmware using file %s\n",
+ dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n",
MLXSW_SP_FW_FILENAME);
err = request_firmware_direct(&firmware, MLXSW_SP_FW_FILENAME,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 01ff5ba6796e..31891ae11c9b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -821,13 +821,18 @@ static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
int err;
- err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
- if (err)
- return err;
fib->lpm_tree = new_tree;
mlxsw_sp_lpm_tree_hold(new_tree);
+ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
+ if (err)
+ goto err_tree_bind;
mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
return 0;
+
+err_tree_bind:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
+ fib->lpm_tree = old_tree;
+ return err;
}
static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
@@ -868,11 +873,14 @@ err_tree_replace:
return err;
no_replace:
- err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
- if (err)
- return err;
fib->lpm_tree = new_tree;
mlxsw_sp_lpm_tree_hold(new_tree);
+ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
+ if (err) {
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
+ fib->lpm_tree = NULL;
+ return err;
+ }
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 064f00e23a19..d5866d708dfa 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -22,6 +22,7 @@ nfp-objs := \
nfp_hwmon.o \
nfp_main.o \
nfp_net_common.o \
+ nfp_net_ctrl.o \
nfp_net_debugdump.o \
nfp_net_ethtool.o \
nfp_net_main.o \
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 71e6586acc36..80d3aa0fc9d3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -157,7 +157,14 @@ nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
int tag)
{
struct sk_buff *skb;
- int err;
+ int i, err;
+
+ for (i = 0; i < 50; i++) {
+ udelay(4);
+ skb = nfp_bpf_reply(bpf, tag);
+ if (skb)
+ return skb;
+ }
err = wait_event_interruptible_timeout(bpf->cmsg_wq,
skb = nfp_bpf_reply(bpf, tag),
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 8823c8360047..4ee11bf2aed7 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -389,6 +389,8 @@ const struct nfp_app_type app_bpf = {
.id = NFP_APP_BPF_NIC,
.name = "ebpf",
+ .ctrl_cap_mask = 0,
+
.init = nfp_bpf_init,
.clean = nfp_bpf_clean,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index e2859b2e9c6a..1a357aacc444 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -127,6 +127,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
unsigned int stack_size;
unsigned int max_instr;
+ int err;
stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64;
if (prog->aux->stack_depth > stack_size) {
@@ -143,7 +144,14 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
if (!nfp_prog->prog)
return -ENOMEM;
- return nfp_bpf_jit(nfp_prog);
+ err = nfp_bpf_jit(nfp_prog);
+ if (err)
+ return err;
+
+ prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64);
+ prog->aux->offload->jited_image = nfp_prog->prog;
+
+ return 0;
}
static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
@@ -168,6 +176,8 @@ nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
static int
nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
{
+ if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY)
+ return -EINVAL;
return nfp_bpf_ctrl_del_entry(offmap, key);
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 615314d9e7c6..baaea6f1a9d8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -211,12 +211,6 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
cmsg_hdr = nfp_flower_cmsg_get_hdr(skb);
- if (unlikely(cmsg_hdr->version != NFP_FLOWER_CMSG_VER1)) {
- nfp_flower_cmsg_warn(app, "Cannot handle repr control version %u\n",
- cmsg_hdr->version);
- goto out;
- }
-
type = cmsg_hdr->type;
switch (type) {
case NFP_FLOWER_CMSG_TYPE_PORT_REIFY:
@@ -225,9 +219,6 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
case NFP_FLOWER_CMSG_TYPE_PORT_MOD:
nfp_flower_cmsg_portmod_rx(app, skb);
break;
- case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
- nfp_flower_rx_flow_stats(app, skb);
- break;
case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
nfp_tunnel_request_route(app, skb);
break;
@@ -263,7 +254,23 @@ void nfp_flower_cmsg_process_rx(struct work_struct *work)
void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
{
struct nfp_flower_priv *priv = app->priv;
+ struct nfp_flower_cmsg_hdr *cmsg_hdr;
+
+ cmsg_hdr = nfp_flower_cmsg_get_hdr(skb);
+
+ if (unlikely(cmsg_hdr->version != NFP_FLOWER_CMSG_VER1)) {
+ nfp_flower_cmsg_warn(app, "Cannot handle repr control version %u\n",
+ cmsg_hdr->version);
+ dev_kfree_skb_any(skb);
+ return;
+ }
- skb_queue_tail(&priv->cmsg_skbs, skb);
- schedule_work(&priv->cmsg_work);
+ if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_FLOW_STATS) {
+ /* We need to deal with stats updates from HW asap */
+ nfp_flower_rx_flow_stats(app, skb);
+ dev_consume_skb_any(skb);
+ } else {
+ skb_queue_tail(&priv->cmsg_skbs, skb);
+ schedule_work(&priv->cmsg_work);
+ }
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 67c406815365..742d6f1575b5 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -99,7 +99,7 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id)
if (port >= reprs->num_reprs)
return NULL;
- return reprs->reprs[port];
+ return rcu_dereference(reprs->reprs[port]);
}
static int
@@ -114,15 +114,19 @@ nfp_flower_reprs_reify(struct nfp_app *app, enum nfp_repr_type type,
if (!reprs)
return 0;
- for (i = 0; i < reprs->num_reprs; i++)
- if (reprs->reprs[i]) {
- struct nfp_repr *repr = netdev_priv(reprs->reprs[i]);
+ for (i = 0; i < reprs->num_reprs; i++) {
+ struct net_device *netdev;
+
+ netdev = nfp_repr_get_locked(app, reprs, i);
+ if (netdev) {
+ struct nfp_repr *repr = netdev_priv(netdev);
err = nfp_flower_cmsg_portreify(repr, exists);
if (err)
return err;
count++;
}
+ }
return count;
}
@@ -234,19 +238,21 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
return -ENOMEM;
for (i = 0; i < cnt; i++) {
+ struct net_device *repr;
struct nfp_port *port;
u32 port_id;
- reprs->reprs[i] = nfp_repr_alloc(app);
- if (!reprs->reprs[i]) {
+ repr = nfp_repr_alloc(app);
+ if (!repr) {
err = -ENOMEM;
goto err_reprs_clean;
}
+ RCU_INIT_POINTER(reprs->reprs[i], repr);
/* For now we only support 1 PF */
WARN_ON(repr_type == NFP_REPR_TYPE_PF && i);
- port = nfp_port_alloc(app, port_type, reprs->reprs[i]);
+ port = nfp_port_alloc(app, port_type, repr);
if (repr_type == NFP_REPR_TYPE_PF) {
port->pf_id = i;
port->vnic = priv->nn->dp.ctrl_bar;
@@ -257,11 +263,11 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
app->pf->vf_cfg_mem + i * NFP_NET_CFG_BAR_SZ;
}
- eth_hw_addr_random(reprs->reprs[i]);
+ eth_hw_addr_random(repr);
port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type,
i, queue);
- err = nfp_repr_init(app, reprs->reprs[i],
+ err = nfp_repr_init(app, repr,
port_id, port, priv->nn->dp.netdev);
if (err) {
nfp_port_free(port);
@@ -270,7 +276,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
nfp_info(app->cpp, "%s%d Representor(%s) created\n",
repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i,
- reprs->reprs[i]->name);
+ repr->name);
}
nfp_app_reprs_set(app, repr_type, reprs);
@@ -291,7 +297,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
err_reprs_remove:
reprs = nfp_app_reprs_set(app, repr_type, NULL);
err_reprs_clean:
- nfp_reprs_clean_and_free(reprs);
+ nfp_reprs_clean_and_free(app, reprs);
return err;
}
@@ -329,17 +335,18 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
for (i = 0; i < eth_tbl->count; i++) {
unsigned int phys_port = eth_tbl->ports[i].index;
+ struct net_device *repr;
struct nfp_port *port;
u32 cmsg_port_id;
- reprs->reprs[phys_port] = nfp_repr_alloc(app);
- if (!reprs->reprs[phys_port]) {
+ repr = nfp_repr_alloc(app);
+ if (!repr) {
err = -ENOMEM;
goto err_reprs_clean;
}
+ RCU_INIT_POINTER(reprs->reprs[phys_port], repr);
- port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT,
- reprs->reprs[phys_port]);
+ port = nfp_port_alloc(app, NFP_PORT_PHYS_PORT, repr);
if (IS_ERR(port)) {
err = PTR_ERR(port);
goto err_reprs_clean;
@@ -350,11 +357,11 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
goto err_reprs_clean;
}
- SET_NETDEV_DEV(reprs->reprs[phys_port], &priv->nn->pdev->dev);
+ SET_NETDEV_DEV(repr, &priv->nn->pdev->dev);
nfp_net_get_mac_addr(app->pf, port);
cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port);
- err = nfp_repr_init(app, reprs->reprs[phys_port],
+ err = nfp_repr_init(app, repr,
cmsg_port_id, port, priv->nn->dp.netdev);
if (err) {
nfp_port_free(port);
@@ -367,7 +374,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
phys_port);
nfp_info(app->cpp, "Phys Port %d Representor(%s) created\n",
- phys_port, reprs->reprs[phys_port]->name);
+ phys_port, repr->name);
}
nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs);
@@ -397,7 +404,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
err_reprs_remove:
reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, NULL);
err_reprs_clean:
- nfp_reprs_clean_and_free(reprs);
+ nfp_reprs_clean_and_free(app, reprs);
err_free_ctrl_skb:
kfree_skb(ctrl_skb);
return err;
@@ -558,6 +565,8 @@ static void nfp_flower_stop(struct nfp_app *app)
const struct nfp_app_type app_flower = {
.id = NFP_APP_FLOWER_NIC,
.name = "flower",
+
+ .ctrl_cap_mask = ~0U,
.ctrl_has_meta = true,
.extra_cap = nfp_flower_extra_cap,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 955a9f44d244..6aedef0ad433 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -32,6 +32,8 @@
*/
#include <linux/bug.h>
+#include <linux/lockdep.h>
+#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -99,13 +101,19 @@ nfp_app_ctrl_msg_alloc(struct nfp_app *app, unsigned int size, gfp_t priority)
}
struct nfp_reprs *
+nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type)
+{
+ return rcu_dereference_protected(app->reprs[type],
+ lockdep_is_held(&app->pf->lock));
+}
+
+struct nfp_reprs *
nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type,
struct nfp_reprs *reprs)
{
struct nfp_reprs *old;
- old = rcu_dereference_protected(app->reprs[type],
- lockdep_is_held(&app->pf->lock));
+ old = nfp_reprs_get_locked(app, type);
rcu_assign_pointer(app->reprs[type], reprs);
return old;
@@ -116,7 +124,7 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)
struct nfp_app *app;
if (id >= ARRAY_SIZE(apps) || !apps[id]) {
- nfp_err(pf->cpp, "failed to find app with ID 0x%02hhx\n", id);
+ nfp_err(pf->cpp, "unknown FW app ID 0x%02hhx, driver too old or support for FW not built in\n", id);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 6a6eb02b516e..7e474df90598 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -66,6 +66,9 @@ extern const struct nfp_app_type app_flower;
* struct nfp_app_type - application definition
* @id: application ID
* @name: application name
+ * @ctrl_cap_mask: ctrl vNIC capability mask, allows disabling features like
+ * IRQMOD which are on by default but counter-productive for
+ * control messages which are often latency-sensitive
* @ctrl_has_meta: control messages have prepend of type:5/port:CTRL
*
* Callbacks
@@ -100,6 +103,7 @@ struct nfp_app_type {
enum nfp_app_id id;
const char *name;
+ u32 ctrl_cap_mask;
bool ctrl_has_meta;
int (*init)(struct nfp_app *app);
@@ -385,6 +389,8 @@ static inline struct net_device *nfp_app_repr_get(struct nfp_app *app, u32 id)
struct nfp_app *nfp_app_from_netdev(struct net_device *netdev);
struct nfp_reprs *
+nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type);
+struct nfp_reprs *
nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type,
struct nfp_reprs *reprs);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
index 6c9f29c2e975..eb0fc614673d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
@@ -152,18 +152,8 @@ out:
static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
struct nfp_pf *pf = devlink_priv(devlink);
- int ret;
-
- mutex_lock(&pf->lock);
- if (!pf->app) {
- ret = -EBUSY;
- goto out;
- }
- ret = nfp_app_eswitch_mode_get(pf->app, mode);
-out:
- mutex_unlock(&pf->lock);
- return ret;
+ return nfp_app_eswitch_mode_get(pf->app, mode);
}
const struct devlink_ops nfp_devlink_ops = {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 0953fa8f3109..c5b91040b12e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -499,13 +499,9 @@ static int nfp_pci_probe(struct pci_dev *pdev,
if (err)
goto err_hwinfo_free;
- err = devlink_register(devlink, &pdev->dev);
- if (err)
- goto err_hwinfo_free;
-
err = nfp_nsp_init(pdev, pf);
if (err)
- goto err_devlink_unreg;
+ goto err_hwinfo_free;
pf->mip = nfp_mip_open(pf->cpp);
pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip);
@@ -549,8 +545,6 @@ err_fw_unload:
kfree(pf->eth_tbl);
kfree(pf->nspi);
vfree(pf->dumpspec);
-err_devlink_unreg:
- devlink_unregister(devlink);
err_hwinfo_free:
kfree(pf->hwinfo);
nfp_cpp_free(pf->cpp);
@@ -571,18 +565,13 @@ err_pci_disable:
static void nfp_pci_remove(struct pci_dev *pdev)
{
struct nfp_pf *pf = pci_get_drvdata(pdev);
- struct devlink *devlink;
nfp_hwmon_unregister(pf);
- devlink = priv_to_devlink(pf);
-
- nfp_net_pci_remove(pf);
-
nfp_pcie_sriov_disable(pdev);
pci_sriov_set_totalvfs(pf->pdev, 0);
- devlink_unregister(devlink);
+ nfp_net_pci_remove(pf);
vfree(pf->dumpspec);
kfree(pf->rtbl);
@@ -598,7 +587,7 @@ static void nfp_pci_remove(struct pci_dev *pdev)
kfree(pf->eth_tbl);
kfree(pf->nspi);
mutex_destroy(&pf->lock);
- devlink_free(devlink);
+ devlink_free(priv_to_devlink(pf));
pci_release_regions(pdev);
pci_disable_device(pdev);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 6f6e3d6fd935..d88eda9707e6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -578,6 +578,7 @@ struct nfp_net_dp {
* @qcp_cfg: Pointer to QCP queue used for configuration notification
* @tx_bar: Pointer to mapped TX queues
* @rx_bar: Pointer to mapped FL/RX queues
+ * @tlv_caps: Parsed TLV capabilities
* @debugfs_dir: Device directory in debugfs
* @vnic_list: Entry on device vNIC list
* @pdev: Backpointer to PCI device
@@ -644,6 +645,8 @@ struct nfp_net {
u8 __iomem *tx_bar;
u8 __iomem *rx_bar;
+ struct nfp_net_tlv_caps tlv_caps;
+
struct dentry *debugfs_dir;
struct list_head vnic_list;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 2b5cad3069a7..cdf52421eaca 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -293,9 +293,15 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update)
*/
static int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd)
{
+ u32 mbox = nn->tlv_caps.mbox_off;
int ret;
- nn_writeq(nn, NFP_NET_CFG_MBOX_CMD, mbox_cmd);
+ if (!nfp_net_has_mbox(&nn->tlv_caps)) {
+ nn_err(nn, "no mailbox present, command: %u\n", mbox_cmd);
+ return -EIO;
+ }
+
+ nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd);
ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX);
if (ret) {
@@ -303,7 +309,7 @@ static int nfp_net_reconfig_mbox(struct nfp_net *nn, u32 mbox_cmd)
return ret;
}
- return -nn_readl(nn, NFP_NET_CFG_MBOX_RET);
+ return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET);
}
/* Interrupt configuration and handling
@@ -2458,7 +2464,7 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
* ME timestamp ticks. There are 16 ME clock cycles for each timestamp
* count.
*/
- factor = nn->me_freq_mhz / 16;
+ factor = nn->tlv_caps.me_freq_mhz / 16;
/* copy RX interrupt coalesce parameters */
value = (nn->rx_coalesce_max_frames << 16) |
@@ -3084,8 +3090,9 @@ nfp_net_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
if (!vid)
return 0;
- nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_VID, vid);
- nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_PROTO, ETH_P_8021Q);
+ nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
+ nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
+ ETH_P_8021Q);
return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD);
}
@@ -3101,8 +3108,9 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
if (!vid)
return 0;
- nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_VID, vid);
- nn_writew(nn, NFP_NET_CFG_VLAN_FILTER_PROTO, ETH_P_8021Q);
+ nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_VID, vid);
+ nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_VLAN_FILTER_PROTO,
+ ETH_P_8021Q);
return nfp_net_reconfig_mbox(nn, NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL);
}
@@ -3748,18 +3756,8 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
nfp_net_set_ethtool_ops(netdev);
}
-/**
- * nfp_net_init() - Initialise/finalise the nfp_net structure
- * @nn: NFP Net device structure
- *
- * Return: 0 on success or negative errno on error.
- */
-int nfp_net_init(struct nfp_net *nn)
+static int nfp_net_read_caps(struct nfp_net *nn)
{
- int err;
-
- nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
-
/* Get some of the read-only fields from the BAR */
nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
@@ -3792,6 +3790,29 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.rx_offset = NFP_NET_RX_OFFSET;
}
+ /* For control vNICs mask out the capabilities app doesn't want. */
+ if (!nn->dp.netdev)
+ nn->cap &= nn->app->type->ctrl_cap_mask;
+
+ return 0;
+}
+
+/**
+ * nfp_net_init() - Initialise/finalise the nfp_net structure
+ * @nn: NFP Net device structure
+ *
+ * Return: 0 on success or negative errno on error.
+ */
+int nfp_net_init(struct nfp_net *nn)
+{
+ int err;
+
+ nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
+
+ err = nfp_net_read_caps(nn);
+ if (err)
+ return err;
+
/* Set default MTU and Freelist buffer size */
if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
nn->dp.mtu = nn->max_mtu;
@@ -3815,6 +3836,11 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
}
+ err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar,
+ &nn->tlv_caps);
+ if (err)
+ return err;
+
if (nn->dp.netdev)
nfp_net_netdev_init(nn);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
new file mode 100644
index 000000000000..ffb402746ad4
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+/* Reset @caps to the values used when no TLV overrides them: everything is
+ * zeroed, then the ME frequency is set to 1200 (MHz) and the mailbox is placed
+ * at NFP_NET_CFG_MBOX_BASE with length NFP_NET_CFG_MBOX_VAL_MAX_SZ.
+ */
+static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps)
+{
+ memset(caps, 0, sizeof(*caps));
+ caps->me_freq_mhz = 1200;
+ caps->mbox_off = NFP_NET_CFG_MBOX_BASE;
+ caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ;
+}
+
+/**
+ * nfp_net_tlv_caps_parse() - parse capability TLVs from the control BAR
+ * @dev:	device used for error messages
+ * @ctrl_mem:	start of the mapped control BAR
+ * @caps:	structure filled in with the parsed capabilities
+ *
+ * @caps is reset to its defaults first.  If the word at
+ * %NFP_NET_CFG_TLV_BASE is zero there are no TLVs and the defaults are kept.
+ * Otherwise TLVs are walked one by one until an END TLV is found; TLVs of
+ * unknown type are skipped unless their "required" bit is set.
+ *
+ * Return: 0 on success, -EINVAL when the TLV list is malformed (bad length,
+ * runs past the BAR, NULL TLV, non-empty END TLV, missing END TLV) or when
+ * an unknown TLV is marked as required.
+ */
+int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
+			   struct nfp_net_tlv_caps *caps)
+{
+	u8 __iomem *data = ctrl_mem + NFP_NET_CFG_TLV_BASE;
+	u8 __iomem *end = ctrl_mem + NFP_NET_CFG_BAR_SZ;
+	u32 hdr;
+
+	nfp_net_tlv_caps_reset(caps);
+
+	/* A zero word at the TLV base means FW exposes no TLVs at all */
+	hdr = readl(data);
+	if (!hdr)
+		return 0;
+
+	while (true) {
+		unsigned int length, offset;
+
+		hdr = readl(data);
+
+		length = FIELD_GET(NFP_NET_CFG_TLV_HEADER_LENGTH, hdr);
+		/* @data already includes NFP_NET_CFG_TLV_BASE, so the offset
+		 * of this TLV within the BAR is simply the pointer difference.
+		 */
+		offset = data - ctrl_mem;
+
+		/* Advance past the header */
+		data += 4;
+
+		if (length % NFP_NET_CFG_TLV_LENGTH_INC) {
+			dev_err(dev, "TLV size not multiple of %u len:%u\n",
+				NFP_NET_CFG_TLV_LENGTH_INC, length);
+			return -EINVAL;
+		}
+		if (data + length > end) {
+			dev_err(dev, "oversized TLV offset:%u len:%u\n",
+				offset, length);
+			return -EINVAL;
+		}
+
+		switch (FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr)) {
+		case NFP_NET_CFG_TLV_TYPE_UNKNOWN:
+			dev_err(dev, "NULL TLV at offset:%u\n", offset);
+			return -EINVAL;
+		case NFP_NET_CFG_TLV_TYPE_RESERVED:
+			break;
+		case NFP_NET_CFG_TLV_TYPE_END:
+			if (!length)
+				return 0;
+
+			dev_err(dev, "END TLV should be empty, has len:%d\n",
+				length);
+			return -EINVAL;
+		case NFP_NET_CFG_TLV_TYPE_ME_FREQ:
+			if (length != 4) {
+				dev_err(dev,
+					"ME FREQ TLV should be 4B, is %dB\n",
+					length);
+				return -EINVAL;
+			}
+
+			caps->me_freq_mhz = readl(data);
+			break;
+		case NFP_NET_CFG_TLV_TYPE_MBOX:
+			/* An empty MBOX TLV means "no mailbox"; otherwise the
+			 * TLV value area itself is the mailbox.
+			 */
+			if (!length) {
+				caps->mbox_off = 0;
+				caps->mbox_len = 0;
+			} else {
+				caps->mbox_off = data - ctrl_mem;
+				caps->mbox_len = length;
+			}
+			break;
+		default:
+			/* Unknown TLVs are only fatal when marked required */
+			if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr))
+				break;
+
+			dev_err(dev, "unknown TLV type:%u offset:%u len:%u\n",
+				FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr),
+				offset, length);
+			return -EINVAL;
+		}
+
+		/* Step over the value; the next header must still fit */
+		data += length;
+		if (data + 4 > end) {
+			dev_err(dev, "reached end of BAR without END TLV\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Not reached */
+	return -EINVAL;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 25c36001bffa..eeecef2caac6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -43,9 +43,7 @@
#ifndef _NFP_NET_CTRL_H_
#define _NFP_NET_CTRL_H_
-/* IMPORTANT: This header file is shared with the FW,
- * no OS specific constructs, please!
- */
+#include <linux/types.h>
/**
* Configuration BAR size.
@@ -236,6 +234,12 @@
#define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000
/**
+ * TLV area start
+ * %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area
+ */
+#define NFP_NET_CFG_TLV_BASE 0x0058
+
+/**
* VXLAN/UDP encap configuration
* %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports
* %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes
@@ -409,11 +413,14 @@
* 4B used for update command and 4B return code
* followed by a max of 504B of variable length value
*/
-#define NFP_NET_CFG_MBOX_CMD 0x1800
-#define NFP_NET_CFG_MBOX_RET 0x1804
-#define NFP_NET_CFG_MBOX_VAL 0x1808
+#define NFP_NET_CFG_MBOX_BASE 0x1800
#define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8
+/* Simple mailbox layout, offsets relative to the mailbox base:
+ * 4B command, 4B return code, then the value area.
+ */
+#define NFP_NET_CFG_MBOX_SIMPLE_CMD	0x0
+#define NFP_NET_CFG_MBOX_SIMPLE_RET	0x4
+#define NFP_NET_CFG_MBOX_SIMPLE_VAL	0x8
+/* Minimum usable length in bytes, written in decimal on purpose:
+ * 4B cmd + 4B ret + 4B value = 12 (0x12 would wrongly demand 18 bytes).
+ */
+#define NFP_NET_CFG_MBOX_SIMPLE_LEN	12
+
#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_ADD 1
#define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2
@@ -424,9 +431,87 @@
* %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter
* %NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes
*/
-#define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_VAL
+#define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_SIMPLE_VAL
#define NFP_NET_CFG_VLAN_FILTER_VID NFP_NET_CFG_VLAN_FILTER
#define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2)
#define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004
+/**
+ * TLV capabilities
+ * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV
+ * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV
+ * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV
+ * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments
+ * %NFP_NET_CFG_TLV_VALUE: Offset of value within the TLV
+ *
+ * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE.
+ * Last structure must be of type %NFP_NET_CFG_TLV_TYPE_END. Presence of TLVs
+ * is indicated by %NFP_NET_CFG_TLV_BASE being non-zero. TLV structures may
+ * fill the entire remainder of the BAR or be shorter. FW must make sure TLVs
+ * don't conflict with other features which allocate space beyond
+ * %NFP_NET_CFG_TLV_BASE. %NFP_NET_CFG_TLV_TYPE_RESERVED should be used to wrap
+ * space used by such features.
+ * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH.
+ */
+#define NFP_NET_CFG_TLV_TYPE 0x00
+#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000
+#define NFP_NET_CFG_TLV_LENGTH 0x02
+#define NFP_NET_CFG_TLV_LENGTH_INC 4
+#define NFP_NET_CFG_TLV_VALUE 0x04
+
+#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000
+#define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000
+#define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff
+
+/**
+ * Capability TLV types
+ *
+ * %NFP_NET_CFG_TLV_TYPE_UNKNOWN:
+ * Special TLV type to catch bugs, should never be encountered. Drivers should
+ * treat encountering this type as error and refuse to probe.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_RESERVED:
+ * Reserved space, may contain legacy fixed-offset fields, or be used for
+ * padding. The use of this type should be otherwise avoided.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_END:
+ * Empty, end of TLV list. Must be the last TLV. Drivers will stop processing
+ * further TLVs when encountered.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_ME_FREQ:
+ * Single word, ME frequency in MHz as used in calculation for
+ * %NFP_NET_CFG_RXR_IRQ_MOD and %NFP_NET_CFG_TXR_IRQ_MOD.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_MBOX:
+ * Variable, mailbox area. Overwrites the default location which is
+ * %NFP_NET_CFG_MBOX_BASE and length %NFP_NET_CFG_MBOX_VAL_MAX_SZ.
+ */
+#define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0
+#define NFP_NET_CFG_TLV_TYPE_RESERVED 1
+#define NFP_NET_CFG_TLV_TYPE_END 2
+#define NFP_NET_CFG_TLV_TYPE_ME_FREQ 3
+#define NFP_NET_CFG_TLV_TYPE_MBOX 4
+
+struct device;
+
+/**
+ * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities
+ * @me_freq_mhz: ME clock_freq (MHz)
+ * @mbox_off: vNIC mailbox area offset
+ * @mbox_len: vNIC mailbox area length
+ */
+struct nfp_net_tlv_caps {
+ u32 me_freq_mhz;
+ unsigned int mbox_off;
+ unsigned int mbox_len;
+};
+
+int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
+ struct nfp_net_tlv_caps *caps);
+
+/* Return true when the mailbox area described by @caps is at least
+ * NFP_NET_CFG_MBOX_SIMPLE_LEN bytes long, i.e. large enough to be used as a
+ * simple mailbox.
+ */
+static inline bool nfp_net_has_mbox(struct nfp_net_tlv_caps *caps)
+{
+ return caps->mbox_len >= NFP_NET_CFG_MBOX_SIMPLE_LEN;
+}
+
#endif /* _NFP_NET_CTRL_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
index 173646e17e94..e6f19f44b461 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
@@ -518,16 +518,15 @@ nfp_dump_csr_range(struct nfp_pf *pf, struct nfp_dumpspec_csr *spec_csr,
max_rd_addr = cpp_rd_addr + be32_to_cpu(spec_csr->cpp.dump_length);
while (cpp_rd_addr < max_rd_addr) {
- if (is_xpb_read(&spec_csr->cpp.cpp_id))
- bytes_read = nfp_xpb_readl(pf->cpp, cpp_rd_addr,
- (u32 *)dest);
- else
+ if (is_xpb_read(&spec_csr->cpp.cpp_id)) {
+ err = nfp_xpb_readl(pf->cpp, cpp_rd_addr, (u32 *)dest);
+ } else {
bytes_read = nfp_cpp_read(pf->cpp, cpp_id, cpp_rd_addr,
dest, reg_sz);
- if (bytes_read != reg_sz) {
- if (bytes_read >= 0)
- bytes_read = -EIO;
- dump_header->error = cpu_to_be32(bytes_read);
+ err = bytes_read == reg_sz ? 0 : -EIO;
+ }
+ if (err) {
+ dump_header->error = cpu_to_be32(err);
dump_header->error_offset = cpu_to_be32(cpp_rd_addr);
break;
}
@@ -555,8 +554,8 @@ nfp_read_indirect_csr(struct nfp_cpp *cpp,
NFP_IND_ME_REFL_WR_SIG_INIT,
cpp_params.token, cpp_params.island);
result = nfp_cpp_writel(cpp, cpp_id, csr_ctx_ptr_offs, context);
- if (result != sizeof(context))
- return result < 0 ? result : -EIO;
+ if (result)
+ return result;
cpp_id = nfp_get_numeric_cpp_id(&cpp_params);
result = nfp_cpp_read(cpp, cpp_id, csr_ctx_ptr_offs, dest, reg_sz);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index c505014121c4..15fa47f622aa 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -208,12 +208,6 @@ nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id)
{
int err;
- /* Get ME clock frequency from ctrl BAR
- * XXX for now frequency is hardcoded until we figure out how
- * to get the value from nfp-hwinfo into ctrl bar
- */
- nn->me_freq_mhz = 1200;
-
err = nfp_net_init(nn);
if (err)
return err;
@@ -373,7 +367,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
if (IS_ERR(pf->app))
return PTR_ERR(pf->app);
+ mutex_lock(&pf->lock);
err = nfp_app_init(pf->app);
+ mutex_unlock(&pf->lock);
if (err)
goto err_free;
@@ -401,7 +397,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
err_unmap:
nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
err_app_clean:
+ mutex_lock(&pf->lock);
nfp_app_clean(pf->app);
+ mutex_unlock(&pf->lock);
err_free:
nfp_app_free(pf->app);
pf->app = NULL;
@@ -414,7 +412,11 @@ static void nfp_net_pf_app_clean(struct nfp_pf *pf)
nfp_net_pf_free_vnic(pf, pf->ctrl_vnic);
nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
}
+
+ mutex_lock(&pf->lock);
nfp_app_clean(pf->app);
+ mutex_unlock(&pf->lock);
+
nfp_app_free(pf->app);
pf->app = NULL;
}
@@ -570,17 +572,6 @@ err_unmap_ctrl:
return err;
}
-static void nfp_net_pci_remove_finish(struct nfp_pf *pf)
-{
- nfp_net_pf_app_stop(pf);
- /* stop app first, to avoid double free of ctrl vNIC's ddir */
- nfp_net_debugfs_dir_clean(&pf->ddir);
-
- nfp_net_pf_free_irqs(pf);
- nfp_net_pf_app_clean(pf);
- nfp_net_pci_unmap_mem(pf);
-}
-
static int
nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port,
struct nfp_eth_table *eth_table)
@@ -655,9 +646,6 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf)
nfp_net_pf_free_vnic(pf, nn);
}
- if (list_empty(&pf->vnics))
- nfp_net_pci_remove_finish(pf);
-
return 0;
}
@@ -707,6 +695,7 @@ int nfp_net_refresh_eth_port(struct nfp_port *port)
*/
int nfp_net_pci_probe(struct nfp_pf *pf)
{
+ struct devlink *devlink = priv_to_devlink(pf);
struct nfp_net_fw_version fw_ver;
u8 __iomem *ctrl_bar, *qc_bar;
int stride;
@@ -720,16 +709,13 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
return -EINVAL;
}
- mutex_lock(&pf->lock);
pf->max_data_vnics = nfp_net_pf_get_num_ports(pf);
- if ((int)pf->max_data_vnics < 0) {
- err = pf->max_data_vnics;
- goto err_unlock;
- }
+ if ((int)pf->max_data_vnics < 0)
+ return pf->max_data_vnics;
err = nfp_net_pci_map_mem(pf);
if (err)
- goto err_unlock;
+ return err;
ctrl_bar = nfp_cpp_area_iomem(pf->data_vnic_bar);
qc_bar = nfp_cpp_area_iomem(pf->qc_area);
@@ -768,6 +754,11 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (err)
goto err_unmap;
+ err = devlink_register(devlink, &pf->pdev->dev);
+ if (err)
+ goto err_app_clean;
+
+ mutex_lock(&pf->lock);
pf->ddir = nfp_net_debugfs_device_add(pf->pdev);
/* Allocate the vnics and do basic init */
@@ -799,32 +790,39 @@ err_free_vnics:
nfp_net_pf_free_vnics(pf);
err_clean_ddir:
nfp_net_debugfs_dir_clean(&pf->ddir);
+ mutex_unlock(&pf->lock);
+ cancel_work_sync(&pf->port_refresh_work);
+ devlink_unregister(devlink);
+err_app_clean:
nfp_net_pf_app_clean(pf);
err_unmap:
nfp_net_pci_unmap_mem(pf);
-err_unlock:
- mutex_unlock(&pf->lock);
- cancel_work_sync(&pf->port_refresh_work);
return err;
}
void nfp_net_pci_remove(struct nfp_pf *pf)
{
- struct nfp_net *nn;
+ struct nfp_net *nn, *next;
mutex_lock(&pf->lock);
- if (list_empty(&pf->vnics))
- goto out;
-
- list_for_each_entry(nn, &pf->vnics, vnic_list)
- if (nfp_net_is_data_vnic(nn))
- nfp_net_pf_clean_vnic(pf, nn);
+ list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) {
+ if (!nfp_net_is_data_vnic(nn))
+ continue;
+ nfp_net_pf_clean_vnic(pf, nn);
+ nfp_net_pf_free_vnic(pf, nn);
+ }
- nfp_net_pf_free_vnics(pf);
+ nfp_net_pf_app_stop(pf);
+ /* stop app first, to avoid double free of ctrl vNIC's ddir */
+ nfp_net_debugfs_dir_clean(&pf->ddir);
- nfp_net_pci_remove_finish(pf);
-out:
mutex_unlock(&pf->lock);
+ devlink_unregister(priv_to_devlink(pf));
+
+ nfp_net_pf_free_irqs(pf);
+ nfp_net_pf_app_clean(pf);
+ nfp_net_pci_unmap_mem(pf);
+
cancel_work_sync(&pf->port_refresh_work);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 317f87cc3cc6..f67da6bde9da 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -46,6 +46,13 @@
#include "nfp_net_sriov.h"
#include "nfp_port.h"
+/* Return the representor netdev stored at index @id of @set (may be NULL).
+ * The pointer is read with rcu_dereference_protected(), with lockdep told
+ * that app->pf->lock is held - callers are expected to hold that lock.
+ * @id is not range-checked here.
+ */
+struct net_device *
+nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set, unsigned int id)
+{
+ return rcu_dereference_protected(set->reprs[id],
+ lockdep_is_held(&app->pf->lock));
+}
+
static void
nfp_repr_inc_tx_stats(struct net_device *netdev, unsigned int len,
int tx_status)
@@ -369,21 +376,24 @@ static void nfp_repr_clean_and_free(struct nfp_repr *repr)
nfp_repr_free(repr);
}
-void nfp_reprs_clean_and_free(struct nfp_reprs *reprs)
+void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs)
{
+ struct net_device *netdev;
unsigned int i;
- for (i = 0; i < reprs->num_reprs; i++)
- if (reprs->reprs[i])
- nfp_repr_clean_and_free(netdev_priv(reprs->reprs[i]));
+ for (i = 0; i < reprs->num_reprs; i++) {
+ netdev = nfp_repr_get_locked(app, reprs, i);
+ if (netdev)
+ nfp_repr_clean_and_free(netdev_priv(netdev));
+ }
kfree(reprs);
}
void
-nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
- enum nfp_repr_type type)
+nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type)
{
+ struct net_device *netdev;
struct nfp_reprs *reprs;
int i;
@@ -395,14 +405,16 @@ nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
/* Preclean must happen before we remove the reprs reference from the
* app below.
*/
- for (i = 0; i < reprs->num_reprs; i++)
- if (reprs->reprs[i])
- nfp_app_repr_preclean(app, reprs->reprs[i]);
+ for (i = 0; i < reprs->num_reprs; i++) {
+ netdev = nfp_repr_get_locked(app, reprs, i);
+ if (netdev)
+ nfp_app_repr_preclean(app, netdev);
+ }
reprs = nfp_app_reprs_set(app, type, NULL);
synchronize_rcu();
- nfp_reprs_clean_and_free(reprs);
+ nfp_reprs_clean_and_free(app, reprs);
}
struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs)
@@ -420,48 +432,29 @@ struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs)
int nfp_reprs_resync_phys_ports(struct nfp_app *app)
{
- struct nfp_reprs *reprs, *old_reprs;
+ struct net_device *netdev;
+ struct nfp_reprs *reprs;
struct nfp_repr *repr;
int i;
- old_reprs =
- rcu_dereference_protected(app->reprs[NFP_REPR_TYPE_PHYS_PORT],
- lockdep_is_held(&app->pf->lock));
- if (!old_reprs)
- return 0;
-
- reprs = nfp_reprs_alloc(old_reprs->num_reprs);
+ reprs = nfp_reprs_get_locked(app, NFP_REPR_TYPE_PHYS_PORT);
if (!reprs)
- return -ENOMEM;
-
- for (i = 0; i < old_reprs->num_reprs; i++) {
- if (!old_reprs->reprs[i])
- continue;
-
- repr = netdev_priv(old_reprs->reprs[i]);
- if (repr->port->type == NFP_PORT_INVALID) {
- nfp_app_repr_preclean(app, old_reprs->reprs[i]);
- continue;
- }
-
- reprs->reprs[i] = old_reprs->reprs[i];
- }
-
- old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs);
- synchronize_rcu();
+ return 0;
- /* Now we free up removed representors */
- for (i = 0; i < old_reprs->num_reprs; i++) {
- if (!old_reprs->reprs[i])
+ for (i = 0; i < reprs->num_reprs; i++) {
+ netdev = nfp_repr_get_locked(app, reprs, i);
+ if (!netdev)
continue;
- repr = netdev_priv(old_reprs->reprs[i]);
+ repr = netdev_priv(netdev);
if (repr->port->type != NFP_PORT_INVALID)
continue;
+ nfp_app_repr_preclean(app, netdev);
+ rcu_assign_pointer(reprs->reprs[i], NULL);
+ synchronize_rcu();
nfp_repr_clean(repr);
}
- kfree(old_reprs);
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
index cbc7badf40a0..a621e8ff528e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -35,6 +35,7 @@
#define NFP_NET_REPR_H
struct metadata_dst;
+struct nfp_app;
struct nfp_net;
struct nfp_port;
@@ -47,7 +48,7 @@ struct nfp_port;
*/
struct nfp_reprs {
unsigned int num_reprs;
- struct net_device *reprs[0];
+ struct net_device __rcu *reprs[0];
};
/**
@@ -114,16 +115,18 @@ static inline int nfp_repr_get_port_id(struct net_device *netdev)
return priv->dst->u.port_info.port_id;
}
+struct net_device *
+nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set,
+ unsigned int id);
+
void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len);
int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
u32 cmsg_port_id, struct nfp_port *port,
struct net_device *pf_netdev);
struct net_device *nfp_repr_alloc(struct nfp_app *app);
-void
-nfp_reprs_clean_and_free(struct nfp_reprs *reprs);
-void
-nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
- enum nfp_repr_type type);
+void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs);
+void nfp_reprs_clean_and_free_by_type(struct nfp_app *app,
+ enum nfp_repr_type type);
struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs);
int nfp_reprs_resync_phys_ports(struct nfp_app *app);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index c879626e035b..b802a1d55449 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -277,12 +277,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
}
nfp_net_irqs_assign(nn, vf->irq_entries, num_irqs);
- /* Get ME clock frequency from ctrl BAR
- * XXX for now frequency is hardcoded until we figure out how
- * to get the value from nfp-hwinfo into ctrl bar
- */
- nn->me_freq_mhz = 1200;
-
err = nfp_net_init(nn);
if (err)
goto err_irqs_disable;
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
index 28262470dabf..ef30597aa319 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -674,18 +674,20 @@ void __iomem *nfp_cpp_area_iomem(struct nfp_cpp_area *area)
* @offset: Offset into area
* @value: Pointer to read buffer
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_area_readl(struct nfp_cpp_area *area,
unsigned long offset, u32 *value)
{
u8 tmp[4];
- int err;
+ int n;
- err = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp));
- *value = get_unaligned_le32(tmp);
+ n = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp));
+ if (n != sizeof(tmp))
+ return n < 0 ? n : -EIO;
- return err;
+ *value = get_unaligned_le32(tmp);
+ return 0;
}
/**
@@ -694,16 +696,18 @@ int nfp_cpp_area_readl(struct nfp_cpp_area *area,
* @offset: Offset into area
* @value: Value to write
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_area_writel(struct nfp_cpp_area *area,
unsigned long offset, u32 value)
{
u8 tmp[4];
+ int n;
put_unaligned_le32(value, tmp);
+ n = nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp));
- return nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp));
+ return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO;
}
/**
@@ -712,18 +716,20 @@ int nfp_cpp_area_writel(struct nfp_cpp_area *area,
* @offset: Offset into area
* @value: Pointer to read buffer
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_area_readq(struct nfp_cpp_area *area,
unsigned long offset, u64 *value)
{
u8 tmp[8];
- int err;
+ int n;
- err = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp));
- *value = get_unaligned_le64(tmp);
+ n = nfp_cpp_area_read(area, offset, &tmp, sizeof(tmp));
+ if (n != sizeof(tmp))
+ return n < 0 ? n : -EIO;
- return err;
+ *value = get_unaligned_le64(tmp);
+ return 0;
}
/**
@@ -732,16 +738,18 @@ int nfp_cpp_area_readq(struct nfp_cpp_area *area,
* @offset: Offset into area
* @value: Value to write
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_area_writeq(struct nfp_cpp_area *area,
unsigned long offset, u64 value)
{
u8 tmp[8];
+ int n;
put_unaligned_le64(value, tmp);
+ n = nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp));
- return nfp_cpp_area_write(area, offset, &tmp, sizeof(tmp));
+ return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO;
}
/**
@@ -1080,7 +1088,7 @@ static u32 nfp_xpb_to_cpp(struct nfp_cpp *cpp, u32 *xpb_addr)
* @xpb_addr: Address for operation
* @value: Pointer to read buffer
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_xpb_readl(struct nfp_cpp *cpp, u32 xpb_addr, u32 *value)
{
@@ -1095,7 +1103,7 @@ int nfp_xpb_readl(struct nfp_cpp *cpp, u32 xpb_addr, u32 *value)
* @xpb_addr: Address for operation
* @value: Value to write
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_xpb_writel(struct nfp_cpp *cpp, u32 xpb_addr, u32 value)
{
@@ -1113,7 +1121,7 @@ int nfp_xpb_writel(struct nfp_cpp *cpp, u32 xpb_addr, u32 value)
*
* KERNEL: This operation is safe to call in interrupt or softirq context.
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_xpb_writelm(struct nfp_cpp *cpp, u32 xpb_tgt,
u32 mask, u32 value)
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
index ab86bceb93f2..20bad05e2e92 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
@@ -64,18 +64,20 @@
* @address: Address for operation
* @value: Pointer to read buffer
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_readl(struct nfp_cpp *cpp, u32 cpp_id,
unsigned long long address, u32 *value)
{
u8 tmp[4];
- int err;
+ int n;
- err = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp));
- *value = get_unaligned_le32(tmp);
+ n = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp));
+ if (n != sizeof(tmp))
+ return n < 0 ? n : -EIO;
- return err;
+ *value = get_unaligned_le32(tmp);
+ return 0;
}
/**
@@ -85,15 +87,18 @@ int nfp_cpp_readl(struct nfp_cpp *cpp, u32 cpp_id,
* @address: Address for operation
* @value: Value to write
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_writel(struct nfp_cpp *cpp, u32 cpp_id,
unsigned long long address, u32 value)
{
u8 tmp[4];
+ int n;
put_unaligned_le32(value, tmp);
- return nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp));
+ n = nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp));
+
+ return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO;
}
/**
@@ -103,18 +108,20 @@ int nfp_cpp_writel(struct nfp_cpp *cpp, u32 cpp_id,
* @address: Address for operation
* @value: Pointer to read buffer
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id,
unsigned long long address, u64 *value)
{
u8 tmp[8];
- int err;
+ int n;
- err = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp));
- *value = get_unaligned_le64(tmp);
+ n = nfp_cpp_read(cpp, cpp_id, address, tmp, sizeof(tmp));
+ if (n != sizeof(tmp))
+ return n < 0 ? n : -EIO;
- return err;
+ *value = get_unaligned_le64(tmp);
+ return 0;
}
/**
@@ -124,15 +131,18 @@ int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id,
* @address: Address for operation
* @value: Value to write
*
- * Return: length of the io, or -ERRNO
+ * Return: 0 on success, or -ERRNO
*/
int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id,
unsigned long long address, u64 value)
{
u8 tmp[8];
+ int n;
put_unaligned_le64(value, tmp);
- return nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp));
+ n = nfp_cpp_write(cpp, cpp_id, address, tmp, sizeof(tmp));
+
+ return n == sizeof(tmp) ? 0 : n < 0 ? n : -EIO;
}
/* NOTE: This code should not use nfp_xpb_* functions,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
index ecda474ac7c3..46107aefad1c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
@@ -277,10 +277,6 @@ u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name,
break;
}
- if (err == sym->size)
- err = 0;
- else if (err >= 0)
- err = -EIO;
exit:
if (error)
*error = err;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index ed58c746e4af..f5a7eb22d0f5 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -715,7 +715,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
/* warning!!!! We are retrieving the virtual ptr in the sw_data
* field as a 32bit value. Will not work on 64bit machines
*/
- page = (struct page *)GET_SW_DATA0(desc);
+ page = (struct page *)GET_SW_DATA0(ndesc);
if (likely(dma_buff && buf_len && page)) {
dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 5134d5c1306c..b3851bbefad3 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -17,6 +17,7 @@
#include <linux/bpf_verifier.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
+#include <linux/mutex.h>
#include <linux/rtnetlink.h>
#include <net/pkt_cls.h>
@@ -31,6 +32,19 @@ struct nsim_bpf_bound_prog {
struct list_head l;
};
+/* Number of slots in each simulated offloaded map; also the largest
+ * max_entries value nsim_bpf_map_alloc() accepts.
+ */
+#define NSIM_BPF_MAX_KEYS 2
+
+/* Driver-private state of one offloaded map, stored in offmap->dev_priv. */
+struct nsim_bpf_bound_map {
+ /* netdevsim instance the map was allocated for */
+ struct netdevsim *ns;
+ /* back-pointer to the offloaded map this state belongs to */
+ struct bpf_offloaded_map *map;
+ /* held around every access to entry[] by the map callbacks */
+ struct mutex mutex;
+ /* fixed slot table; a slot with a NULL key is treated as empty */
+ struct nsim_map_entry {
+ void *key;
+ void *value;
+ } entry[NSIM_BPF_MAX_KEYS];
+ /* link in the owning netdevsim's bpf_bound_maps list */
+ struct list_head l;
+};
+
static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data)
{
const char **str = file->private;
@@ -284,6 +298,224 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
return 0;
}
+/* Tell whether slot @e is populated and stores exactly @key.
+ * An empty slot (NULL key pointer) never matches; otherwise the first
+ * map->key_size bytes of both keys are compared.
+ */
+static bool
+nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key)
+{
+	if (!e->key)
+		return false;
+
+	return memcmp(key, e->key, map->key_size) == 0;
+}
+
+/* Scan the slot table of @offmap for @key.
+ *
+ * Return: index of the slot holding @key, or -ENOENT if no populated
+ * slot matches.  The function itself takes no lock.
+ */
+static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	unsigned int slot = 0;
+
+	while (slot < ARRAY_SIZE(nmap->entry)) {
+		if (nsim_map_key_match(&offmap->map, &nmap->entry[slot], key))
+			return slot;
+		slot++;
+	}
+
+	return -ENOENT;
+}
+
+/* Allocate key and value buffers for slot @idx of @offmap, sized from the
+ * map's key_size and value_size.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails; on failure
+ * the slot is left with a NULL key so it still reads as empty.
+ */
+static int
+nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	struct nsim_map_entry *slot = &nmap->entry[idx];
+
+	slot->key = kmalloc(offmap->map.key_size, GFP_USER);
+	if (!slot->key)
+		return -ENOMEM;
+
+	slot->value = kmalloc(offmap->map.value_size, GFP_USER);
+	if (slot->value)
+		return 0;
+
+	/* Value allocation failed: roll back the key buffer. */
+	kfree(slot->key);
+	slot->key = NULL;
+	return -ENOMEM;
+}
+
+/* Copy the key that follows @key in slot order into @next_key.
+ *
+ * If @key is NULL or is not found in the map, the walk starts at the
+ * first slot; otherwise it starts at the slot after the one holding
+ * @key.  Empty slots are skipped.
+ *
+ * Return: 0 when a key was copied, -ENOENT when no populated slot is left.
+ */
+static int
+nsim_map_get_next_key(struct bpf_offloaded_map *offmap,
+		      void *key, void *next_key)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	int pos = 0, ret = -ENOENT;
+
+	mutex_lock(&nmap->mutex);
+
+	if (key) {
+		int cur = nsim_map_key_find(offmap, key);
+
+		if (cur != -ENOENT)
+			pos = cur + 1;
+	}
+
+	for (; pos < ARRAY_SIZE(nmap->entry); pos++) {
+		if (!nmap->entry[pos].key)
+			continue;
+
+		memcpy(next_key, nmap->entry[pos].key, offmap->map.key_size);
+		ret = 0;
+		break;
+	}
+
+	mutex_unlock(&nmap->mutex);
+
+	return ret;
+}
+
+/* Look @key up and copy its value (value_size bytes) into @value.
+ *
+ * Return: 0 on success, or the negative error from nsim_map_key_find()
+ * (-ENOENT) when the key is not present; @value is untouched then.
+ */
+static int
+nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	int slot, ret;
+
+	mutex_lock(&nmap->mutex);
+
+	slot = nsim_map_key_find(offmap, key);
+	if (slot < 0) {
+		ret = slot;
+	} else {
+		memcpy(value, nmap->entry[slot].value,
+		       offmap->map.value_size);
+		ret = 0;
+	}
+
+	mutex_unlock(&nmap->mutex);
+
+	return ret;
+}
+
+/* Insert or overwrite the element for @key.
+ *
+ * @flags equal to BPF_EXIST requires the key to be present already,
+ * @flags equal to BPF_NOEXIST requires it to be absent.  A new key takes
+ * the first empty slot, whose buffers are allocated here.
+ *
+ * Return: 0 on success, -ENOENT / -EEXIST when the @flags condition is
+ * not met, -E2BIG when every slot is occupied, -ENOMEM on allocation
+ * failure.
+ */
+static int
+nsim_map_update_elem(struct bpf_offloaded_map *offmap,
+		     void *key, void *value, u64 flags)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	int slot, ret = 0;
+
+	mutex_lock(&nmap->mutex);
+
+	slot = nsim_map_key_find(offmap, key);
+	if (slot >= 0) {
+		/* Key is present: only BPF_NOEXIST forbids overwriting. */
+		if (flags == BPF_NOEXIST) {
+			ret = -EEXIST;
+			goto out_unlock;
+		}
+	} else {
+		/* Key is absent: BPF_EXIST reports the lookup error. */
+		if (flags == BPF_EXIST) {
+			ret = slot;
+			goto out_unlock;
+		}
+
+		/* Pick the first slot whose key pointer is still NULL. */
+		slot = 0;
+		while (slot < ARRAY_SIZE(nmap->entry) && nmap->entry[slot].key)
+			slot++;
+		if (slot == ARRAY_SIZE(nmap->entry)) {
+			ret = -E2BIG;
+			goto out_unlock;
+		}
+
+		ret = nsim_map_alloc_elem(offmap, slot);
+		if (ret)
+			goto out_unlock;
+	}
+
+	memcpy(nmap->entry[slot].key, key, offmap->map.key_size);
+	memcpy(nmap->entry[slot].value, value, offmap->map.value_size);
+out_unlock:
+	mutex_unlock(&nmap->mutex);
+
+	return ret;
+}
+
+/* Remove @key from the map, freeing its buffers and emptying the slot.
+ *
+ * Return: 0 on success, -EINVAL for array maps (deletion is refused for
+ * them), or the lookup error (-ENOENT) when the key is not present.
+ */
+static int nsim_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	int slot, ret;
+
+	if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY)
+		return -EINVAL;
+
+	mutex_lock(&nmap->mutex);
+
+	slot = nsim_map_key_find(offmap, key);
+	if (slot < 0) {
+		ret = slot;
+	} else {
+		struct nsim_map_entry *e = &nmap->entry[slot];
+
+		kfree(e->key);
+		kfree(e->value);
+		/* Zero both pointers so the slot reads as empty again. */
+		memset(e, 0, sizeof(*e));
+		ret = 0;
+	}
+
+	mutex_unlock(&nmap->mutex);
+
+	return ret;
+}
+
+/* Map callbacks of the simulated device; nsim_bpf_map_alloc() installs
+ * this table as offmap->dev_ops once the map state is set up.
+ */
+static const struct bpf_map_dev_ops nsim_bpf_map_ops = {
+ .map_get_next_key = nsim_map_get_next_key,
+ .map_lookup_elem = nsim_map_lookup_elem,
+ .map_update_elem = nsim_map_update_elem,
+ .map_delete_elem = nsim_map_delete_elem,
+};
+
+/* Create the simulated device-side state for offloaded map @offmap.
+ *
+ * Only array and hash maps with at most NSIM_BPF_MAX_KEYS entries and no
+ * map flags are accepted.  For array maps every slot is allocated up
+ * front and keyed with its own index; hash map slots are allocated later
+ * by nsim_map_update_elem().
+ *
+ * Return: 0 on success, -EINVAL for an unsupported map type or non-zero
+ * map flags, -ENOMEM when max_entries is too large or an allocation
+ * fails.
+ */
+static int
+nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap)
+{
+	struct nsim_bpf_bound_map *nmap;
+	unsigned int i;
+	int err;
+
+	if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY &&
+		    offmap->map.map_type != BPF_MAP_TYPE_HASH))
+		return -EINVAL;
+	if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS)
+		return -ENOMEM;
+	if (offmap->map.map_flags)
+		return -EINVAL;
+
+	nmap = kzalloc(sizeof(*nmap), GFP_USER);
+	if (!nmap)
+		return -ENOMEM;
+
+	offmap->dev_priv = nmap;
+	nmap->ns = ns;
+	nmap->map = offmap;
+	mutex_init(&nmap->mutex);
+
+	if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) {
+		for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) {
+			u32 *key;
+
+			err = nsim_map_alloc_elem(offmap, i);
+			if (err)
+				goto err_free;
+			key = nmap->entry[i].key;
+			*key = i;
+		}
+	}
+
+	offmap->dev_ops = &nsim_bpf_map_ops;
+	list_add_tail(&nmap->l, &ns->bpf_bound_maps);
+
+	return 0;
+
+err_free:
+	/* Slot i failed and was already cleaned up by nsim_map_alloc_elem();
+	 * release slots i-1 .. 0.  The post-decrement matters: with the
+	 * unsigned counter, "--i" would wrap when i == 0 and would skip
+	 * slot 0 when i == 1.
+	 */
+	while (i--) {
+		kfree(nmap->entry[i].key);
+		kfree(nmap->entry[i].value);
+	}
+	kfree(nmap);
+	return err;
+}
+
+/* Tear down the state created by nsim_bpf_map_alloc(): free every slot's
+ * buffers (kfree() of an empty slot's NULL pointers is a no-op), unlink
+ * the map from its list, and release the private structure.
+ */
+static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap)
+{
+	struct nsim_bpf_bound_map *nmap = offmap->dev_priv;
+	struct nsim_map_entry *e = nmap->entry;
+	struct nsim_map_entry *end = e + ARRAY_SIZE(nmap->entry);
+
+	while (e < end) {
+		kfree(e->key);
+		kfree(e->value);
+		e++;
+	}
+
+	list_del_init(&nmap->l);
+	mutex_destroy(&nmap->mutex);
+	kfree(nmap);
+}
+
int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
struct netdevsim *ns = netdev_priv(dev);
@@ -328,6 +560,14 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
return err;
return nsim_xdp_set_prog(ns, bpf);
+ case BPF_OFFLOAD_MAP_ALLOC:
+ if (!ns->bpf_map_accept)
+ return -EOPNOTSUPP;
+
+ return nsim_bpf_map_alloc(ns, bpf->offmap);
+ case BPF_OFFLOAD_MAP_FREE:
+ nsim_bpf_map_free(bpf->offmap);
+ return 0;
default:
return -EINVAL;
}
@@ -336,6 +576,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
int nsim_bpf_init(struct netdevsim *ns)
{
INIT_LIST_HEAD(&ns->bpf_bound_progs);
+ INIT_LIST_HEAD(&ns->bpf_bound_maps);
debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
&ns->bpf_offloaded_id);
@@ -362,12 +603,17 @@ int nsim_bpf_init(struct netdevsim *ns)
debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
&ns->bpf_xdpoffload_accept);
+ ns->bpf_map_accept = true;
+ debugfs_create_bool("bpf_map_accept", 0600, ns->ddir,
+ &ns->bpf_map_accept);
+
return 0;
}
void nsim_bpf_uninit(struct netdevsim *ns)
{
WARN_ON(!list_empty(&ns->bpf_bound_progs));
+ WARN_ON(!list_empty(&ns->bpf_bound_maps));
WARN_ON(ns->xdp_prog);
WARN_ON(ns->bpf_offloaded);
}
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 32270de9395a..b80361200302 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -61,6 +61,9 @@ struct netdevsim {
bool bpf_tc_non_bound_accept;
bool bpf_xdpdrv_accept;
bool bpf_xdpoffload_accept;
+
+ bool bpf_map_accept;
+ struct list_head bpf_bound_maps;
};
extern struct dentry *nsim_ddir;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 170a3e89b5af..698874684b4e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -679,6 +679,15 @@ static void tun_queue_purge(struct tun_file *tfile)
skb_queue_purge(&tfile->sk.sk_error_queue);
}
+/* Release the tx ring of @tfile and unregister its XDP rxq info.
+ * Does nothing when the ring has no queue; the ring is zeroed afterwards,
+ * so a repeated call finds a NULL queue and returns early.
+ */
+static void tun_cleanup_tx_ring(struct tun_file *tfile)
+{
+	if (!tfile->tx_ring.queue)
+		return;
+
+	ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+	xdp_rxq_info_unreg(&tfile->xdp_rxq);
+	memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
+}
+
+
static void __tun_detach(struct tun_file *tfile, bool clean)
{
struct tun_file *ntfile;
@@ -725,10 +734,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
- if (tun) {
- ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
- xdp_rxq_info_unreg(&tfile->xdp_rxq);
- }
+ tun_cleanup_tx_ring(tfile);
sock_put(&tfile->sk);
}
}
@@ -770,12 +776,14 @@ static void tun_detach_all(struct net_device *dev)
tun_queue_purge(tfile);
xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
+ tun_cleanup_tx_ring(tfile);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
+ tun_cleanup_tx_ring(tfile);
}
BUG_ON(tun->numdisabled != 0);
@@ -3145,6 +3153,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
+ memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
+
return 0;
}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index d51d9abf7986..0657203ffb91 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -606,6 +606,7 @@ enum rtl8152_flags {
PHY_RESET,
SCHEDULE_NAPI,
GREEN_ETHERNET,
+ DELL_TB_RX_AGG_BUG,
};
/* Define these values to match your device */
@@ -1798,6 +1799,9 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
dev_kfree_skb_any(skb);
remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
+
+ if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+ break;
}
if (!skb_queue_empty(&skb_head)) {
@@ -4133,6 +4137,9 @@ static void r8153_init(struct r8152 *tp)
/* rx aggregation */
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
+ if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+ ocp_data |= RX_AGG_DISABLE;
+
ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
rtl_tally_reset(tp);
@@ -5207,6 +5214,12 @@ static int rtl8152_probe(struct usb_interface *intf,
netdev->hw_features &= ~NETIF_F_RXCSUM;
}
+ if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 &&
+ udev->serial && !strcmp(udev->serial, "000001000000")) {
+ dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation");
+ set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
+ }
+
netdev->ethtool_ops = &ops;
netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 6a59d0609d30..9be0b051066a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -182,12 +182,9 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
err = request_firmware(&clm, clm_name, dev);
if (err) {
- if (err == -ENOENT) {
- brcmf_dbg(INFO, "continue with CLM data currently present in firmware\n");
- return 0;
- }
- brcmf_err("request CLM blob file failed (%d)\n", err);
- return err;
+ brcmf_info("no clm_blob available(err=%d), device may have limited channels available\n",
+ err);
+ return 0;
}
chunk_buf = kzalloc(sizeof(*chunk_buf) + MAX_CHUNK_LEN - 1, GFP_KERNEL);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d53550e612bc..4276ebfff22b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -451,10 +451,13 @@ static void **nvme_pci_iod_list(struct request *req)
static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ int nseg = blk_rq_nr_phys_segments(req);
unsigned int avg_seg_size;
- avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
- blk_rq_nr_phys_segments(req));
+ if (nseg == 0)
+ return false;
+
+ avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
return false;
@@ -722,20 +725,19 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
}
static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
- struct request *req, struct nvme_rw_command *cmd)
+ struct request *req, struct nvme_rw_command *cmd, int entries)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- int length = blk_rq_payload_bytes(req);
struct dma_pool *pool;
struct nvme_sgl_desc *sg_list;
struct scatterlist *sg = iod->sg;
- int entries = iod->nents, i = 0;
dma_addr_t sgl_dma;
+ int i = 0;
/* setting the transfer type as SGL */
cmd->flags = NVME_CMD_SGL_METABUF;
- if (length == sg_dma_len(sg)) {
+ if (entries == 1) {
nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg);
return BLK_STS_OK;
}
@@ -775,13 +777,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
}
nvme_pci_sgl_set_data(&sg_list[i++], sg);
-
- length -= sg_dma_len(sg);
sg = sg_next(sg);
- entries--;
- } while (length > 0);
+ } while (--entries > 0);
- WARN_ON(entries > 0);
return BLK_STS_OK;
}
@@ -793,6 +791,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
enum dma_data_direction dma_dir = rq_data_dir(req) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE;
blk_status_t ret = BLK_STS_IOERR;
+ int nr_mapped;
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(q, req, iod->sg);
@@ -800,12 +799,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out;
ret = BLK_STS_RESOURCE;
- if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
- DMA_ATTR_NO_WARN))
+ nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
+ DMA_ATTR_NO_WARN);
+ if (!nr_mapped)
goto out;
if (iod->use_sgl)
- ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
+ ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index b4964b067aec..8f6e8e28996d 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -410,6 +410,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index)
if (ret)
return ERR_PTR(-ENODEV);
+ /* This phy type handled by the usb-phy subsystem for now */
+ if (of_device_is_compatible(args.np, "usb-nop-xceiv"))
+ return ERR_PTR(-ENODEV);
+
mutex_lock(&phy_provider_mutex);
phy_provider = of_phy_provider_lookup(args.np);
if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) {
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index f48a2ee587a4..ee18428a051f 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -31,7 +31,7 @@ config SSB_BLOCKIO
config SSB_PCIHOST_POSSIBLE
bool
- depends on SSB && (PCI = y || PCI = SSB)
+ depends on SSB && (PCI = y || PCI = SSB) && PCI_DRIVERS_LEGACY
default y
config SSB_PCIHOST
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 79375fc115d2..d67a72dcb92c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -430,8 +430,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
* safe because the task has stopped executing permanently.
*/
if (permitted && (task->flags & PF_DUMPCORE)) {
- eip = KSTK_EIP(task);
- esp = KSTK_ESP(task);
+ if (try_get_task_stack(task)) {
+ eip = KSTK_EIP(task);
+ esp = KSTK_ESP(task);
+ put_task_stack(task);
+ }
}
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5c2c104dc2c5..66df387106de 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -234,6 +234,8 @@ struct bpf_prog_offload {
struct list_head offloads;
bool dev_state;
const struct bpf_prog_offload_ops *dev_ops;
+ void *jited_image;
+ u32 jited_len;
};
struct bpf_prog_aux {
@@ -584,6 +586,8 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog);
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
struct bpf_prog *prog);
+int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map);
+
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value);
int bpf_map_offload_update_elem(struct bpf_map *map,
void *key, void *value, u64 flags);
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 2272ded07496..631354acfa72 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -219,7 +219,7 @@
/* Mark a function definition as prohibited from being cloned. */
#define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))
-#ifdef RANDSTRUCT_PLUGIN
+#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
#define __randomize_layout __attribute__((randomize_layout))
#define __no_randomize_layout __attribute__((no_randomize_layout))
#endif
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 4178d2493547..5e335b6203f4 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -71,7 +71,7 @@ extern void delayacct_init(void);
extern void __delayacct_tsk_init(struct task_struct *);
extern void __delayacct_tsk_exit(struct task_struct *);
extern void __delayacct_blkio_start(void);
-extern void __delayacct_blkio_end(void);
+extern void __delayacct_blkio_end(struct task_struct *);
extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
extern void __delayacct_freepages_start(void);
@@ -122,10 +122,10 @@ static inline void delayacct_blkio_start(void)
__delayacct_blkio_start();
}
-static inline void delayacct_blkio_end(void)
+/*
+ * End block-I/O delay accounting for task @p.
+ *
+ * The accounting is charged to @p, which may differ from current, so the
+ * guard checks @p's own delays pointer before calling
+ * __delayacct_blkio_end(p).  NOTE(review): this presumes
+ * __delayacct_blkio_end() dereferences p->delays - confirm in
+ * kernel/delayacct.c.
+ */
+static inline void delayacct_blkio_end(struct task_struct *p)
{
-	if (current->delays)
-		__delayacct_blkio_end();
+	if (p->delays)
+		__delayacct_blkio_end(p);
delayacct_clear_flag(DELAYACCT_PF_BLKIO);
}
@@ -169,7 +169,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
{}
static inline void delayacct_blkio_start(void)
{}
-static inline void delayacct_blkio_end(void)
+static inline void delayacct_blkio_end(struct task_struct *p)
{}
static inline int delayacct_add_tsk(struct taskstats *d,
struct task_struct *tsk)
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index bae807eb2933..853291714ae0 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -31,11 +31,17 @@
#else
#define MODULE_RANDSTRUCT_PLUGIN
#endif
+#ifdef RETPOLINE
+#define MODULE_VERMAGIC_RETPOLINE "retpoline "
+#else
+#define MODULE_VERMAGIC_RETPOLINE ""
+#endif
#define VERMAGIC_STRING \
UTS_RELEASE " " \
MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \
MODULE_ARCH_VERMAGIC \
- MODULE_RANDSTRUCT_PLUGIN
+ MODULE_RANDSTRUCT_PLUGIN \
+ MODULE_VERMAGIC_RETPOLINE
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index dd238950df81..663b015dace5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -143,22 +143,22 @@ static inline void nft_data_debug(const struct nft_data *data)
* struct nft_ctx - nf_tables rule/set context
*
* @net: net namespace
- * @afi: address family info
* @table: the table the chain is contained in
* @chain: the chain the rule is contained in
* @nla: netlink attributes
* @portid: netlink portID of the original message
* @seq: netlink sequence number
+ * @family: protocol family
* @report: notify via unicast netlink message
*/
struct nft_ctx {
struct net *net;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
const struct nlattr * const *nla;
u32 portid;
u32 seq;
+ u8 family;
bool report;
};
@@ -374,6 +374,7 @@ void nft_unregister_set(struct nft_set_type *type);
* @list: table set list node
* @bindings: list of set bindings
* @name: name of the set
+ * @handle: unique handle of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
* @dtype: data type (verdict or numeric type defined by userspace)
* @objtype: object type (see NFT_OBJECT_* definitions)
@@ -396,6 +397,7 @@ struct nft_set {
struct list_head list;
struct list_head bindings;
char *name;
+ u64 handle;
u32 ktype;
u32 dtype;
u32 objtype;
@@ -946,9 +948,11 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* @objects: stateful objects in the table
* @flowtables: flow tables in the table
* @hgenerator: handle generator state
+ * @handle: table handle
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
* @genmask: generation mask
+ * @afinfo: address family info
* @name: name of the table
*/
struct nft_table {
@@ -958,38 +962,14 @@ struct nft_table {
struct list_head objects;
struct list_head flowtables;
u64 hgenerator;
+ u64 handle;
u32 use;
- u16 flags:14,
+ u16 family:6,
+ flags:8,
genmask:2;
char *name;
};
-enum nft_af_flags {
- NFT_AF_NEEDS_DEV = (1 << 0),
-};
-
-/**
- * struct nft_af_info - nf_tables address family info
- *
- * @list: used internally
- * @family: address family
- * @nhooks: number of hooks in this family
- * @owner: module owner
- * @tables: used internally
- * @flags: family flags
- */
-struct nft_af_info {
- struct list_head list;
- int family;
- unsigned int nhooks;
- struct module *owner;
- struct list_head tables;
- u32 flags;
-};
-
-int nft_register_afinfo(struct net *, struct nft_af_info *);
-void nft_unregister_afinfo(struct net *, struct nft_af_info *);
-
int nft_register_chain_type(const struct nf_chain_type *);
void nft_unregister_chain_type(const struct nf_chain_type *);
@@ -1007,9 +987,9 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
* @name: name of this stateful object
* @genmask: generation mask
* @use: number of references to this stateful object
- * @data: object data, layout depends on type
+ * @handle: unique object handle
* @ops: object operations
- * @data: pointer to object data
+ * @data: object data, layout depends on type
*/
struct nft_object {
struct list_head list;
@@ -1017,6 +997,7 @@ struct nft_object {
struct nft_table *table;
u32 genmask:2,
use:30;
+ u64 handle;
/* runtime data below here */
const struct nft_object_ops *ops ____cacheline_aligned;
unsigned char data[]
@@ -1098,6 +1079,7 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
* @ops_len: number of hooks in array
* @genmask: generation mask
* @use: number of references to this flow table
+ * @handle: unique object handle
* @data: rhashtable and garbage collector
* @ops: array of hooks
*/
@@ -1110,6 +1092,7 @@ struct nft_flowtable {
int ops_len;
u32 genmask:2,
use:30;
+ u64 handle;
/* runtime data below here */
struct nf_hook_ops *ops ____cacheline_aligned;
struct nf_flowtable data;
@@ -1154,9 +1137,6 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
void nft_trace_notify(struct nft_traceinfo *info);
-#define MODULE_ALIAS_NFT_FAMILY(family) \
- MODULE_ALIAS("nft-afinfo-" __stringify(family))
-
#define MODULE_ALIAS_NFT_CHAIN(family, name) \
MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 4109b5f3010f..48134353411d 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,14 +7,8 @@
struct nft_af_info;
struct netns_nftables {
- struct list_head af_info;
+ struct list_head tables;
struct list_head commit_list;
- struct nft_af_info *ipv4;
- struct nft_af_info *ipv6;
- struct nft_af_info *inet;
- struct nft_af_info *arp;
- struct nft_af_info *bridge;
- struct nft_af_info *netdev;
unsigned int base_seq;
u8 gencursor;
};
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 789d818c4a61..2e4b8e436d25 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -376,7 +376,8 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
- struct tcf_exts *exts, bool ovr);
+ struct tcf_exts *exts, bool ovr,
+ struct netlink_ext_ack *extack);
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
@@ -556,13 +557,16 @@ static inline int tcf_valid_offset(const struct sk_buff *skb,
#include <net/net_namespace.h>
static inline int
-tcf_change_indev(struct net *net, struct nlattr *indev_tlv)
+tcf_change_indev(struct net *net, struct nlattr *indev_tlv,
+ struct netlink_ext_ack *extack)
{
char indev[IFNAMSIZ];
struct net_device *dev;
- if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ)
+ if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "Interface name too long");
return -EINVAL;
+ }
dev = __dev_get_by_name(net, indev);
if (!dev)
return -ENODEV;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index cfc19d0ba2ad..cd1be1f25c36 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -239,8 +239,11 @@ struct tcf_proto_ops {
int (*change)(struct net *net, struct sk_buff *,
struct tcf_proto*, unsigned long,
u32 handle, struct nlattr **,
- void **, bool);
- int (*delete)(struct tcf_proto*, void *, bool*);
+ void **, bool,
+ struct netlink_ext_ack *);
+ int (*delete)(struct tcf_proto *tp, void *arg,
+ bool *last,
+ struct netlink_ext_ack *);
void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
void (*bind_class)(void *, u32, unsigned long);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6939e69d3c37..5a1d26a18599 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -953,6 +953,7 @@ struct rate_sample {
u32 prior_in_flight; /* in flight before this ACK */
bool is_app_limited; /* is sample from packet with bubble in pipe? */
bool is_retrans; /* is sample from retransmission? */
+ bool is_ack_delayed; /* is this (likely) a delayed ACK? */
};
struct tcp_congestion_ops {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7c2259e8bc54..406c19d6016b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -17,7 +17,7 @@
#define BPF_ALU64 0x07 /* alu mode in double word width */
/* ld/ldx fields */
-#define BPF_DW 0x18 /* double word */
+#define BPF_DW 0x18 /* double word (64-bit) */
#define BPF_XADD 0xc0 /* exclusive add */
/* alu/jmp fields */
@@ -938,6 +938,9 @@ struct bpf_map_info {
__u32 max_entries;
__u32 map_flags;
char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
} __attribute__((aligned(8)));
/* User bpf_sock_ops struct to access socket values and specify request ops
diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h
index 18be90725ab0..ee97668bdadb 100644
--- a/include/uapi/linux/bpf_common.h
+++ b/include/uapi/linux/bpf_common.h
@@ -15,9 +15,10 @@
/* ld/ldx fields */
#define BPF_SIZE(code) ((code) & 0x18)
-#define BPF_W 0x00
-#define BPF_H 0x08
-#define BPF_B 0x10
+#define BPF_W 0x00 /* 32-bit */
+#define BPF_H 0x08 /* 16-bit */
+#define BPF_B 0x10 /* 8-bit */
+/* eBPF BPF_DW 0x18 64-bit */
#define BPF_MODE(code) ((code) & 0xe0)
#define BPF_IMM 0x00
#define BPF_ABS 0x20
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 53e8dd2a3a03..66dceee0ae30 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -174,6 +174,8 @@ enum nft_table_attributes {
NFTA_TABLE_NAME,
NFTA_TABLE_FLAGS,
NFTA_TABLE_USE,
+ NFTA_TABLE_HANDLE,
+ NFTA_TABLE_PAD,
__NFTA_TABLE_MAX
};
#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
@@ -317,6 +319,7 @@ enum nft_set_desc_attributes {
* @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
* @NFTA_SET_USERDATA: user data (NLA_BINARY)
* @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
+ * @NFTA_SET_HANDLE: set handle (NLA_U64)
*/
enum nft_set_attributes {
NFTA_SET_UNSPEC,
@@ -335,6 +338,7 @@ enum nft_set_attributes {
NFTA_SET_USERDATA,
NFTA_SET_PAD,
NFTA_SET_OBJ_TYPE,
+ NFTA_SET_HANDLE,
__NFTA_SET_MAX
};
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
@@ -1314,6 +1318,7 @@ enum nft_ct_helper_attributes {
* @NFTA_OBJ_TYPE: stateful object type (NLA_U32)
* @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
* @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
+ * @NFTA_OBJ_HANDLE: object handle (NLA_U64)
*/
enum nft_object_attributes {
NFTA_OBJ_UNSPEC,
@@ -1322,6 +1327,8 @@ enum nft_object_attributes {
NFTA_OBJ_TYPE,
NFTA_OBJ_DATA,
NFTA_OBJ_USE,
+ NFTA_OBJ_HANDLE,
+ NFTA_OBJ_PAD,
__NFTA_OBJ_MAX
};
#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1)
@@ -1333,6 +1340,7 @@ enum nft_object_attributes {
* @NFTA_FLOWTABLE_NAME: name of this flow table (NLA_STRING)
* @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
* @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
+ * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
*/
enum nft_flowtable_attributes {
NFTA_FLOWTABLE_UNSPEC,
@@ -1340,6 +1348,8 @@ enum nft_flowtable_attributes {
NFTA_FLOWTABLE_NAME,
NFTA_FLOWTABLE_HOOK,
NFTA_FLOWTABLE_USE,
+ NFTA_FLOWTABLE_HANDLE,
+ NFTA_FLOWTABLE_PAD,
__NFTA_FLOWTABLE_MAX
};
#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h
index e6b1a84f5dd3..c3b060775e13 100644
--- a/include/uapi/linux/netfilter_ipv4.h
+++ b/include/uapi/linux/netfilter_ipv4.h
@@ -57,6 +57,7 @@
enum nf_ip_hook_priorities {
NF_IP_PRI_FIRST = INT_MIN,
+ NF_IP_PRI_RAW_BEFORE_DEFRAG = -450,
NF_IP_PRI_CONNTRACK_DEFRAG = -400,
NF_IP_PRI_RAW = -300,
NF_IP_PRI_SELINUX_FIRST = -225,
diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h
index 2f9724611cc2..dc624fd24d25 100644
--- a/include/uapi/linux/netfilter_ipv6.h
+++ b/include/uapi/linux/netfilter_ipv6.h
@@ -62,6 +62,7 @@
enum nf_ip6_hook_priorities {
NF_IP6_PRI_FIRST = INT_MIN,
+ NF_IP6_PRI_RAW_BEFORE_DEFRAG = -450,
NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
NF_IP6_PRI_RAW = -300,
NF_IP6_PRI_SELINUX_FIRST = -225,
diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_srh.h b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
new file mode 100644
index 000000000000..f3cc0ef514a7
--- /dev/null
+++ b/include/uapi/linux/netfilter_ipv6/ip6t_srh.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _IP6T_SRH_H
+#define _IP6T_SRH_H
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+
+/* Values for "mt_flags" field in struct ip6t_srh */
+#define IP6T_SRH_NEXTHDR 0x0001
+#define IP6T_SRH_LEN_EQ 0x0002
+#define IP6T_SRH_LEN_GT 0x0004
+#define IP6T_SRH_LEN_LT 0x0008
+#define IP6T_SRH_SEGS_EQ 0x0010
+#define IP6T_SRH_SEGS_GT 0x0020
+#define IP6T_SRH_SEGS_LT 0x0040
+#define IP6T_SRH_LAST_EQ 0x0080
+#define IP6T_SRH_LAST_GT 0x0100
+#define IP6T_SRH_LAST_LT 0x0200
+#define IP6T_SRH_TAG 0x0400
+#define IP6T_SRH_MASK 0x07FF
+
+/* Values for "mt_invflags" field in struct ip6t_srh */
+#define IP6T_SRH_INV_NEXTHDR 0x0001
+#define IP6T_SRH_INV_LEN_EQ 0x0002
+#define IP6T_SRH_INV_LEN_GT 0x0004
+#define IP6T_SRH_INV_LEN_LT 0x0008
+#define IP6T_SRH_INV_SEGS_EQ 0x0010
+#define IP6T_SRH_INV_SEGS_GT 0x0020
+#define IP6T_SRH_INV_SEGS_LT 0x0040
+#define IP6T_SRH_INV_LAST_EQ 0x0080
+#define IP6T_SRH_INV_LAST_GT 0x0100
+#define IP6T_SRH_INV_LAST_LT 0x0200
+#define IP6T_SRH_INV_TAG 0x0400
+#define IP6T_SRH_INV_MASK 0x07FF
+
+/**
+ * struct ip6t_srh - SRH match options
+ * @next_hdr: Next header field of SRH
+ * @hdr_len: Extension header length field of SRH
+ * @segs_left: Segments left field of SRH
+ * @last_entry: Last entry field of SRH
+ * @tag: Tag field of SRH
+ * @mt_flags: match options (IP6T_SRH_* bits)
+ * @mt_invflags: Invert the sense of match options (IP6T_SRH_INV_* bits)
+ */
+
+struct ip6t_srh {
+ __u8 next_hdr;
+ __u8 hdr_len;
+ __u8 segs_left;
+ __u8 last_entry;
+ __u16 tag;
+ __u16 mt_flags;
+ __u16 mt_invflags;
+};
+
+#endif /*_IP6T_SRH_H*/
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index ab94d304a634..b1f66480135b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -49,27 +49,35 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
}
/* Called from syscall */
-static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+/* Validate the creation attributes of an array map; nothing is allocated.
+ * Returns 0 when the attributes are acceptable, -EINVAL when max_entries or
+ * value_size is zero, key_size is not 4, unknown map_flags are set or a
+ * per-cpu array requests a specific NUMA node, and -E2BIG when value_size
+ * exceeds KMALLOC_MAX_SIZE.
+ */
+static int array_map_alloc_check(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
- u32 elem_size, index_mask, max_entries;
- bool unpriv = !capable(CAP_SYS_ADMIN);
- struct bpf_array *array;
- u64 array_size, mask64;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
(percpu && numa_node != NUMA_NO_NODE))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
*/
- return ERR_PTR(-E2BIG);
+ return -E2BIG;
+
+ return 0;
+}
+
+static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+{
+ bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+ int numa_node = bpf_map_attr_numa_node(attr);
+ u32 elem_size, index_mask, max_entries;
+ bool unpriv = !capable(CAP_SYS_ADMIN);
+ struct bpf_array *array;
+ u64 array_size, mask64;
elem_size = round_up(attr->value_size, 8);
@@ -112,12 +120,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
- array->map.map_type = attr->map_type;
- array->map.key_size = attr->key_size;
- array->map.value_size = attr->value_size;
- array->map.max_entries = attr->max_entries;
- array->map.map_flags = attr->map_flags;
- array->map.numa_node = numa_node;
+ bpf_map_init_from_attr(&array->map, attr);
array->elem_size = elem_size;
if (!percpu)
@@ -327,6 +330,7 @@ static void array_map_free(struct bpf_map *map)
}
const struct bpf_map_ops array_map_ops = {
+ .map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
@@ -337,6 +341,7 @@ const struct bpf_map_ops array_map_ops = {
};
const struct bpf_map_ops percpu_array_map_ops = {
+ .map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
@@ -345,12 +350,12 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_delete_elem = array_map_delete_elem,
};
-static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
+/* Attribute check for array maps whose values are file descriptors:
+ * rejects any value_size other than sizeof(u32) with -EINVAL, then applies
+ * the generic array_map_alloc_check() and returns its result.
+ */
+static int fd_array_map_alloc_check(union bpf_attr *attr)
{
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
- return ERR_PTR(-EINVAL);
- return array_map_alloc(attr);
+ return -EINVAL;
+ return array_map_alloc_check(attr);
}
static void fd_array_map_free(struct bpf_map *map)
@@ -474,7 +479,8 @@ void bpf_fd_array_map_clear(struct bpf_map *map)
}
const struct bpf_map_ops prog_array_map_ops = {
- .map_alloc = fd_array_map_alloc,
+ .map_alloc_check = fd_array_map_alloc_check,
+ .map_alloc = array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
@@ -561,7 +567,8 @@ static void perf_event_fd_array_release(struct bpf_map *map,
}
const struct bpf_map_ops perf_event_array_map_ops = {
- .map_alloc = fd_array_map_alloc,
+ .map_alloc_check = fd_array_map_alloc_check,
+ .map_alloc = array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
@@ -592,7 +599,8 @@ static void cgroup_fd_array_free(struct bpf_map *map)
}
const struct bpf_map_ops cgroup_array_map_ops = {
- .map_alloc = fd_array_map_alloc,
+ .map_alloc_check = fd_array_map_alloc_check,
+ .map_alloc = array_map_alloc,
.map_free = cgroup_fd_array_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
@@ -610,7 +618,7 @@ static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
if (IS_ERR(inner_map_meta))
return inner_map_meta;
- map = fd_array_map_alloc(attr);
+ map = array_map_alloc(attr);
if (IS_ERR(map)) {
bpf_map_meta_free(inner_map_meta);
return map;
@@ -673,6 +681,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
}
const struct bpf_map_ops array_of_maps_map_ops = {
+ .map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_of_map_alloc,
.map_free = array_of_map_free,
.map_get_next_key = array_map_get_next_key,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 25e723b0dfd4..3aa0658add76 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -300,6 +300,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
}
#ifdef CONFIG_BPF_JIT
+/* All BPF JIT sysctl knobs here. */
+int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
+int bpf_jit_harden __read_mostly;
+int bpf_jit_kallsyms __read_mostly;
+
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
unsigned long *symbol_start,
@@ -381,8 +386,6 @@ static DEFINE_SPINLOCK(bpf_lock);
static LIST_HEAD(bpf_kallsyms);
static struct latch_tree_root bpf_tree __cacheline_aligned;
-int bpf_jit_kallsyms __read_mostly;
-
static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
{
WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
@@ -563,8 +566,6 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
bpf_prog_unlock_free(fp);
}
-int bpf_jit_harden __read_mostly;
-
static int bpf_jit_blind_insn(const struct bpf_insn *from,
const struct bpf_insn *aux,
struct bpf_insn *to_buff)
@@ -970,7 +971,7 @@ select_insn:
DST = tmp;
CONT;
ALU_MOD_X:
- if (unlikely(SRC == 0))
+ if (unlikely((u32)SRC == 0))
return 0;
tmp = (u32) DST;
DST = do_div(tmp, (u32) SRC);
@@ -989,7 +990,7 @@ select_insn:
DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
- if (unlikely(SRC == 0))
+ if (unlikely((u32)SRC == 0))
return 0;
tmp = (u32) DST;
do_div(tmp, (u32) SRC);
@@ -1379,9 +1380,13 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
}
#else
-static unsigned int __bpf_prog_ret0(const void *ctx,
- const struct bpf_insn *insn)
+/* Stand-in bpf_func that ignores its arguments, warns once and returns 0. */
+static unsigned int __bpf_prog_ret0_warn(const void *ctx,
+ const struct bpf_insn *insn)
{
+ /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
+ * is not working properly, so warn about it!
+ */
+ WARN_ON_ONCE(1);
return 0;
}
#endif
@@ -1441,7 +1446,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
#else
- fp->bpf_func = __bpf_prog_ret0;
+ fp->bpf_func = __bpf_prog_ret0_warn;
#endif
/* eBPF JITs can rewrite the program in case constant
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 584e02227671..d7ea96218516 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -591,9 +591,100 @@ unlock:
raw_spin_unlock(&trie->lock);
}
-static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+/* Store in @_next_key the key that follows @_key in postorder.
+ *
+ * Returns 0 on success, -ENOENT if the trie is empty or @_key was the last
+ * key in postorder, and -ENOMEM if the temporary node stack cannot be
+ * allocated. When @_key is NULL, has a prefixlen above max_prefixlen, or
+ * does not exactly match a stored non-intermediate node, the leftmost key
+ * of the whole trie is returned instead.
+ */
+static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
{
- return -ENOTSUPP;
+	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+	struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
+	struct lpm_trie_node *node, *next_node = NULL, *parent;
+	struct lpm_trie_node **node_stack = NULL;
+	struct lpm_trie_node __rcu **root;
+	int err = 0, stack_ptr = -1;
+	unsigned int next_bit;
+	size_t matchlen;
+
+	/* The get_next_key follows postorder. For the 4 node example in
+	 * the top of this file, the trie_get_next_key() returns the following
+	 * one after another:
+	 *   192.168.0.0/24
+	 *   192.168.1.0/24
+	 *   192.168.128.0/24
+	 *   192.168.0.0/16
+	 *
+	 * The idea is to return more specific keys before less specific ones.
+	 */
+
+	/* Empty trie */
+	if (!rcu_dereference(trie->root))
+		return -ENOENT;
+
+	/* For invalid key, find the leftmost node in the trie */
+	if (!key || key->prefixlen > trie->max_prefixlen) {
+		root = &trie->root;
+		goto find_leftmost;
+	}
+
+	/* A root-to-leaf path can hold one node for every prefix length from
+	 * 0 up to and including max_prefixlen, so the stack needs
+	 * max_prefixlen + 1 slots. Sizing it with max_prefixlen alone lets
+	 * the push in the loop below write one entry past the buffer.
+	 */
+	node_stack = kmalloc((trie->max_prefixlen + 1) *
+			     sizeof(struct lpm_trie_node *),
+			     GFP_USER | __GFP_NOWARN);
+	if (!node_stack)
+		return -ENOMEM;
+
+	/* Try to find the exact node for the given key */
+	for (node = rcu_dereference(trie->root); node;) {
+		node_stack[++stack_ptr] = node;
+		matchlen = longest_prefix_match(trie, node, key);
+		if (node->prefixlen != matchlen ||
+		    node->prefixlen == key->prefixlen)
+			break;
+
+		next_bit = extract_bit(key->data, node->prefixlen);
+		node = rcu_dereference(node->child[next_bit]);
+	}
+	if (!node || node->prefixlen != key->prefixlen ||
+	    (node->flags & LPM_TREE_NODE_FLAG_IM)) {
+		root = &trie->root;
+		goto find_leftmost;
+	}
+
+	/* The node with the exactly-matching key has been found,
+	 * find the first node in postorder after the matched node.
+	 */
+	node = node_stack[stack_ptr];
+	while (stack_ptr > 0) {
+		parent = node_stack[stack_ptr - 1];
+		if (rcu_dereference(parent->child[0]) == node &&
+		    rcu_dereference(parent->child[1])) {
+			root = &parent->child[1];
+			goto find_leftmost;
+		}
+		if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) {
+			next_node = parent;
+			goto do_copy;
+		}
+
+		node = parent;
+		stack_ptr--;
+	}
+
+	/* did not find anything */
+	err = -ENOENT;
+	goto free_stack;
+
+find_leftmost:
+	/* Find the leftmost non-intermediate node, all intermediate nodes
+	 * have exact two children, so this function will never return NULL.
+	 */
+	for (node = rcu_dereference(*root); node;) {
+		if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
+			next_node = node;
+		node = rcu_dereference(node->child[0]);
+	}
+do_copy:
+	next_key->prefixlen = next_node->prefixlen;
+	memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data),
+	       next_node->data, trie->data_size);
+free_stack:
+	/* node_stack is still NULL on the early find_leftmost path; kfree(NULL) is a no-op */
+	kfree(node_stack);
+	return err;
}
const struct bpf_map_ops trie_map_ops = {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index a88cebf368bf..c9401075b58c 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -230,9 +230,12 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
.prog = prog,
.info = info,
};
+ struct bpf_prog_aux *aux = prog->aux;
struct inode *ns_inode;
struct path ns_path;
+ char __user *uinsns;
void *res;
+ u32 ulen;
res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args);
if (IS_ERR(res)) {
@@ -241,6 +244,26 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
return PTR_ERR(res);
}
+	/* NOTE(review): ns_path was obtained by ns_get_path_cb() above; the
+	 * early returns in this section leave without path_put(&ns_path).
+	 * Confirm whether that reference is leaked on these paths.
+	 */
+	down_read(&bpf_devs_lock);
+
+	if (!aux->offload) {
+		up_read(&bpf_devs_lock);
+		return -ENODEV;
+	}
+
+	/* Always report the length of the device image. Copy the image out
+	 * only when there is one AND user space supplied a non-zero buffer
+	 * length. This must be a logical test: a bitwise AND of the two
+	 * lengths is zero for e.g. 16 and 8 and would skip the copy.
+	 */
+	ulen = info->jited_prog_len;
+	info->jited_prog_len = aux->offload->jited_len;
+	if (info->jited_prog_len && ulen) {
+		uinsns = u64_to_user_ptr(info->jited_prog_insns);
+		/* never copy more than the smaller of the two lengths */
+		ulen = min_t(u32, info->jited_prog_len, ulen);
+		if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
+			up_read(&bpf_devs_lock);
+			return -EFAULT;
+		}
+	}
+
+	up_read(&bpf_devs_lock);
+
+
ns_inode = ns_path.dentry->d_inode;
info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
info->netns_ino = ns_inode->i_ino;
@@ -276,7 +299,8 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- if (attr->map_type != BPF_MAP_TYPE_HASH)
+ if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
+ attr->map_type != BPF_MAP_TYPE_HASH)
return ERR_PTR(-EINVAL);
offmap = kzalloc(sizeof(*offmap), GFP_USER);
@@ -389,6 +413,61 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
return ret;
}
+/* Argument bundle handed to bpf_map_offload_info_fill_ns() via
+ * ns_get_path_cb(): the offloaded map to inspect and the info structure
+ * whose ifindex the callback fills in.
+ */
+struct ns_get_path_bpf_map_args {
+ struct bpf_offloaded_map *offmap;
+ struct bpf_map_info *info;
+};
+
+/* Callback for ns_get_path_cb(); @private_data is a
+ * struct ns_get_path_bpf_map_args.
+ *
+ * With rtnl and bpf_devs_lock (read) held, records the ifindex of the map's
+ * netdev in args->info and returns that netdev's network namespace after
+ * taking a reference with get_net(). If the map has no netdev, ifindex is
+ * set to 0 and NULL is returned.
+ */
+static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data)
+{
+ struct ns_get_path_bpf_map_args *args = private_data;
+ struct ns_common *ns;
+ struct net *net;
+
+ rtnl_lock();
+ down_read(&bpf_devs_lock);
+
+ if (args->offmap->netdev) {
+ args->info->ifindex = args->offmap->netdev->ifindex;
+ net = dev_net(args->offmap->netdev);
+ get_net(net);
+ ns = &net->ns;
+ } else {
+ args->info->ifindex = 0;
+ ns = NULL;
+ }
+
+ /* released in the reverse order of acquisition */
+ up_read(&bpf_devs_lock);
+ rtnl_unlock();
+
+ return ns;
+}
+
+/* Fill the offload-related fields of @info (ifindex, netns_dev, netns_ino)
+ * for the offloaded @map.
+ *
+ * Returns 0 on success. If the namespace path lookup fails, returns -ENODEV
+ * when the callback left ifindex at 0, otherwise the lookup's error code.
+ */
+int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
+{
+	struct ns_get_path_bpf_map_args cb_args;
+	struct inode *netns_inode;
+	struct path netns_path;
+	void *ret;
+
+	cb_args.offmap = map_to_offmap(map);
+	cb_args.info = info;
+
+	ret = ns_get_path_cb(&netns_path, bpf_map_offload_info_fill_ns,
+			     &cb_args);
+	if (IS_ERR(ret))
+		return info->ifindex ? PTR_ERR(ret) : -ENODEV;
+
+	netns_inode = netns_path.dentry->d_inode;
+	info->netns_dev = new_encode_dev(netns_inode->i_sb->s_dev);
+	info->netns_ino = netns_inode->i_ino;
+
+	/* drop the path reference now that dev/ino have been copied out */
+	path_put(&netns_path);
+	return 0;
+}
+
bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
{
struct bpf_offloaded_map *offmap;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c691b9e972e3..5bdb0cc84ad2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1504,6 +1504,8 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
struct bpf_prog *prog;
int ret = -ENOTSUPP;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
return -EINVAL;
@@ -1724,19 +1726,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
goto done;
}
- ulen = info.jited_prog_len;
- info.jited_prog_len = prog->jited_len;
- if (info.jited_prog_len && ulen) {
- if (bpf_dump_raw_ok()) {
- uinsns = u64_to_user_ptr(info.jited_prog_insns);
- ulen = min_t(u32, info.jited_prog_len, ulen);
- if (copy_to_user(uinsns, prog->bpf_func, ulen))
- return -EFAULT;
- } else {
- info.jited_prog_insns = 0;
- }
- }
-
ulen = info.xlated_prog_len;
info.xlated_prog_len = bpf_prog_insn_size(prog);
if (info.xlated_prog_len && ulen) {
@@ -1762,6 +1751,24 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
err = bpf_prog_offload_info_fill(&info, prog);
if (err)
return err;
+ goto done;
+ }
+
+ /* NOTE: the following code is supposed to be skipped for offload.
+ * bpf_prog_offload_info_fill() is the place to fill similar fields
+ * for offload.
+ */
+ ulen = info.jited_prog_len;
+ info.jited_prog_len = prog->jited_len;
+ if (info.jited_prog_len && ulen) {
+ if (bpf_dump_raw_ok()) {
+ uinsns = u64_to_user_ptr(info.jited_prog_insns);
+ ulen = min_t(u32, info.jited_prog_len, ulen);
+ if (copy_to_user(uinsns, prog->bpf_func, ulen))
+ return -EFAULT;
+ } else {
+ info.jited_prog_insns = 0;
+ }
}
done:
@@ -1794,6 +1801,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
info.map_flags = map->map_flags;
memcpy(info.name, map->name, sizeof(map->name));
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_info_fill(&info, map);
+ if (err)
+ return err;
+ }
+
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
return -EFAULT;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2e7a43edf264..dfb138b46488 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1349,6 +1349,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
}
+/* True if register @regno in cur_regs(@env) has type PTR_TO_CTX. */
+static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
+{
+	return cur_regs(env)[regno].type == PTR_TO_CTX;
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -1728,6 +1735,12 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
return -EACCES;
}
+ if (is_ctx_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
+ insn->dst_reg);
+ return -EACCES;
+ }
+
/* check whether atomic_add can read the memory */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1837,6 +1850,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
}
}
+/* True for the argument types that describe a pointer to a memory buffer. */
+static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
+{
+	switch (type) {
+	case ARG_PTR_TO_MEM:
+	case ARG_PTR_TO_MEM_OR_NULL:
+	case ARG_PTR_TO_UNINIT_MEM:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* True for the argument types that carry the size of a memory buffer. */
+static bool arg_type_is_mem_size(enum bpf_arg_type type)
+{
+	switch (type) {
+	case ARG_CONST_SIZE:
+	case ARG_CONST_SIZE_OR_ZERO:
+		return true;
+	default:
+		return false;
+	}
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
@@ -1886,9 +1912,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
expected_type = PTR_TO_CTX;
if (type != expected_type)
goto err_type;
- } else if (arg_type == ARG_PTR_TO_MEM ||
- arg_type == ARG_PTR_TO_MEM_OR_NULL ||
- arg_type == ARG_PTR_TO_UNINIT_MEM) {
+ } else if (arg_type_is_mem_ptr(arg_type)) {
expected_type = PTR_TO_STACK;
/* One exception here. In case function allows for NULL to be
* passed in as argument, it's a SCALAR_VALUE type. Final test
@@ -1949,25 +1973,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_stack_boundary(env, regno,
meta->map_ptr->value_size,
false, NULL);
- } else if (arg_type == ARG_CONST_SIZE ||
- arg_type == ARG_CONST_SIZE_OR_ZERO) {
+ } else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
- /* bpf_xxx(..., buf, len) call will access 'len' bytes
- * from stack pointer 'buf'. Check it
- * note: regno == len, regno - 1 == buf
- */
- if (regno == 0) {
- /* kernel subsystem misconfigured verifier */
- verbose(env,
- "ARG_CONST_SIZE cannot be first argument\n");
- return -EACCES;
- }
-
/* The register is SCALAR_VALUE; the access check
* happens using its boundaries.
*/
-
if (!tnum_is_const(reg->var_off))
/* For unprivileged variable accesses, disable raw
* mode so that the program is required to
@@ -2111,7 +2122,7 @@ error:
return -EINVAL;
}
-static int check_raw_mode(const struct bpf_func_proto *fn)
+static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
{
int count = 0;
@@ -2126,7 +2137,44 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
count++;
- return count > 1 ? -EINVAL : 0;
+ /* We only support one arg being in raw mode at the moment,
+ * which is sufficient for the helper functions we have
+ * right now.
+ */
+ return count <= 1;
+}
+
+/* Two adjacent arguments are badly paired when exactly one of "current is a
+ * memory pointer" and "next is a memory size" holds.
+ */
+static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
+				    enum bpf_arg_type arg_next)
+{
+	bool curr_is_ptr = arg_type_is_mem_ptr(arg_curr);
+	bool next_is_size = arg_type_is_mem_size(arg_next);
+
+	return curr_is_ptr != next_is_size;
+}
+
+/* Verify that every memory-pointer argument of @fn is immediately followed
+ * by a size argument and that no size argument stands alone.
+ */
+static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
+{
+	/* bpf_xxx(..., buf, len) call will access 'len'
+	 * bytes from memory 'buf'. Both arg types need
+	 * to be paired, so make sure there's no buggy
+	 * helper function specification.
+	 */
+
+	/* a size cannot come first, a buffer cannot come last */
+	if (arg_type_is_mem_size(fn->arg1_type))
+		return false;
+	if (arg_type_is_mem_ptr(fn->arg5_type))
+		return false;
+
+	return !(check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
+		 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
+		 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
+		 check_args_pair_invalid(fn->arg4_type, fn->arg5_type));
+}
+
+/* Sanity-check a helper prototype: 0 if both the raw-mode and the
+ * argument-pairing checks pass, -EINVAL otherwise.
+ */
+static int check_func_proto(const struct bpf_func_proto *fn)
+{
+	if (!check_raw_mode_ok(fn) || !check_arg_pair_ok(fn))
+		return -EINVAL;
+
+	return 0;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2282,7 +2330,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (env->ops->get_func_proto)
fn = env->ops->get_func_proto(func_id);
-
if (!fn) {
verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
func_id);
@@ -2306,10 +2353,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- /* We only support one arg being in raw mode at the moment, which
- * is sufficient for the helper functions we have right now.
- */
- err = check_raw_mode(fn);
+ err = check_func_proto(fn);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
@@ -2478,17 +2522,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg = &regs[dst];
- if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
- print_verifier_state(env, state);
- verbose(env,
- "verifier internal error: known but bad sbounds\n");
- return -EINVAL;
- }
- if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
- print_verifier_state(env, state);
- verbose(env,
- "verifier internal error: known but bad ubounds\n");
- return -EINVAL;
+ if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
+ smin_val > smax_val || umin_val > umax_val) {
+ /* Taint dst register if offset had invalid bounds derived from
+ * e.g. dead branches.
+ */
+ __mark_reg_unknown(dst_reg);
+ return 0;
}
if (BPF_CLASS(insn->code) != BPF_ALU64) {
@@ -2680,6 +2720,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
src_known = tnum_is_const(src_reg.var_off);
dst_known = tnum_is_const(dst_reg->var_off);
+ if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
+ smin_val > smax_val || umin_val > umax_val) {
+ /* Taint dst register if offset had invalid bounds derived from
+ * e.g. dead branches.
+ */
+ __mark_reg_unknown(dst_reg);
+ return 0;
+ }
+
if (!src_known &&
opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
__mark_reg_unknown(dst_reg);
@@ -4661,6 +4710,12 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
+ if (is_ctx_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_ST stores into R%d context is not allowed\n",
+ insn->dst_reg);
+ return -EACCES;
+ }
+
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
@@ -4779,7 +4834,8 @@ process_bpf_exit:
insn_idx++;
}
- verbose(env, "processed %d insns, stack depth ", insn_processed);
+ verbose(env, "processed %d insns (limit %d), stack depth ",
+ insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
for (i = 0; i < env->subprog_cnt + 1; i++) {
u32 depth = env->subprog_stack_depth[i];
@@ -5330,6 +5386,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
int i, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+ /* due to JIT bugs clear upper 32-bits of src register
+ * before div/mod operation
+ */
+ insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
+ insn_buf[1] = *insn;
+ cnt = 2;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
if (insn->code != (BPF_JMP | BPF_CALL))
continue;
if (insn->src_reg == BPF_PSEUDO_CALL)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 2cf06c274e4c..7e4c44538119 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4447,6 +4447,7 @@ static struct cftype cgroup_base_files[] = {
},
{
.name = "cgroup.threads",
+ .flags = CFTYPE_NS_DELEGATABLE,
.release = cgroup_procs_release,
.seq_start = cgroup_threads_start,
.seq_next = cgroup_procs_next,
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 4a1c33416b6a..e2764d767f18 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -51,16 +51,16 @@ void __delayacct_tsk_init(struct task_struct *tsk)
* Finish delay accounting for a statistic using its timestamps (@start),
* accumalator (@total) and @count
*/
-static void delayacct_end(u64 *start, u64 *total, u32 *count)
+static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)
{
s64 ns = ktime_get_ns() - *start;
unsigned long flags;
+ /* Only a positive elapsed time is accounted. @lock is supplied by the
+ * caller and is held, with interrupts saved, around both updates.
+ */
if (ns > 0) {
- spin_lock_irqsave(&current->delays->lock, flags);
+ spin_lock_irqsave(lock, flags);
*total += ns;
(*count)++;
- spin_unlock_irqrestore(&current->delays->lock, flags);
+ spin_unlock_irqrestore(lock, flags);
}
}
@@ -69,17 +69,25 @@ void __delayacct_blkio_start(void)
current->delays->blkio_start = ktime_get_ns();
}
-void __delayacct_blkio_end(void)
+/*
+ * Finish block I/O delay accounting for task @p, adding the elapsed time to
+ * the swapin counters when DELAYACCT_PF_SWAPIN is set in its delay flags and
+ * to the plain block I/O counters otherwise.
+ *
+ * We cannot rely on the `current` macro, as we haven't yet switched back to
+ * the process being woken.
+ */
+void __delayacct_blkio_end(struct task_struct *p)
{
- if (current->delays->flags & DELAYACCT_PF_SWAPIN)
- /* Swapin block I/O */
- delayacct_end(&current->delays->blkio_start,
- &current->delays->swapin_delay,
- &current->delays->swapin_count);
- else /* Other block I/O */
- delayacct_end(&current->delays->blkio_start,
- &current->delays->blkio_delay,
- &current->delays->blkio_count);
+ struct task_delay_info *delays = p->delays;
+ u64 *total;
+ u32 *count;
+
+ /* Swapin block I/O vs. other block I/O */
+ if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
+ total = &delays->swapin_delay;
+ count = &delays->swapin_count;
+ } else {
+ total = &delays->blkio_delay;
+ count = &delays->blkio_count;
+ }
+
+ /* both kinds share the blkio_start timestamp and @p's delays lock */
+ delayacct_end(&delays->lock, &delays->blkio_start, total, count);
}
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -153,8 +161,10 @@ void __delayacct_freepages_start(void)
void __delayacct_freepages_end(void)
{
- delayacct_end(&current->delays->freepages_start,
- &current->delays->freepages_delay,
- &current->delays->freepages_count);
+ /* Accounted against `current`, under current's own delays lock. */
+ delayacct_end(
+ &current->delays->lock,
+ &current->delays->freepages_start,
+ &current->delays->freepages_delay,
+ &current->delays->freepages_count);
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 57d0b3657e16..8c5424dd5924 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1878,6 +1878,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
struct futex_q *this, *next;
DEFINE_WAKE_Q(wake_q);
+ if (nr_wake < 0 || nr_requeue < 0)
+ return -EINVAL;
+
/*
* When PI not supported: return -ENOSYS if requeue_pi is true,
* consequently the compiler knows requeue_pi is always false past
@@ -2294,21 +2297,17 @@ static void unqueue_me_pi(struct futex_q *q)
spin_unlock(q->lock_ptr);
}
-/*
- * Fixup the pi_state owner with the new owner.
- *
- * Must be called with hash bucket lock held and mm->sem held for non
- * private futexes.
- */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
- struct task_struct *newowner)
+ struct task_struct *argowner)
{
- u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
u32 uval, uninitialized_var(curval), newval;
- struct task_struct *oldowner;
+ struct task_struct *oldowner, *newowner;
+ u32 newtid;
int ret;
+ lockdep_assert_held(q->lock_ptr);
+
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
oldowner = pi_state->owner;
@@ -2317,11 +2316,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
newtid |= FUTEX_OWNER_DIED;
/*
- * We are here either because we stole the rtmutex from the
- * previous highest priority waiter or we are the highest priority
- * waiter but have failed to get the rtmutex the first time.
+ * We are here because either:
+ *
+ * - we stole the lock and pi_state->owner needs updating to reflect
+ * that (@argowner == current),
+ *
+ * or:
+ *
+ * - someone stole our lock and we need to fix things to point to the
+ * new owner (@argowner == NULL).
*
- * We have to replace the newowner TID in the user space variable.
+ * Either way, we have to replace the TID in the user space variable.
* This must be atomic as we have to preserve the owner died bit here.
*
* Note: We write the user space value _before_ changing the pi_state
@@ -2334,6 +2339,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* in the PID check in lookup_pi_state.
*/
retry:
+ if (!argowner) {
+ if (oldowner != current) {
+ /*
+ * We raced against a concurrent self; things are
+ * already fixed up. Nothing to do.
+ */
+ ret = 0;
+ goto out_unlock;
+ }
+
+ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
+ /* We got the lock after all, nothing to fix. */
+ ret = 0;
+ goto out_unlock;
+ }
+
+ /*
+ * Since we just failed the trylock; there must be an owner.
+ */
+ newowner = rt_mutex_owner(&pi_state->pi_mutex);
+ BUG_ON(!newowner);
+ } else {
+ WARN_ON_ONCE(argowner != current);
+ if (oldowner == current) {
+ /*
+ * We raced against a concurrent self; things are
+ * already fixed up. Nothing to do.
+ */
+ ret = 0;
+ goto out_unlock;
+ }
+ newowner = argowner;
+ }
+
+ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
+
if (get_futex_value_locked(&uval, uaddr))
goto handle_fault;
@@ -2434,9 +2475,9 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Got the lock. We might not be the anticipated owner if we
* did a lock-steal - fix up the PI-state in that case:
*
- * We can safely read pi_state->owner without holding wait_lock
- * because we now own the rt_mutex, only the owner will attempt
- * to change it.
+ * Speculative pi_state->owner read (we don't hold wait_lock);
+ * since we own the lock pi_state->owner == current is the
+ * stable state, anything else needs more attention.
*/
if (q->pi_state->owner != current)
ret = fixup_pi_state_owner(uaddr, q, current);
@@ -2444,6 +2485,19 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
}
/*
+ * If we didn't get the lock; check if anybody stole it from us. In
+ * that case, we need to fix up the uval to point to them instead of
+ * us, otherwise bad things happen. [10]
+ *
+ * Another speculative read; pi_state->owner == current is unstable
+ * but needs our attention.
+ */
+ if (q->pi_state->owner == current) {
+ ret = fixup_pi_state_owner(uaddr, q, NULL);
+ goto out;
+ }
+
+ /*
* Paranoia check. If we did not take the lock, then we should not be
* the owner of the rt_mutex.
*/
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6f3dba6e4e9e..65cc0cb984e6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
return ret;
}
+/*
+ * Core of the slow-path trylock: attempt to take @lock for current and
+ * return the result of try_to_take_rt_mutex().
+ *
+ * This helper does not take lock->wait_lock itself; rt_mutex_slowtrylock()
+ * calls it with wait_lock already held.
+ */
+static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
+{
+ int ret = try_to_take_rt_mutex(lock, current, NULL);
+
+ /*
+ * try_to_take_rt_mutex() sets the lock waiters bit
+ * unconditionally. Clean this up.
+ */
+ fixup_rt_mutex_waiters(lock);
+
+ return ret;
+}
+
/*
* Slow path try-lock function:
*/
@@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
*/
raw_spin_lock_irqsave(&lock->wait_lock, flags);
- ret = try_to_take_rt_mutex(lock, current, NULL);
-
- /*
- * try_to_take_rt_mutex() sets the lock waiters bit
- * unconditionally. Clean this up.
- */
- fixup_rt_mutex_waiters(lock);
+ ret = __rt_mutex_slowtrylock(lock);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
@@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
return rt_mutex_slowtrylock(lock);
}
+/*
+ * Variant of rt_mutex_futex_trylock() that calls __rt_mutex_slowtrylock()
+ * directly, i.e. without acquiring lock->wait_lock here. The futex caller
+ * added by this patch invokes it while holding pi_mutex.wait_lock.
+ */
+int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+{
+ return __rt_mutex_slowtrylock(lock);
+}
+
/**
* rt_mutex_timed_lock - lock a rt_mutex interruptible
* the timeout structure is provided
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 124e98ca0b17..68686b3ec3c1 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter);
extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 644fa2e3d993..a7bf32aabfda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2056,7 +2056,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
p->state = TASK_WAKING;
if (p->in_iowait) {
- delayacct_blkio_end();
+ delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
}
@@ -2069,7 +2069,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
#else /* CONFIG_SMP */
if (p->in_iowait) {
- delayacct_blkio_end();
+ delayacct_blkio_end(p);
atomic_dec(&task_rq(p)->nr_iowait);
}
@@ -2122,7 +2122,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
if (!task_on_rq_queued(p)) {
if (p->in_iowait) {
- delayacct_blkio_end();
+ delayacct_blkio_end(p);
atomic_dec(&rq->nr_iowait);
}
ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 89a9e1b4264a..0bcf00e3ce48 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1696,7 +1696,7 @@ void run_local_timers(void)
hrtimer_run_queues();
/* Raise the softirq only if required. */
if (time_before(jiffies, base->clk)) {
- if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+ if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
return;
/* CPU is awake, so check the deferrable base. */
base++;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f274468cbc45..fc2838ac8b78 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -245,7 +245,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
*/
#define __BPF_TP_EMIT() __BPF_ARG3_TP()
#define __BPF_TP(...) \
- __trace_printk(1 /* Fake ip will not be printed. */, \
+ __trace_printk(0 /* Fake ip */, \
fmt, ##__VA_ARGS__)
#define __BPF_ARG1_TP(...) \
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0cddf60186da..5af2842dea96 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2579,8 +2579,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
bit = RB_CTX_NORMAL;
else
bit = pc & NMI_MASK ? RB_CTX_NMI :
- pc & HARDIRQ_MASK ? RB_CTX_IRQ :
- pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ;
+ pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
if (unlikely(val & (1 << bit)))
return 1;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ec0f9aa4e151..1b87157edbff 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2213,6 +2213,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
{
struct trace_event_call *call, *p;
const char *last_system = NULL;
+ bool first = false;
int last_i;
int i;
@@ -2220,15 +2221,28 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
list_for_each_entry_safe(call, p, &ftrace_events, list) {
/* events are usually grouped together with systems */
if (!last_system || call->class->system != last_system) {
+ first = true;
last_i = 0;
last_system = call->class->system;
}
+ /*
+ * Since calls are grouped by systems, the likelihood that the
+ * next call in the iteration belongs to the same system as the
+ * previous call is high. As an optimization, we skip searching
+ * for a map[] that matches the call's system if the last call
+ * was from the same system. That's what last_i is for. If the
+ * call has the same system as the previous call, then last_i
+ * will be the index of the first map[] that has a matching
+ * system.
+ */
for (i = last_i; i < len; i++) {
if (call->class->system == map[i]->system) {
/* Save the first system if need be */
- if (!last_i)
+ if (first) {
last_i = i;
+ first = false;
+ }
update_event_printk(call, map[i]);
}
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 43d18cb46308..f699122dab32 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -48,6 +48,7 @@
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
#include <linux/sched/isolation.h>
+#include <linux/nmi.h>
#include "workqueue_internal.h"
@@ -4463,6 +4464,12 @@ void show_workqueue_state(void)
if (pwq->nr_active || !list_empty(&pwq->delayed_works))
show_pwq(pwq);
spin_unlock_irqrestore(&pwq->pool->lock, flags);
+ /*
+ * We could be printing a lot from atomic context, e.g.
+ * sysrq-t -> show_workqueue_state(). Avoid triggering
+ * hard lockup.
+ */
+ touch_nmi_watchdog();
}
}
@@ -4490,6 +4497,12 @@ void show_workqueue_state(void)
pr_cont("\n");
next_pool:
spin_unlock_irqrestore(&pool->lock, flags);
+ /*
+ * We could be printing a lot from atomic context, e.g.
+ * sysrq-t -> show_workqueue_state(). Avoid triggering
+ * hard lockup.
+ */
+ touch_nmi_watchdog();
}
rcu_read_unlock_sched();
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index f369889e521d..e3938e395cba 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6109,6 +6109,110 @@ static struct bpf_test tests[] = {
{ { ETH_HLEN, 42 } },
.fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
},
+ /* Checking interpreter vs JIT wrt signed extended imms. */
+ {
+ "JNE signed compare, test 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+ BPF_MOV64_REG(R2, R1),
+ BPF_ALU64_REG(BPF_AND, R2, R3),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JNE signed compare, test 2",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+ BPF_MOV64_REG(R2, R1),
+ BPF_ALU64_REG(BPF_AND, R2, R3),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JNE signed compare, test 3",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000),
+ BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000),
+ BPF_MOV64_REG(R2, R1),
+ BPF_ALU64_REG(BPF_AND, R2, R3),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP_REG(BPF_JNE, R2, R4, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 2 } },
+ },
+ {
+ "JNE signed compare, test 4",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, -17104896),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 2 } },
+ },
+ {
+ "JNE signed compare, test 5",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0xfefb0000),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JNE signed compare, test 6",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x7efb0000),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 2 } },
+ },
+ {
+ "JNE signed compare, test 7",
+ .u.insns = {
+ BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000),
+ BPF_STMT(BPF_MISC | BPF_TAX, 0),
+ BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12),
+ BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0),
+ BPF_STMT(BPF_RET | BPF_K, 1),
+ BPF_STMT(BPF_RET | BPF_K, 2),
+ },
+ CLASSIC | FLAG_NO_DATA,
+ {},
+ { { 0, 2 } },
+ },
};
static struct net_device dev;
diff --git a/mm/memory.c b/mm/memory.c
index ca5674cbaff2..793004608332 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2857,8 +2857,11 @@ int do_swap_page(struct vm_fault *vmf)
int ret = 0;
bool vma_readahead = swap_use_vma_readahead();
- if (vma_readahead)
+ if (vma_readahead) {
page = swap_readahead_detect(vmf, &swap_ra);
+ swapcache = page;
+ }
+
if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
if (page)
put_page(page);
@@ -2889,9 +2892,12 @@ int do_swap_page(struct vm_fault *vmf)
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
- if (!page)
+ if (!page) {
page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
vmf->address);
+ swapcache = page;
+ }
+
if (!page) {
struct swap_info_struct *si = swp_swap_info(entry);
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 8592543a0f15..270a8219ccd0 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -616,7 +616,6 @@ static void init_early_allocated_pages(void)
{
pg_data_t *pgdat;
- drain_all_pages(NULL);
for_each_online_pgdat(pgdat)
init_zones_in_node(pgdat);
}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 37817d25b63d..02c4b409d317 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2445,7 +2445,6 @@ static int __init ebtables_init(void)
return ret;
}
- printk(KERN_INFO "Ebtables v2.0 registered\n");
return 0;
}
@@ -2453,7 +2452,6 @@ static void __exit ebtables_fini(void)
{
nf_unregister_sockopt(&ebt_sockopts);
xt_unregister_target(&ebt_standard_target);
- printk(KERN_INFO "Ebtables v2.0 unregistered\n");
}
EXPORT_SYMBOL(ebt_register_table);
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index 86774b5c3b73..5160cf614176 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -42,40 +42,6 @@ nft_do_chain_bridge(void *priv,
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_bridge __read_mostly = {
- .family = NFPROTO_BRIDGE,
- .nhooks = NF_BR_NUMHOOKS,
- .owner = THIS_MODULE,
-};
-
-static int nf_tables_bridge_init_net(struct net *net)
-{
- net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.bridge == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
-
- if (nft_register_afinfo(net, net->nft.bridge) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.bridge);
- return -ENOMEM;
-}
-
-static void nf_tables_bridge_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.bridge);
- kfree(net->nft.bridge);
-}
-
-static struct pernet_operations nf_tables_bridge_net_ops = {
- .init = nf_tables_bridge_init_net,
- .exit = nf_tables_bridge_exit_net,
-};
-
static const struct nf_chain_type filter_bridge = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -97,27 +63,11 @@ static const struct nf_chain_type filter_bridge = {
static int __init nf_tables_bridge_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_bridge);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
- if (ret < 0)
- goto err_register_subsys;
-
- return ret;
-
-err_register_subsys:
- nft_unregister_chain_type(&filter_bridge);
-
- return ret;
+ return nft_register_chain_type(&filter_bridge);
}
static void __exit nf_tables_bridge_exit(void)
{
- unregister_pernet_subsys(&nf_tables_bridge_net_ops);
nft_unregister_chain_type(&filter_bridge);
}
@@ -126,4 +76,4 @@ module_exit(nf_tables_bridge_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
+MODULE_ALIAS_NFT_CHAIN(AF_BRIDGE, "filter");
diff --git a/net/can/af_can.c b/net/can/af_can.c
index f22b886ed081..6da324550eec 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CAN_MTU ||
- cfd->len > CAN_MAX_DLEN,
- "PF_CAN: dropped non conform CAN skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
- goto drop;
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
+ cfd->len > CAN_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
}
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -742,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (WARN_ONCE(dev->type != ARPHRD_CAN ||
- skb->len != CANFD_MTU ||
- cfd->len > CANFD_MAX_DLEN,
- "PF_CAN: dropped non conform CAN FD skbuf: "
- "dev type %d, len %d, datalen %d\n",
- dev->type, skb->len, cfd->len))
- goto drop;
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+ cfd->len > CANFD_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+ dev->type, skb->len, cfd->len);
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
can_receive(skb, dev);
return NET_RX_SUCCESS;
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
}
/*
diff --git a/net/core/filter.c b/net/core/filter.c
index db2ee8c7e1bd..18da42a81d0c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -458,6 +458,10 @@ do_pass:
convert_bpf_extensions(fp, &insn))
break;
+ if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+ fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+
*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
@@ -2861,7 +2865,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
- .arg5_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -3150,7 +3154,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
- .arg5_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
@@ -3456,6 +3460,8 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_xdp_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_csum_diff:
+ return &bpf_csum_diff_proto;
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
case BPF_FUNC_xdp_adjust_meta:
@@ -4526,6 +4532,7 @@ const struct bpf_verifier_ops sk_filter_verifier_ops = {
};
const struct bpf_prog_ops sk_filter_prog_ops = {
+ .test_run = bpf_prog_test_run_skb,
};
const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 02db7b122a73..559db9ea8d86 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1031,8 +1031,8 @@ ip_proto_again:
out_good:
ret = true;
- key_control->thoff = (u16)nhoff;
out:
+ key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
key_basic->n_proto = proto;
key_basic->ip_proto = ip_proto;
@@ -1040,7 +1040,6 @@ out:
out_bad:
ret = false;
- key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
goto out;
}
EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index a47ad6cd41c0..f2d0462611c3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,6 +25,7 @@
static int zero = 0;
static int one = 1;
+static int two __maybe_unused = 2;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
@@ -250,6 +251,46 @@ static int proc_do_rss_key(struct ctl_table *table, int write,
return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
+#ifdef CONFIG_BPF_JIT
+static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret, jit_enable = *(int *)table->data;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &jit_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (jit_enable < 2 ||
+ (jit_enable == 2 && bpf_dump_raw_ok())) {
+ *(int *)table->data = jit_enable;
+ if (jit_enable == 2)
+ pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
+ } else {
+ ret = -EPERM;
+ }
+ }
+ return ret;
+}
+
+# ifdef CONFIG_HAVE_EBPF_JIT
+static int
+proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+# endif
+#endif
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -325,13 +366,14 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_enable,
.maxlen = sizeof(int),
.mode = 0644,
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
- .proc_handler = proc_dointvec
-#else
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dointvec_minmax_bpf_enable,
+# ifdef CONFIG_BPF_JIT_ALWAYS_ON
.extra1 = &one,
.extra2 = &one,
-#endif
+# else
+ .extra1 = &zero,
+ .extra2 = &two,
+# endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
{
@@ -339,14 +381,18 @@ static struct ctl_table net_core_table[] = {
.data = &bpf_jit_harden,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &zero,
+ .extra2 = &two,
},
{
.procname = "bpf_jit_kallsyms",
.data = &bpf_jit_kallsyms,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &zero,
+ .extra2 = &one,
},
# endif
#endif
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 7d5d444964aa..5f52236780b4 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -79,8 +79,9 @@ config NF_TABLES_ARP
endif # NF_TABLES
config NF_FLOW_TABLE_IPV4
- select NF_FLOW_TABLE
tristate "Netfilter flow table IPv4 module"
+ depends on NF_CONNTRACK && NF_TABLES
+ select NF_FLOW_TABLE
help
This option adds the flow table IPv4 support.
@@ -157,6 +158,7 @@ config NF_NAT_SNMP_BASIC
depends on NF_CONNTRACK_SNMP
depends on NETFILTER_ADVANCED
default NF_NAT && NF_CONNTRACK_SNMP
+ select ASN1
---help---
This module implements an Application Layer Gateway (ALG) for
@@ -342,6 +344,7 @@ config IP_NF_TARGET_CLUSTERIP
depends on NF_CONNTRACK_IPV4
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
+ select NETFILTER_FAMILY_ARP
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 8bb1f0c7a375..2dad20eefd26 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,9 +27,15 @@ obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
# NAT helpers (nf_conntrack)
obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
+
+nf_nat_snmp_basic-y := nf_nat_snmp_basic-asn1.o nf_nat_snmp_basic_main.o
+nf_nat_snmp_basic-y : nf_nat_snmp_basic-asn1.h nf_nat_snmp_basic-asn1.c
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
+clean-files := nf_nat_snmp_basic-asn1.c nf_nat_snmp_basic-asn1.h
+
obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
+
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index bf8a5340f15e..5f7c0d643fb3 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1656,7 +1656,6 @@ static int __init arp_tables_init(void)
if (ret < 0)
goto err4;
- pr_info("arp_tables: (C) 2002 David S. Miller\n");
return 0;
err4:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 0b975aa2d363..1f534aec22f0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1939,7 +1939,6 @@ static int __init ip_tables_init(void)
if (ret < 0)
goto err5;
- pr_info("(C) 2000-2006 Netfilter Core Team\n");
return 0;
err5:
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index a869d1fea7d9..960625aabf04 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/slab.h>
@@ -12,6 +13,10 @@
static int __net_init iptable_raw_table_init(struct net *net);
+static bool raw_before_defrag __read_mostly;
+MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
+module_param(raw_before_defrag, bool, 0000);
+
static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
@@ -21,6 +26,15 @@ static const struct xt_table packet_raw = {
.table_init = iptable_raw_table_init,
};
+static const struct xt_table packet_raw_before_defrag = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
+ .table_init = iptable_raw_table_init,
+};
+
/* The work comes in here from netfilter.c. */
static unsigned int
iptable_raw_hook(void *priv, struct sk_buff *skb,
@@ -34,15 +48,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
static int __net_init iptable_raw_table_init(struct net *net)
{
struct ipt_replace *repl;
+ const struct xt_table *table = &packet_raw;
int ret;
+ if (raw_before_defrag)
+ table = &packet_raw_before_defrag;
+
if (net->ipv4.iptable_raw)
return 0;
- repl = ipt_alloc_initial_table(&packet_raw);
+ repl = ipt_alloc_initial_table(table);
if (repl == NULL)
return -ENOMEM;
- ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops,
+ ret = ipt_register_table(net, table, repl, rawtable_ops,
&net->ipv4.iptable_raw);
kfree(repl);
return ret;
@@ -63,8 +81,15 @@ static struct pernet_operations iptable_raw_net_ops = {
static int __init iptable_raw_init(void)
{
int ret;
+ const struct xt_table *table = &packet_raw;
+
+ if (raw_before_defrag) {
+ table = &packet_raw_before_defrag;
+
+ pr_info("Enabling raw table before defrag\n");
+ }
- rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 37fe1616ca0b..a0d3ad60a411 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -78,6 +78,8 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
return NF_ACCEPT;
#endif
+ if (skb->_nfct == IP_CT_UNTRACKED)
+ return NF_ACCEPT;
#endif
/* Gather fragments. */
if (ip_is_fragment(ip_hdr(skb))) {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
new file mode 100644
index 000000000000..24b73268f362
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1
@@ -0,0 +1,177 @@
+Message ::=
+ SEQUENCE {
+ version
+ INTEGER ({snmp_version}),
+
+ community
+ OCTET STRING,
+
+ pdu
+ PDUs
+ }
+
+
+ObjectName ::=
+ OBJECT IDENTIFIER
+
+ObjectSyntax ::=
+ CHOICE {
+ simple
+ SimpleSyntax,
+
+ application-wide
+ ApplicationSyntax
+ }
+
+SimpleSyntax ::=
+ CHOICE {
+ integer-value
+ INTEGER,
+
+ string-value
+ OCTET STRING,
+
+ objectID-value
+ OBJECT IDENTIFIER
+ }
+
+ApplicationSyntax ::=
+ CHOICE {
+ ipAddress-value
+ IpAddress,
+
+ counter-value
+ Counter32,
+
+ timeticks-value
+ TimeTicks,
+
+ arbitrary-value
+ Opaque,
+
+ big-counter-value
+ Counter64,
+
+ unsigned-integer-value
+ Unsigned32
+ }
+
+IpAddress ::=
+ [APPLICATION 0]
+ IMPLICIT OCTET STRING OPTIONAL ({snmp_helper})
+
+Counter32 ::=
+ [APPLICATION 1]
+ IMPLICIT INTEGER OPTIONAL
+
+Unsigned32 ::=
+ [APPLICATION 2]
+ IMPLICIT INTEGER OPTIONAL
+
+Gauge32 ::= Unsigned32 OPTIONAL
+
+TimeTicks ::=
+ [APPLICATION 3]
+ IMPLICIT INTEGER OPTIONAL
+
+Opaque ::=
+ [APPLICATION 4]
+ IMPLICIT OCTET STRING OPTIONAL
+
+Counter64 ::=
+ [APPLICATION 6]
+ IMPLICIT INTEGER OPTIONAL
+
+PDUs ::=
+ CHOICE {
+ get-request
+ GetRequest-PDU,
+
+ get-next-request
+ GetNextRequest-PDU,
+
+ get-bulk-request
+ GetBulkRequest-PDU,
+
+ response
+ Response-PDU,
+
+ set-request
+ SetRequest-PDU,
+
+ inform-request
+ InformRequest-PDU,
+
+ snmpV2-trap
+ SNMPv2-Trap-PDU,
+
+ report
+ Report-PDU
+ }
+
+GetRequest-PDU ::=
+ [0] IMPLICIT PDU OPTIONAL
+
+GetNextRequest-PDU ::=
+ [1] IMPLICIT PDU OPTIONAL
+
+Response-PDU ::=
+ [2] IMPLICIT PDU OPTIONAL
+
+SetRequest-PDU ::=
+ [3] IMPLICIT PDU OPTIONAL
+
+-- [4] is obsolete
+
+GetBulkRequest-PDU ::=
+ [5] IMPLICIT PDU OPTIONAL
+
+InformRequest-PDU ::=
+ [6] IMPLICIT PDU OPTIONAL
+
+SNMPv2-Trap-PDU ::=
+ [7] IMPLICIT PDU OPTIONAL
+
+Report-PDU ::=
+ [8] IMPLICIT PDU OPTIONAL
+
+PDU ::=
+ SEQUENCE {
+ request-id
+ INTEGER,
+
+ error-status
+ INTEGER,
+
+ error-index
+ INTEGER,
+
+ variable-bindings
+ VarBindList
+ }
+
+
+VarBind ::=
+ SEQUENCE {
+ name
+ ObjectName,
+
+ CHOICE {
+ value
+ ObjectSyntax,
+
+ unSpecified
+ NULL,
+
+ noSuchObject
+ [0] IMPLICIT NULL,
+
+ noSuchInstance
+ [1] IMPLICIT NULL,
+
+ endOfMibView
+ [2] IMPLICIT NULL
+ }
+}
+
+VarBindList ::= SEQUENCE OF VarBind
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
deleted file mode 100644
index d5b1e0b3f687..000000000000
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ /dev/null
@@ -1,1286 +0,0 @@
-/*
- * nf_nat_snmp_basic.c
- *
- * Basic SNMP Application Layer Gateway
- *
- * This IP NAT module is intended for use with SNMP network
- * discovery and monitoring applications where target networks use
- * conflicting private address realms.
- *
- * Static NAT is used to remap the networks from the view of the network
- * management system at the IP layer, and this module remaps some application
- * layer addresses to match.
- *
- * The simplest form of ALG is performed, where only tagged IP addresses
- * are modified. The module does not need to be MIB aware and only scans
- * messages at the ASN.1/BER level.
- *
- * Currently, only SNMPv1 and SNMPv2 are supported.
- *
- * More information on ALG and associated issues can be found in
- * RFC 2962
- *
- * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
- * McLean & Jochen Friedrich, stripped down for use in the kernel.
- *
- * Copyright (c) 2000 RP Internet (www.rpi.net.au).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- * Author: James Morris <jmorris@intercode.com.au>
- *
- * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <linux/netfilter/nf_conntrack_snmp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
-MODULE_ALIAS("ip_nat_snmp_basic");
-
-#define SNMP_PORT 161
-#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)(n))
-
-static int debug;
-static DEFINE_SPINLOCK(snmp_lock);
-
-/*
- * Application layer address mapping mimics the NAT mapping, but
- * only for the first octet in this case (a more flexible system
- * can be implemented if needed).
- */
-struct oct1_map
-{
- u_int8_t from;
- u_int8_t to;
-};
-
-
-/*****************************************************************************
- *
- * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* Class */
-#define ASN1_UNI 0 /* Universal */
-#define ASN1_APL 1 /* Application */
-#define ASN1_CTX 2 /* Context */
-#define ASN1_PRV 3 /* Private */
-
-/* Tag */
-#define ASN1_EOC 0 /* End Of Contents */
-#define ASN1_BOL 1 /* Boolean */
-#define ASN1_INT 2 /* Integer */
-#define ASN1_BTS 3 /* Bit String */
-#define ASN1_OTS 4 /* Octet String */
-#define ASN1_NUL 5 /* Null */
-#define ASN1_OJI 6 /* Object Identifier */
-#define ASN1_OJD 7 /* Object Description */
-#define ASN1_EXT 8 /* External */
-#define ASN1_SEQ 16 /* Sequence */
-#define ASN1_SET 17 /* Set */
-#define ASN1_NUMSTR 18 /* Numerical String */
-#define ASN1_PRNSTR 19 /* Printable String */
-#define ASN1_TEXSTR 20 /* Teletext String */
-#define ASN1_VIDSTR 21 /* Video String */
-#define ASN1_IA5STR 22 /* IA5 String */
-#define ASN1_UNITIM 23 /* Universal Time */
-#define ASN1_GENTIM 24 /* General Time */
-#define ASN1_GRASTR 25 /* Graphical String */
-#define ASN1_VISSTR 26 /* Visible String */
-#define ASN1_GENSTR 27 /* General String */
-
-/* Primitive / Constructed methods*/
-#define ASN1_PRI 0 /* Primitive */
-#define ASN1_CON 1 /* Constructed */
-
-/*
- * Error codes.
- */
-#define ASN1_ERR_NOERROR 0
-#define ASN1_ERR_DEC_EMPTY 2
-#define ASN1_ERR_DEC_EOC_MISMATCH 3
-#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
-#define ASN1_ERR_DEC_BADVALUE 5
-
-/*
- * ASN.1 context.
- */
-struct asn1_ctx
-{
- int error; /* Error condition */
- unsigned char *pointer; /* Octet just to be decoded */
- unsigned char *begin; /* First octet */
- unsigned char *end; /* Octet after last octet */
-};
-
-/*
- * Octet string (not null terminated)
- */
-struct asn1_octstr
-{
- unsigned char *data;
- unsigned int len;
-};
-
-static void asn1_open(struct asn1_ctx *ctx,
- unsigned char *buf,
- unsigned int len)
-{
- ctx->begin = buf;
- ctx->end = buf + len;
- ctx->pointer = buf;
- ctx->error = ASN1_ERR_NOERROR;
-}
-
-static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
-{
- if (ctx->pointer >= ctx->end) {
- ctx->error = ASN1_ERR_DEC_EMPTY;
- return 0;
- }
- *ch = *(ctx->pointer)++;
- return 1;
-}
-
-static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
-{
- unsigned char ch;
-
- *tag = 0;
-
- do
- {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *tag <<= 7;
- *tag |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
-
-static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned char ch;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *cls = (ch & 0xC0) >> 6;
- *con = (ch & 0x20) >> 5;
- *tag = (ch & 0x1F);
-
- if (*tag == 0x1F) {
- if (!asn1_tag_decode(ctx, tag))
- return 0;
- }
- return 1;
-}
-
-static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
- unsigned int *def,
- unsigned int *len)
-{
- unsigned char ch, cnt;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch == 0x80)
- *def = 0;
- else {
- *def = 1;
-
- if (ch < 0x80)
- *len = ch;
- else {
- cnt = ch & 0x7F;
- *len = 0;
-
- while (cnt > 0) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
- *len <<= 8;
- *len |= ch;
- cnt--;
- }
- }
- }
-
- /* don't trust len bigger than ctx buffer */
- if (*len > ctx->end - ctx->pointer)
- return 0;
-
- return 1;
-}
-
-static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
- unsigned char **eoc,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
-{
- unsigned int def, len;
-
- if (!asn1_id_decode(ctx, cls, con, tag))
- return 0;
-
- def = len = 0;
- if (!asn1_length_decode(ctx, &def, &len))
- return 0;
-
- /* primitive shall be definite, indefinite shall be constructed */
- if (*con == ASN1_PRI && !def)
- return 0;
-
- if (def)
- *eoc = ctx->pointer + len;
- else
- *eoc = NULL;
- return 1;
-}
-
-static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- unsigned char ch;
-
- if (eoc == NULL) {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- if (ch != 0x00) {
- ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
- return 0;
- }
- return 1;
- } else {
- if (ctx->pointer != eoc) {
- ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
- return 0;
- }
- return 1;
- }
-}
-
-static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
- ctx->pointer = eoc;
- return 1;
-}
-
-static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = (signed char) ch;
- len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned int *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned int)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long *integer)
-{
- unsigned char ch;
- unsigned int len;
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer = ch;
- if (ch == 0) len = 0;
- else len = 1;
-
- while (ctx->pointer < eoc) {
- if (++len > sizeof (unsigned long)) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- return 0;
- }
-
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *integer <<= 8;
- *integer |= ch;
- }
- return 1;
-}
-
-static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned char **octets,
- unsigned int *len)
-{
- unsigned char *ptr;
-
- *len = 0;
-
- *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
- if (*octets == NULL)
- return 0;
-
- ptr = *octets;
- while (ctx->pointer < eoc) {
- if (!asn1_octet_decode(ctx, ptr++)) {
- kfree(*octets);
- *octets = NULL;
- return 0;
- }
- (*len)++;
- }
- return 1;
-}
-
-static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
- unsigned long *subid)
-{
- unsigned char ch;
-
- *subid = 0;
-
- do {
- if (!asn1_octet_decode(ctx, &ch))
- return 0;
-
- *subid <<= 7;
- *subid |= ch & 0x7F;
- } while ((ch & 0x80) == 0x80);
- return 1;
-}
-
-static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long **oid,
- unsigned int *len)
-{
- unsigned long subid;
- unsigned long *optr;
- size_t size;
-
- size = eoc - ctx->pointer + 1;
-
- /* first subid actually encodes first two subids */
- if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
- return 0;
-
- *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
- if (*oid == NULL)
- return 0;
-
- optr = *oid;
-
- if (!asn1_subid_decode(ctx, &subid)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (subid < 40) {
- optr[0] = 0;
- optr[1] = subid;
- } else if (subid < 80) {
- optr[0] = 1;
- optr[1] = subid - 40;
- } else {
- optr[0] = 2;
- optr[1] = subid - 80;
- }
-
- *len = 2;
- optr += 2;
-
- while (ctx->pointer < eoc) {
- if (++(*len) > size) {
- ctx->error = ASN1_ERR_DEC_BADVALUE;
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
-
- if (!asn1_subid_decode(ctx, optr++)) {
- kfree(*oid);
- *oid = NULL;
- return 0;
- }
- }
- return 1;
-}
-
-/*****************************************************************************
- *
- * SNMP decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* SNMP Versions */
-#define SNMP_V1 0
-#define SNMP_V2C 1
-#define SNMP_V2 2
-#define SNMP_V3 3
-
-/* Default Sizes */
-#define SNMP_SIZE_COMM 256
-#define SNMP_SIZE_OBJECTID 128
-#define SNMP_SIZE_BUFCHR 256
-#define SNMP_SIZE_BUFINT 128
-#define SNMP_SIZE_SMALLOBJECTID 16
-
-/* Requests */
-#define SNMP_PDU_GET 0
-#define SNMP_PDU_NEXT 1
-#define SNMP_PDU_RESPONSE 2
-#define SNMP_PDU_SET 3
-#define SNMP_PDU_TRAP1 4
-#define SNMP_PDU_BULK 5
-#define SNMP_PDU_INFORM 6
-#define SNMP_PDU_TRAP2 7
-
-/* Errors */
-#define SNMP_NOERROR 0
-#define SNMP_TOOBIG 1
-#define SNMP_NOSUCHNAME 2
-#define SNMP_BADVALUE 3
-#define SNMP_READONLY 4
-#define SNMP_GENERROR 5
-#define SNMP_NOACCESS 6
-#define SNMP_WRONGTYPE 7
-#define SNMP_WRONGLENGTH 8
-#define SNMP_WRONGENCODING 9
-#define SNMP_WRONGVALUE 10
-#define SNMP_NOCREATION 11
-#define SNMP_INCONSISTENTVALUE 12
-#define SNMP_RESOURCEUNAVAILABLE 13
-#define SNMP_COMMITFAILED 14
-#define SNMP_UNDOFAILED 15
-#define SNMP_AUTHORIZATIONERROR 16
-#define SNMP_NOTWRITABLE 17
-#define SNMP_INCONSISTENTNAME 18
-
-/* General SNMP V1 Traps */
-#define SNMP_TRAP_COLDSTART 0
-#define SNMP_TRAP_WARMSTART 1
-#define SNMP_TRAP_LINKDOWN 2
-#define SNMP_TRAP_LINKUP 3
-#define SNMP_TRAP_AUTFAILURE 4
-#define SNMP_TRAP_EQPNEIGHBORLOSS 5
-#define SNMP_TRAP_ENTSPECIFIC 6
-
-/* SNMPv1 Types */
-#define SNMP_NULL 0
-#define SNMP_INTEGER 1 /* l */
-#define SNMP_OCTETSTR 2 /* c */
-#define SNMP_DISPLAYSTR 2 /* c */
-#define SNMP_OBJECTID 3 /* ul */
-#define SNMP_IPADDR 4 /* uc */
-#define SNMP_COUNTER 5 /* ul */
-#define SNMP_GAUGE 6 /* ul */
-#define SNMP_TIMETICKS 7 /* ul */
-#define SNMP_OPAQUE 8 /* c */
-
-/* Additional SNMPv2 Types */
-#define SNMP_UINTEGER 5 /* ul */
-#define SNMP_BITSTR 9 /* uc */
-#define SNMP_NSAP 10 /* uc */
-#define SNMP_COUNTER64 11 /* ul */
-#define SNMP_NOSUCHOBJECT 12
-#define SNMP_NOSUCHINSTANCE 13
-#define SNMP_ENDOFMIBVIEW 14
-
-union snmp_syntax
-{
- unsigned char uc[0]; /* 8 bit unsigned */
- char c[0]; /* 8 bit signed */
- unsigned long ul[0]; /* 32 bit unsigned */
- long l[0]; /* 32 bit signed */
-};
-
-struct snmp_object
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned short type;
- unsigned int syntax_len;
- union snmp_syntax syntax;
-};
-
-struct snmp_request
-{
- unsigned long id;
- unsigned int error_status;
- unsigned int error_index;
-};
-
-struct snmp_v1_trap
-{
- unsigned long *id;
- unsigned int id_len;
- unsigned long ip_address; /* pointer */
- unsigned int general;
- unsigned int specific;
- unsigned long time;
-};
-
-/* SNMP types */
-#define SNMP_IPA 0
-#define SNMP_CNT 1
-#define SNMP_GGE 2
-#define SNMP_TIT 3
-#define SNMP_OPQ 4
-#define SNMP_C64 6
-
-/* SNMP errors */
-#define SERR_NSO 0
-#define SERR_NSI 1
-#define SERR_EOM 2
-
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check);
-struct snmp_cnv
-{
- unsigned int class;
- unsigned int tag;
- int syntax;
-};
-
-static const struct snmp_cnv snmp_conv[] = {
- {ASN1_UNI, ASN1_NUL, SNMP_NULL},
- {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
- {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
- {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
- {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
- {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
- {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
- {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
- {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
- {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
-
- /* SNMPv2 data types and errors */
- {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
- {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
- {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
- {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
- {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
- {0, 0, -1}
-};
-
-static unsigned char snmp_tag_cls2syntax(unsigned int tag,
- unsigned int cls,
- unsigned short *syntax)
-{
- const struct snmp_cnv *cnv;
-
- cnv = snmp_conv;
-
- while (cnv->syntax != -1) {
- if (cnv->tag == tag && cnv->class == cls) {
- *syntax = cnv->syntax;
- return 1;
- }
- cnv++;
- }
- return 0;
-}
-
-static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
- struct snmp_object **obj)
-{
- unsigned int cls, con, tag, len, idlen;
- unsigned short type;
- unsigned char *eoc, *end, *p;
- unsigned long *lp, *id;
- unsigned long ul;
- long l;
-
- *obj = NULL;
- id = NULL;
-
- if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &id, &idlen))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
- kfree(id);
- return 0;
- }
-
- if (con != ASN1_PRI) {
- kfree(id);
- return 0;
- }
-
- type = 0;
- if (!snmp_tag_cls2syntax(tag, cls, &type)) {
- kfree(id);
- return 0;
- }
-
- l = 0;
- switch (type) {
- case SNMP_INTEGER:
- len = sizeof(long);
- if (!asn1_long_decode(ctx, end, &l)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- return 0;
- }
- (*obj)->syntax.l[0] = l;
- break;
- case SNMP_OCTETSTR:
- case SNMP_OPAQUE:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- return 0;
- }
- memcpy((*obj)->syntax.c, p, len);
- kfree(p);
- break;
- case SNMP_NULL:
- case SNMP_NOSUCHOBJECT:
- case SNMP_NOSUCHINSTANCE:
- case SNMP_ENDOFMIBVIEW:
- len = 0;
- *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- return 0;
- }
- if (!asn1_null_decode(ctx, end)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- break;
- case SNMP_OBJECTID:
- if (!asn1_oid_decode(ctx, end, &lp, &len)) {
- kfree(id);
- return 0;
- }
- len *= sizeof(unsigned long);
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(lp);
- kfree(id);
- return 0;
- }
- memcpy((*obj)->syntax.ul, lp, len);
- kfree(lp);
- break;
- case SNMP_IPADDR:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- if (len != 4) {
- kfree(p);
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- return 0;
- }
- memcpy((*obj)->syntax.uc, p, len);
- kfree(p);
- break;
- case SNMP_COUNTER:
- case SNMP_GAUGE:
- case SNMP_TIMETICKS:
- len = sizeof(unsigned long);
- if (!asn1_ulong_decode(ctx, end, &ul)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- return 0;
- }
- (*obj)->syntax.ul[0] = ul;
- break;
- default:
- kfree(id);
- return 0;
- }
-
- (*obj)->syntax_len = len;
- (*obj)->type = type;
- (*obj)->id = id;
- (*obj)->id_len = idlen;
-
- if (!asn1_eoc_decode(ctx, eoc)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- return 1;
-}
-
-static unsigned char noinline_for_stack
-snmp_request_decode(struct asn1_ctx *ctx, struct snmp_request *request)
-{
- unsigned int cls, con, tag;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_ulong_decode(ctx, end, &request->id))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_status))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
-
- if (!asn1_uint_decode(ctx, end, &request->error_index))
- return 0;
-
- return 1;
-}
-
-/*
- * Fast checksum update for possibly oddly-aligned UDP byte, from the
- * code example in the draft.
- */
-static void fast_csum(__sum16 *csum,
- const unsigned char *optr,
- const unsigned char *nptr,
- int offset)
-{
- unsigned char s[4];
-
- if (offset & 1) {
- s[0] = ~0;
- s[1] = ~*optr;
- s[2] = 0;
- s[3] = *nptr;
- } else {
- s[0] = ~*optr;
- s[1] = ~0;
- s[2] = *nptr;
- s[3] = 0;
- }
-
- *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
-}
-
-/*
- * Mangle IP address.
- * - begin points to the start of the snmp messgae
- * - addr points to the start of the address
- */
-static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- __sum16 *check)
-{
- if (map->from == NOCT1(addr)) {
- u_int32_t old;
-
- if (debug)
- memcpy(&old, addr, sizeof(old));
-
- *addr = map->to;
-
- /* Update UDP checksum if being used */
- if (*check) {
- fast_csum(check,
- &map->from, &map->to, addr - begin);
-
- }
-
- if (debug)
- printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n",
- &old, addr);
- }
-}
-
-static unsigned char noinline_for_stack
-snmp_trap_decode(struct asn1_ctx *ctx, struct snmp_v1_trap *trap,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned int cls, con, tag, len;
- unsigned char *end;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
- return 0;
-
- if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
- return 0;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_id_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
- goto err_id_free;
-
- if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
- goto err_id_free;
-
- /* IPv4 only */
- if (len != 4)
- goto err_addr_free;
-
- mangle_address(ctx->begin, ctx->pointer - 4, map, check);
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->general))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- goto err_addr_free;
-
- if (!asn1_uint_decode(ctx, end, &trap->specific))
- goto err_addr_free;
-
- if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
- goto err_addr_free;
-
- if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
- (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
- goto err_addr_free;
-
- if (!asn1_ulong_decode(ctx, end, &trap->time))
- goto err_addr_free;
-
- return 1;
-
-err_addr_free:
- kfree((unsigned long *)trap->ip_address);
-
-err_id_free:
- kfree(trap->id);
-
- return 0;
-}
-
-/*****************************************************************************
- *
- * Misc. routines
- *
- *****************************************************************************/
-
-/*
- * Parse and mangle SNMP message according to mapping.
- * (And this is the fucking 'basic' method).
- */
-static int snmp_parse_mangle(unsigned char *msg,
- u_int16_t len,
- const struct oct1_map *map,
- __sum16 *check)
-{
- unsigned char *eoc, *end;
- unsigned int cls, con, tag, vers, pdutype;
- struct asn1_ctx ctx;
- struct asn1_octstr comm;
- struct snmp_object *obj;
-
- if (debug > 1)
- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
- msg, len, 0);
-
- asn1_open(&ctx, msg, len);
-
- /*
- * Start of SNMP message.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- /*
- * Version 1 or 2 handled.
- */
- if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
- return 0;
- if (!asn1_uint_decode (&ctx, end, &vers))
- return 0;
- if (debug > 1)
- pr_debug("bsalg: snmp version: %u\n", vers + 1);
- if (vers > 1)
- return 1;
-
- /*
- * Community.
- */
- if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
- return 0;
- if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
- return 0;
- if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
- return 0;
- if (debug > 1) {
- unsigned int i;
-
- pr_debug("bsalg: community: ");
- for (i = 0; i < comm.len; i++)
- pr_cont("%c", comm.data[i]);
- pr_cont("\n");
- }
- kfree(comm.data);
-
- /*
- * PDU type
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
- return 0;
- if (cls != ASN1_CTX || con != ASN1_CON)
- return 0;
- if (debug > 1) {
- static const unsigned char *const pdus[] = {
- [SNMP_PDU_GET] = "get",
- [SNMP_PDU_NEXT] = "get-next",
- [SNMP_PDU_RESPONSE] = "response",
- [SNMP_PDU_SET] = "set",
- [SNMP_PDU_TRAP1] = "trapv1",
- [SNMP_PDU_BULK] = "bulk",
- [SNMP_PDU_INFORM] = "inform",
- [SNMP_PDU_TRAP2] = "trapv2"
- };
-
- if (pdutype > SNMP_PDU_TRAP2)
- pr_debug("bsalg: bad pdu type %u\n", pdutype);
- else
- pr_debug("bsalg: pdu: %s\n", pdus[pdutype]);
- }
- if (pdutype != SNMP_PDU_RESPONSE &&
- pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
- return 1;
-
- /*
- * Request header or v1 trap
- */
- if (pdutype == SNMP_PDU_TRAP1) {
- struct snmp_v1_trap trap;
- unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
-
- if (ret) {
- kfree(trap.id);
- kfree((unsigned long *)trap.ip_address);
- } else
- return ret;
-
- } else {
- struct snmp_request req;
-
- if (!snmp_request_decode(&ctx, &req))
- return 0;
-
- if (debug > 1)
- pr_debug("bsalg: request: id=0x%lx error_status=%u "
- "error_index=%u\n", req.id, req.error_status,
- req.error_index);
- }
-
- /*
- * Loop through objects, look for IP addresses to mangle.
- */
- if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
- return 0;
-
- if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
- return 0;
-
- while (!asn1_eoc_decode(&ctx, eoc)) {
- unsigned int i;
-
- if (!snmp_object_decode(&ctx, &obj)) {
- if (obj) {
- kfree(obj->id);
- kfree(obj);
- }
- return 0;
- }
-
- if (debug > 1) {
- pr_debug("bsalg: object: ");
- for (i = 0; i < obj->id_len; i++) {
- if (i > 0)
- pr_cont(".");
- pr_cont("%lu", obj->id[i]);
- }
- pr_cont(": type=%u\n", obj->type);
-
- }
-
- if (obj->type == SNMP_IPADDR)
- mangle_address(ctx.begin, ctx.pointer - 4, map, check);
-
- kfree(obj->id);
- kfree(obj);
- }
-
- if (!asn1_eoc_decode(&ctx, eoc))
- return 0;
-
- return 1;
-}
-
-/*****************************************************************************
- *
- * NAT routines.
- *
- *****************************************************************************/
-
-/*
- * SNMP translation routine.
- */
-static int snmp_translate(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- struct sk_buff *skb)
-{
- struct iphdr *iph = ip_hdr(skb);
- struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
- u_int16_t udplen = ntohs(udph->len);
- u_int16_t paylen = udplen - sizeof(struct udphdr);
- int dir = CTINFO2DIR(ctinfo);
- struct oct1_map map;
-
- /*
- * Determine mappping for application layer addresses based
- * on NAT manipulations for the packet.
- */
- if (dir == IP_CT_DIR_ORIGINAL) {
- /* SNAT traps */
- map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
- } else {
- /* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
- }
-
- if (map.from == map.to)
- return NF_ACCEPT;
-
- if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
- paylen, &map, &udph->check)) {
- net_warn_ratelimited("bsalg: parser failed\n");
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-/* We don't actually set up expectations, just adjust internal IP
- * addresses if this is being NATted */
-static int help(struct sk_buff *skb, unsigned int protoff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir = CTINFO2DIR(ctinfo);
- unsigned int ret;
- const struct iphdr *iph = ip_hdr(skb);
- const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
-
- /* SNMP replies and originating SNMP traps get mangled */
- if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
- return NF_ACCEPT;
- if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- /* No NAT? */
- if (!(ct->status & IPS_NAT_MASK))
- return NF_ACCEPT;
-
- /*
- * Make sure the packet length is ok. So far, we were only guaranteed
- * to have a valid length IP header plus 8 bytes, which means we have
- * enough room for a UDP header. Just verify the UDP length field so we
- * can mess around with the payload.
- */
- if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
- net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
- &iph->saddr, &iph->daddr);
- return NF_DROP;
- }
-
- if (!skb_make_writable(skb, skb->len))
- return NF_DROP;
-
- spin_lock_bh(&snmp_lock);
- ret = snmp_translate(ct, ctinfo, skb);
- spin_unlock_bh(&snmp_lock);
- return ret;
-}
-
-static const struct nf_conntrack_expect_policy snmp_exp_policy = {
- .max_expected = 0,
- .timeout = 180,
-};
-
-static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
- .me = THIS_MODULE,
- .help = help,
- .expect_policy = &snmp_exp_policy,
- .name = "snmp_trap",
- .tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
- .tuple.dst.protonum = IPPROTO_UDP,
-};
-
-/*****************************************************************************
- *
- * Module stuff.
- *
- *****************************************************************************/
-
-static int __init nf_nat_snmp_basic_init(void)
-{
- BUG_ON(nf_nat_snmp_hook != NULL);
- RCU_INIT_POINTER(nf_nat_snmp_hook, help);
-
- return nf_conntrack_helper_register(&snmp_trap_helper);
-}
-
-static void __exit nf_nat_snmp_basic_fini(void)
-{
- RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
- synchronize_rcu();
- nf_conntrack_helper_unregister(&snmp_trap_helper);
-}
-
-module_init(nf_nat_snmp_basic_init);
-module_exit(nf_nat_snmp_basic_fini);
-
-module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
new file mode 100644
index 000000000000..b6e277093e7e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c
@@ -0,0 +1,235 @@
+/*
+ * nf_nat_snmp_basic_main.c
+ *
+ * Basic SNMP Application Layer Gateway
+ *
+ * This IP NAT module is intended for use with SNMP network
+ * discovery and monitoring applications where target networks use
+ * conflicting private address realms.
+ *
+ * Static NAT is used to remap the networks from the view of the network
+ * management system at the IP layer, and this module remaps some application
+ * layer addresses to match.
+ *
+ * The simplest form of ALG is performed, where only tagged IP addresses
+ * are modified. The module does not need to be MIB aware and only scans
+ * messages at the ASN.1/BER level.
+ *
+ * Currently, only SNMPv1 and SNMPv2 are supported.
+ *
+ * More information on ALG and associated issues can be found in
+ * RFC 2962.
+ *
+ * The ASN.1/BER parsing code is derived from the gxsnmp package by Gregory
+ * McLean & Jochen Friedrich, stripped down for use in the kernel.
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
+#include "nf_nat_snmp_basic-asn1.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+MODULE_ALIAS("ip_nat_snmp_basic");
+
+#define SNMP_PORT 161
+#define SNMP_TRAP_PORT 162
+
+static DEFINE_SPINLOCK(snmp_lock);
+
+struct snmp_ctx {
+ unsigned char *begin;
+ __sum16 *check;
+ __be32 from;
+ __be32 to;
+};
+
+static void fast_csum(struct snmp_ctx *ctx, unsigned char offset)
+{
+ unsigned char s[12] = {0,};
+ int size;
+
+ if (offset & 1) {
+ memcpy(&s[1], &ctx->from, 4);
+ memcpy(&s[7], &ctx->to, 4);
+ s[0] = ~0;
+ s[1] = ~s[1];
+ s[2] = ~s[2];
+ s[3] = ~s[3];
+ s[4] = ~s[4];
+ s[5] = ~0;
+ size = 12;
+ } else {
+ memcpy(&s[0], &ctx->from, 4);
+ memcpy(&s[4], &ctx->to, 4);
+ s[0] = ~s[0];
+ s[1] = ~s[1];
+ s[2] = ~s[2];
+ s[3] = ~s[3];
+ size = 8;
+ }
+ *ctx->check = csum_fold(csum_partial(s, size,
+ ~csum_unfold(*ctx->check)));
+}
+
+int snmp_version(void *context, size_t hdrlen, unsigned char tag,
+ const void *data, size_t datalen)
+{
+ if (*(unsigned char *)data > 1)
+ return -ENOTSUPP;
+ return 1;
+}
+
+int snmp_helper(void *context, size_t hdrlen, unsigned char tag,
+ const void *data, size_t datalen)
+{
+ struct snmp_ctx *ctx = (struct snmp_ctx *)context;
+ __be32 *pdata = (__be32 *)data;
+
+ if (*pdata == ctx->from) {
+ pr_debug("%s: %pI4 to %pI4\n", __func__,
+ (void *)&ctx->from, (void *)&ctx->to);
+
+ if (*ctx->check)
+ fast_csum(ctx, (unsigned char *)data - ctx->begin);
+ *pdata = ctx->to;
+ }
+
+ return 1;
+}
+
+static int snmp_translate(struct nf_conn *ct, int dir, struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+ u16 datalen = ntohs(udph->len) - sizeof(struct udphdr);
+ char *data = (unsigned char *)udph + sizeof(struct udphdr);
+ struct snmp_ctx ctx;
+ int ret;
+
+ if (dir == IP_CT_DIR_ORIGINAL) {
+ ctx.from = ct->tuplehash[dir].tuple.src.u3.ip;
+ ctx.to = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ } else {
+ ctx.from = ct->tuplehash[!dir].tuple.src.u3.ip;
+ ctx.to = ct->tuplehash[dir].tuple.dst.u3.ip;
+ }
+
+ if (ctx.from == ctx.to)
+ return NF_ACCEPT;
+
+ ctx.begin = (unsigned char *)udph + sizeof(struct udphdr);
+ ctx.check = &udph->check;
+ ret = asn1_ber_decoder(&nf_nat_snmp_basic_decoder, &ctx, data, datalen);
+ if (ret < 0) {
+ nf_ct_helper_log(skb, ct, "parser failed\n");
+ return NF_DROP;
+ }
+
+ return NF_ACCEPT;
+}
+
+/* We don't actually set up expectations, just adjust internal IP
+ * addresses if this is being NATted
+ */
+static int help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ unsigned int ret;
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+
+ /* SNMP replies and originating SNMP traps get mangled */
+ if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ return NF_ACCEPT;
+ if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* No NAT? */
+ if (!(ct->status & IPS_NAT_MASK))
+ return NF_ACCEPT;
+
+ /* Make sure the packet length is ok. So far, we were only guaranteed
+ * to have a valid length IP header plus 8 bytes, which means we have
+ * enough room for a UDP header. Just verify the UDP length field so we
+ * can mess around with the payload.
+ */
+ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
+ nf_ct_helper_log(skb, ct, "dropping malformed packet\n");
+ return NF_DROP;
+ }
+
+ if (!skb_make_writable(skb, skb->len)) {
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
+ return NF_DROP;
+ }
+
+ spin_lock_bh(&snmp_lock);
+ ret = snmp_translate(ct, dir, skb);
+ spin_unlock_bh(&snmp_lock);
+ return ret;
+}
+
+static const struct nf_conntrack_expect_policy snmp_exp_policy = {
+ .max_expected = 0,
+ .timeout = 180,
+};
+
+static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
+ .me = THIS_MODULE,
+ .help = help,
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp_trap",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+};
+
+static int __init nf_nat_snmp_basic_init(void)
+{
+ BUG_ON(nf_nat_snmp_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, help);
+
+ return nf_conntrack_helper_register(&snmp_trap_helper);
+}
+
+static void __exit nf_nat_snmp_basic_fini(void)
+{
+ RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ synchronize_rcu();
+ nf_conntrack_helper_unregister(&snmp_trap_helper);
+}
+
+module_init(nf_nat_snmp_basic_init);
+module_exit(nf_nat_snmp_basic_fini);
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index f84c17763f6f..036c074736b0 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -27,40 +27,6 @@ nft_do_chain_arp(void *priv,
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_arp __read_mostly = {
- .family = NFPROTO_ARP,
- .nhooks = NF_ARP_NUMHOOKS,
- .owner = THIS_MODULE,
-};
-
-static int nf_tables_arp_init_net(struct net *net)
-{
- net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.arp== NULL)
- return -ENOMEM;
-
- memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
-
- if (nft_register_afinfo(net, net->nft.arp) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.arp);
- return -ENOMEM;
-}
-
-static void nf_tables_arp_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.arp);
- kfree(net->nft.arp);
-}
-
-static struct pernet_operations nf_tables_arp_net_ops = {
- .init = nf_tables_arp_init_net,
- .exit = nf_tables_arp_exit_net,
-};
-
static const struct nf_chain_type filter_arp = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -76,22 +42,11 @@ static const struct nf_chain_type filter_arp = {
static int __init nf_tables_arp_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_arp);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_arp_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_arp);
-
- return ret;
+ return nft_register_chain_type(&filter_arp);
}
static void __exit nf_tables_arp_exit(void)
{
- unregister_pernet_subsys(&nf_tables_arp_net_ops);
nft_unregister_chain_type(&filter_arp);
}
@@ -100,4 +55,4 @@ module_exit(nf_tables_arp_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
+MODULE_ALIAS_NFT_CHAIN(3, "filter"); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index f4675253f1e6..96f955496d5f 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -30,40 +30,6 @@ static unsigned int nft_do_chain_ipv4(void *priv,
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_ipv4 __read_mostly = {
- .family = NFPROTO_IPV4,
- .nhooks = NF_INET_NUMHOOKS,
- .owner = THIS_MODULE,
-};
-
-static int nf_tables_ipv4_init_net(struct net *net)
-{
- net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.ipv4 == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
-
- if (nft_register_afinfo(net, net->nft.ipv4) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.ipv4);
- return -ENOMEM;
-}
-
-static void nf_tables_ipv4_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.ipv4);
- kfree(net->nft.ipv4);
-}
-
-static struct pernet_operations nf_tables_ipv4_net_ops = {
- .init = nf_tables_ipv4_init_net,
- .exit = nf_tables_ipv4_exit_net,
-};
-
static const struct nf_chain_type filter_ipv4 = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -85,22 +51,11 @@ static const struct nf_chain_type filter_ipv4 = {
static int __init nf_tables_ipv4_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_ipv4);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_ipv4);
-
- return ret;
+ return nft_register_chain_type(&filter_ipv4);
}
static void __exit nf_tables_ipv4_exit(void)
{
- unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
nft_unregister_chain_type(&filter_ipv4);
}
@@ -109,4 +64,4 @@ module_exit(nf_tables_ipv4_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_INET);
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "filter");
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 8322f26e770e..785712be5b0d 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -766,7 +766,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
- (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
+ (rs->rtt_us <= bbr->min_rtt_us ||
+ (filter_expired && !rs->is_ack_delayed))) {
bbr->min_rtt_us = rs->rtt_us;
bbr->min_rtt_stamp = tcp_jiffies32;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ff71b18d9682..cfa51cfd2d99 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -97,6 +97,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
+#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -2857,11 +2858,18 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
*rexmit = REXMIT_LOST;
}
-static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
+static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
struct tcp_sock *tp = tcp_sk(sk);
+ if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+ /* If the remote keeps returning delayed ACKs, eventually
+ * the min filter would pick it up and overestimate the
+ * prop. delay when it expires. Skip suspected delayed ACKs.
+ */
+ return;
+ }
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
}
@@ -2901,7 +2909,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* always taken together with ACK, SACK, or TS-opts. Any negative
* values will be skipped with the seq_rtt_us < 0 check above.
*/
- tcp_update_rtt_min(sk, ca_rtt_us);
+ tcp_update_rtt_min(sk, ca_rtt_us, flag);
tcp_rtt_estimator(sk, seq_rtt_us);
tcp_set_rto(sk);
@@ -3125,6 +3133,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
+
+ if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
+ last_in_flight && !prior_sacked && fully_acked &&
+ sack->rate->prior_delivered + 1 == tp->delivered &&
+ !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
+ /* Conservatively mark a delayed ACK. It's typically
+ * from a lone runt packet over the round trip to
+ * a receiver w/o out-of-order or CE events.
+ */
+ flag |= FLAG_ACK_MAYBE_DELAYED;
+ }
}
if (sack->first_sackt) {
sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
@@ -3614,6 +3633,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
+ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b7c4befe67ec..92b8d8c75eed 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1223,8 +1223,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
if (!rcu_access_pointer(fn->leaf)) {
- atomic_inc(&rt->rt6i_ref);
- rcu_assign_pointer(fn->leaf, rt);
+ if (fn->fn_flags & RTN_TL_ROOT) {
+ /* put back null_entry for root node */
+ rcu_assign_pointer(fn->leaf,
+ info->nl_net->ipv6.ip6_null_entry);
+ } else {
+ atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, rt);
+ }
}
fn = sn;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index db99446e0276..a88480193d77 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -352,11 +352,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, 1);
if (register_netdevice(dev) < 0)
goto failed_free;
+ ip6gre_tnl_link_config(nt, 1);
+
/* Can use a lockless transmit, unless we generate output sequences */
if (!(nt->parms.o_flags & TUNNEL_SEQ))
dev->features |= NETIF_F_LLTX;
@@ -1709,7 +1710,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
static int ip6gre_tap_init(struct net_device *dev)
{
- struct ip6_tnl *tunnel;
int ret;
ret = ip6gre_tunnel_init_common(dev);
@@ -1718,10 +1718,6 @@ static int ip6gre_tap_init(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
-
- ip6gre_tnl_link_config(tunnel, 1);
-
return 0;
}
@@ -1872,12 +1868,16 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
nt->dev = dev;
nt->net = dev_net(dev);
- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
err = register_netdevice(dev);
if (err)
goto out;
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+ if (tb[IFLA_MTU])
+ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
dev_hold(dev);
ip6gre_tunnel_link(ign, nt);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 806e95375ec8..4a634b7a2c80 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -72,8 +72,9 @@ endif # NF_TABLES_IPV6
endif # NF_TABLES
config NF_FLOW_TABLE_IPV6
- select NF_FLOW_TABLE
tristate "Netfilter flow table IPv6 module"
+ depends on NF_CONNTRACK && NF_TABLES
+ select NF_FLOW_TABLE
help
This option adds the flow table IPv6 support.
@@ -240,6 +241,15 @@ config IP6_NF_MATCH_RT
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_MATCH_SRH
+ tristate '"srh" Segment Routing header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ srh matching allows you to match packets based on the segment
+ routing header of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
# The targets
config IP6_NF_TARGET_HL
tristate '"HL" hoplimit target support'
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 95611c4b39b0..d984057b8395 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
# targets
obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6ebbef2dfb60..37fa76ee5130 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1952,7 +1952,6 @@ static int __init ip6_tables_init(void)
if (ret < 0)
goto err5;
- pr_info("(C) 2000-2006 Netfilter Core Team\n");
return 0;
err5:
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
new file mode 100644
index 000000000000..9642164107ce
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -0,0 +1,161 @@
+/* Kernel module to match Segment Routing Header (SRH) parameters. */
+
+/* Author:
+ * Ahmed Abdelsalam <amsalam20@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/ipv6.h>
+#include <net/seg6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6t_srh.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+/* Test a struct->mt_invflags and a boolean for inequality */
+#define NF_SRH_INVF(ptr, flag, boolean) \
+ ((boolean) ^ !!((ptr)->mt_invflags & (flag)))
+
+static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct ip6t_srh *srhinfo = par->matchinfo;
+ struct ipv6_sr_hdr *srh;
+ struct ipv6_sr_hdr _srh;
+ int hdrlen, srhoff = 0;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return false;
+ srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
+ if (!srh)
+ return false;
+
+ hdrlen = ipv6_optlen(srh);
+ if (skb->len - srhoff < hdrlen)
+ return false;
+
+ if (srh->type != IPV6_SRCRT_TYPE_4)
+ return false;
+
+ if (srh->segments_left > srh->first_segment)
+ return false;
+
+ /* Next Header matching */
+ if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
+ !(srh->nexthdr == srhinfo->next_hdr)))
+ return false;
+
+ /* Header Extension Length matching */
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
+ !(srh->hdrlen == srhinfo->hdr_len)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
+ !(srh->hdrlen > srhinfo->hdr_len)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
+ !(srh->hdrlen < srhinfo->hdr_len)))
+ return false;
+
+ /* Segments Left matching */
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
+ !(srh->segments_left == srhinfo->segs_left)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
+ !(srh->segments_left > srhinfo->segs_left)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
+ !(srh->segments_left < srhinfo->segs_left)))
+ return false;
+
+ /**
+ * Last Entry matching
+ * Last_Entry field was introduced in revision 6 of the SRH draft.
+ * It was called First_Segment in the previous revision
+ */
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
+ !(srh->first_segment == srhinfo->last_entry)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
+ !(srh->first_segment > srhinfo->last_entry)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
+ !(srh->first_segment < srhinfo->last_entry)))
+ return false;
+
+ /**
+ * Tag matching
+ * Tag field was introduced in revision 6 of the SRH draft.
+ */
+ if (srhinfo->mt_flags & IP6T_SRH_TAG)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
+ !(srh->tag == srhinfo->tag)))
+ return false;
+ return true;
+}
+
+static int srh_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_srh *srhinfo = par->matchinfo;
+
+ if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
+ pr_err("unknown srh match flags %X\n", srhinfo->mt_flags);
+ return -EINVAL;
+ }
+
+ if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
+ pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match srh_mt6_reg __read_mostly = {
+ .name = "srh",
+ .family = NFPROTO_IPV6,
+ .match = srh_mt6,
+ .matchsize = sizeof(struct ip6t_srh),
+ .checkentry = srh_mt6_check,
+ .me = THIS_MODULE,
+};
+
+static int __init srh_mt6_init(void)
+{
+ return xt_register_match(&srh_mt6_reg);
+}
+
+static void __exit srh_mt6_exit(void)
+{
+ xt_unregister_match(&srh_mt6_reg);
+}
+
+module_init(srh_mt6_init);
+module_exit(srh_mt6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 Segment Routing Header match");
+MODULE_AUTHOR("Ahmed Abdelsalam <amsalam20@gmail.com>");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index d4bc56443dc1..710fa0806c37 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/slab.h>
@@ -11,6 +12,10 @@
static int __net_init ip6table_raw_table_init(struct net *net);
+static bool raw_before_defrag __read_mostly;
+MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
+module_param(raw_before_defrag, bool, 0000);
+
static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
@@ -20,6 +25,15 @@ static const struct xt_table packet_raw = {
.table_init = ip6table_raw_table_init,
};
+static const struct xt_table packet_raw_before_defrag = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+ .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
+ .table_init = ip6table_raw_table_init,
+};
+
/* The work comes in here from netfilter.c. */
static unsigned int
ip6table_raw_hook(void *priv, struct sk_buff *skb,
@@ -33,15 +47,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
static int __net_init ip6table_raw_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ const struct xt_table *table = &packet_raw;
int ret;
+ if (raw_before_defrag)
+ table = &packet_raw_before_defrag;
+
if (net->ipv6.ip6table_raw)
return 0;
- repl = ip6t_alloc_initial_table(&packet_raw);
+ repl = ip6t_alloc_initial_table(table);
if (repl == NULL)
return -ENOMEM;
- ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
+ ret = ip6t_register_table(net, table, repl, rawtable_ops,
&net->ipv6.ip6table_raw);
kfree(repl);
return ret;
@@ -62,9 +80,16 @@ static struct pernet_operations ip6table_raw_net_ops = {
static int __init ip6table_raw_init(void)
{
int ret;
+ const struct xt_table *table = &packet_raw;
+
+ if (raw_before_defrag) {
+ table = &packet_raw_before_defrag;
+
+ pr_info("Enabling raw table before defrag\n");
+ }
/* Register hooks */
- rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 977d8900cfd1..ce53dcfda88a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -231,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
if ((unsigned int)end > IPV6_MAXPLEN) {
pr_debug("offset is too large.\n");
- return -1;
+ return -EINVAL;
}
ecn = ip6_frag_ecn(ipv6_hdr(skb));
@@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- return -1;
+ return -EPROTO;
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
@@ -358,7 +358,7 @@ found:
discard_fq:
inet_frag_kill(&fq->q, &nf_frags);
err:
- return -1;
+ return -EINVAL;
}
/*
@@ -567,6 +567,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
+ u16 savethdr = skb->transport_header;
struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
@@ -600,8 +601,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
spin_lock_bh(&fq->q.lock);
- if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
- ret = -EINVAL;
+ ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+ if (ret < 0) {
+ if (ret == -EPROTO) {
+ skb->transport_header = savethdr;
+ ret = 0;
+ }
goto out_unlock;
}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index b326da59257f..c87b48359e8f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -63,6 +63,9 @@ static unsigned int ipv6_defrag(void *priv,
/* Previously seen (loopback)? */
if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
return NF_ACCEPT;
+
+ if (skb->_nfct == IP_CT_UNTRACKED)
+ return NF_ACCEPT;
#endif
err = nf_ct_frag6_gather(state->net, skb,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index 0c3b9d32f64f..fff21602875a 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -5,7 +5,6 @@
#include <linux/rhashtable.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
-#include <linux/ipv6.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 9cd45b964123..17e03589331c 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -28,40 +28,6 @@ static unsigned int nft_do_chain_ipv6(void *priv,
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_ipv6 __read_mostly = {
- .family = NFPROTO_IPV6,
- .nhooks = NF_INET_NUMHOOKS,
- .owner = THIS_MODULE,
-};
-
-static int nf_tables_ipv6_init_net(struct net *net)
-{
- net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.ipv6 == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
-
- if (nft_register_afinfo(net, net->nft.ipv6) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.ipv6);
- return -ENOMEM;
-}
-
-static void nf_tables_ipv6_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.ipv6);
- kfree(net->nft.ipv6);
-}
-
-static struct pernet_operations nf_tables_ipv6_net_ops = {
- .init = nf_tables_ipv6_init_net,
- .exit = nf_tables_ipv6_exit_net,
-};
-
static const struct nf_chain_type filter_ipv6 = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -83,22 +49,11 @@ static const struct nf_chain_type filter_ipv6 = {
static int __init nf_tables_ipv6_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_ipv6);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_ipv6);
-
- return ret;
+ return nft_register_chain_type(&filter_ipv6);
}
static void __exit nf_tables_ipv6_exit(void)
{
- unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
nft_unregister_chain_type(&filter_ipv6);
}
@@ -107,4 +62,4 @@ module_exit(nf_tables_ipv6_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_INET6);
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0ee0fcf3abbf..9019fa98003d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -506,7 +506,7 @@ config NFT_CT
connection tracking information such as the flow state.
config NFT_FLOW_OFFLOAD
- depends on NF_CONNTRACK
+ depends on NF_CONNTRACK && NF_FLOW_TABLE
tristate "Netfilter nf_tables hardware flow offload module"
help
This option adds the "flow_offload" expression that you can use to
@@ -665,8 +665,9 @@ endif # NF_TABLES_NETDEV
endif # NF_TABLES
config NF_FLOW_TABLE_INET
- select NF_FLOW_TABLE
tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ depends on NF_FLOW_TABLE_IPV4 && NF_FLOW_TABLE_IPV6
+ select NF_FLOW_TABLE
help
This option adds the flow table mixed IPv4/IPv6 support.
@@ -674,6 +675,7 @@ config NF_FLOW_TABLE_INET
config NF_FLOW_TABLE
tristate "Netfilter flow table module"
+ depends on NF_CONNTRACK && NF_TABLES
help
This option adds the flow table core infrastructure.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 997dd387d259..0f6b8172fb9a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -140,7 +140,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
if (reg->nat_hook && orig_ops[i]->nat_hook) {
kvfree(new);
- return ERR_PTR(-EEXIST);
+ return ERR_PTR(-EBUSY);
}
if (inserted || reg->priority > orig_ops[i]->priority) {
@@ -377,8 +377,8 @@ static void nf_remove_net_hook(struct nf_hook_entries *old,
}
}
-void __nf_unregister_net_hook(struct net *net, int pf,
- const struct nf_hook_ops *reg)
+static void __nf_unregister_net_hook(struct net *net, int pf,
+ const struct nf_hook_ops *reg)
{
struct nf_hook_entries __rcu **pp;
struct nf_hook_entries *p;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 728bf31bb386..975a85a48d39 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -2122,7 +2122,6 @@ ip_set_init(void)
return ret;
}
- pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
@@ -2138,3 +2137,5 @@ ip_set_fini(void)
module_init(ip_set_init);
module_exit(ip_set_fini);
+
+MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL));
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 299edc6add5a..1c98c907bc63 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -595,7 +595,6 @@ static int ip_vs_app_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_app_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_app_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index f489b8db2406..370abbf6f421 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1143,7 +1143,6 @@ static int ip_vs_conn_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_conn_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_conn_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1221,7 +1220,6 @@ static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_conn_sync_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_conn_sync_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fff213eacf2a..5ebde4b15810 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2116,7 +2116,6 @@ static int ip_vs_info_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_info_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_info_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2161,7 +2160,6 @@ static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_stats_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2230,7 +2228,6 @@ static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip_vs_stats_percpu_fops = {
- .owner = THIS_MODULE,
.open = ip_vs_stats_percpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index a95518261168..6d65389e308f 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -71,7 +71,7 @@ static inline bool already_closed(const struct nf_conn *conn)
return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
else
- return 0;
+ return false;
}
static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6a64d528d076..3d72a0842c01 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -58,8 +58,6 @@
#include "nf_internals.h"
-#define NF_CONNTRACK_VERSION "0.5.0"
-
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr) __read_mostly;
@@ -2068,10 +2066,6 @@ int nf_conntrack_init_start(void)
if (!nf_conntrack_cachep)
goto err_cachep;
- printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
- NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
- nf_conntrack_max);
-
ret = nf_conntrack_expect_init();
if (ret < 0)
goto err_expect;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index d6748a8a79c5..8ef21d9f9a00 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -649,7 +649,6 @@ static int exp_open(struct inode *inode, struct file *file)
}
static const struct file_operations exp_file_ops = {
- .owner = THIS_MODULE,
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7c7921a53b13..dd177ebee9aa 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -57,8 +57,6 @@
MODULE_LICENSE("GPL");
-static char __initdata version[] = "0.93";
-
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *l4proto)
@@ -3425,7 +3423,6 @@ static int __init ctnetlink_init(void)
{
int ret;
- pr_info("ctnetlink v%s: registering with nfnetlink.\n", version);
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
@@ -3459,8 +3456,6 @@ err_out:
static void __exit ctnetlink_exit(void)
{
- pr_info("ctnetlink: unregistering from nfnetlink.\n");
-
unregister_pernet_subsys(&ctnetlink_net_ops);
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
nfnetlink_subsys_unregister(&ctnl_subsys);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 46d32baad095..9123fdec5e14 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -382,7 +382,6 @@ static int ct_open(struct inode *inode, struct file *file)
}
static const struct file_operations ct_file_ops = {
- .owner = THIS_MODULE,
.open = ct_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -475,7 +474,6 @@ static int ct_cpu_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ct_cpu_seq_fops = {
- .owner = THIS_MODULE,
.open = ct_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8bb152a7cca4..c2c1b16b7538 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -402,7 +402,6 @@ static int nflog_open(struct inode *inode, struct file *file)
}
static const struct file_operations nflog_file_ops = {
- .owner = THIS_MODULE,
.open = nflog_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 7f55af5f3d1a..d67a96a25a68 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -15,8 +15,6 @@
#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv6.h>
#include <net/protocol.h>
#include <net/netfilter/nf_queue.h>
#include <net/dst.h>
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 49bd8bb16b18..92139a087260 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -317,7 +317,6 @@ static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations synproxy_cpu_seq_fops = {
- .owner = THIS_MODULE,
.open = synproxy_cpu_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 336b81689ac9..0791813a1e7d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -26,86 +26,19 @@
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
-
-/**
- * nft_register_afinfo - register nf_tables address family info
- *
- * @afi: address family info to register
- *
- * Register the address family for use with nf_tables. Returns zero on
- * success or a negative errno code otherwise.
- */
-int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
-{
- INIT_LIST_HEAD(&afi->tables);
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_add_tail_rcu(&afi->list, &net->nft.af_info);
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nft_register_afinfo);
-
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
-
-/**
- * nft_unregister_afinfo - unregister nf_tables address family info
- *
- * @afi: address family info to unregister
- *
- * Unregister the address family for use with nf_tables.
- */
-void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
-{
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- __nft_release_afinfo(net, afi);
- list_del_rcu(&afi->list);
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-}
-EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
-
-static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
-{
- struct nft_af_info *afi;
-
- list_for_each_entry(afi, &net->nft.af_info, list) {
- if (afi->family == family)
- return afi;
- }
- return NULL;
-}
-
-static struct nft_af_info *
-nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
-{
- struct nft_af_info *afi;
-
- afi = nft_afinfo_lookup(net, family);
- if (afi != NULL)
- return afi;
-#ifdef CONFIG_MODULES
- if (autoload) {
- nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-afinfo-%u", family);
- nfnl_lock(NFNL_SUBSYS_NFTABLES);
- afi = nft_afinfo_lookup(net, family);
- if (afi != NULL)
- return ERR_PTR(-EAGAIN);
- }
-#endif
- return ERR_PTR(-EAFNOSUPPORT);
-}
+static u64 table_handle;
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- struct nft_af_info *afi,
+ u8 family,
struct nft_table *table,
struct nft_chain *chain,
const struct nlattr * const *nla)
{
ctx->net = net;
- ctx->afi = afi;
+ ctx->family = family;
ctx->table = table;
ctx->chain = chain;
ctx->nla = nla;
@@ -385,30 +318,61 @@ static int nft_delflowtable(struct nft_ctx *ctx,
* Tables
*/
-static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+static struct nft_table *nft_table_lookup(const struct net *net,
const struct nlattr *nla,
- u8 genmask)
+ u8 family, u8 genmask)
{
struct nft_table *table;
- list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(table, &net->nft.tables, list) {
if (!nla_strcmp(nla, table->name) &&
+ table->family == family &&
+ nft_active_genmask(table, genmask))
+ return table;
+ }
+ return NULL;
+}
+
+static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_table *table;
+
+ list_for_each_entry(table, &net->nft.tables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
nft_active_genmask(table, genmask))
return table;
}
return NULL;
}
-static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+static struct nft_table *nf_tables_table_lookup(const struct net *net,
const struct nlattr *nla,
- u8 genmask)
+ u8 family, u8 genmask)
{
struct nft_table *table;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- table = nft_table_lookup(afi, nla, genmask);
+ table = nft_table_lookup(net, nla, family, genmask);
+ if (table != NULL)
+ return table;
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct nft_table *nf_tables_table_lookup_byhandle(const struct net *net,
+ const struct nlattr *nla,
+ u8 genmask)
+{
+ struct nft_table *table;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ table = nft_table_lookup_byhandle(net, nla, genmask);
if (table != NULL)
return table;
@@ -423,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
static const struct nf_chain_type *
-__nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
+__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
{
int i;
@@ -436,22 +400,20 @@ __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
}
static const struct nf_chain_type *
-nf_tables_chain_type_lookup(const struct nft_af_info *afi,
- const struct nlattr *nla,
- bool autoload)
+nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload)
{
const struct nf_chain_type *type;
- type = __nf_tables_chain_type_lookup(afi->family, nla);
+ type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return type;
#ifdef CONFIG_MODULES
if (autoload) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nft-chain-%u-%.*s", afi->family,
+ request_module("nft-chain-%u-%.*s", family,
nla_len(nla), (const char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- type = __nf_tables_chain_type_lookup(afi->family, nla);
+ type = __nf_tables_chain_type_lookup(nla, family);
if (type != NULL)
return ERR_PTR(-EAGAIN);
}
@@ -463,6 +425,7 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
+ [NFTA_TABLE_HANDLE] = { .type = NLA_U64 },
};
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -484,7 +447,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
- nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
+ nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
+ nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
+ NFTA_TABLE_PAD))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -509,7 +474,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
goto err;
err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->afi->family, ctx->table);
+ event, 0, ctx->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -526,7 +491,6 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_af_info *afi;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
@@ -535,30 +499,27 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (!nft_is_active(net, table))
- continue;
- if (nf_tables_fill_table_info(skb, net,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWTABLE,
- NLM_F_MULTI,
- afi->family, table) < 0)
- goto done;
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, table))
+ continue;
+ if (nf_tables_fill_table_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWTABLE, NLM_F_MULTI,
+ table->family, table) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
done:
rcu_read_unlock();
@@ -573,7 +534,6 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_af_info *afi;
const struct nft_table *table;
struct sk_buff *skb2;
int family = nfmsg->nfgen_family;
@@ -586,11 +546,8 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
return netlink_dump_start(nlsk, skb, nlh, &c);
}
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -611,10 +568,7 @@ err:
return err;
}
-static void _nf_tables_table_disable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table,
- u32 cnt)
+static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
{
struct nft_chain *chain;
u32 i = 0;
@@ -632,9 +586,7 @@ static void _nf_tables_table_disable(struct net *net,
}
}
-static int nf_tables_table_enable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table)
+static int nf_tables_table_enable(struct net *net, struct nft_table *table)
{
struct nft_chain *chain;
int err, i = 0;
@@ -654,15 +606,13 @@ static int nf_tables_table_enable(struct net *net,
return 0;
err:
if (i)
- _nf_tables_table_disable(net, afi, table, i);
+ nft_table_disable(net, table, i);
return err;
}
-static void nf_tables_table_disable(struct net *net,
- const struct nft_af_info *afi,
- struct nft_table *table)
+static void nf_tables_table_disable(struct net *net, struct nft_table *table)
{
- _nf_tables_table_disable(net, afi, table, 0);
+ nft_table_disable(net, table, 0);
}
static int nf_tables_updtable(struct nft_ctx *ctx)
@@ -691,7 +641,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
nft_trans_table_enable(trans) = false;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
+ ret = nf_tables_table_enable(ctx->net, ctx->table);
if (ret >= 0) {
ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
nft_trans_table_enable(trans) = true;
@@ -716,19 +666,14 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
const struct nlattr *name;
- struct nft_af_info *afi;
struct nft_table *table;
int family = nfmsg->nfgen_family;
u32 flags = 0;
struct nft_ctx ctx;
int err;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
name = nla[NFTA_TABLE_NAME];
- table = nf_tables_table_lookup(afi, name, genmask);
+ table = nf_tables_table_lookup(net, name, family, genmask);
if (IS_ERR(table)) {
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
@@ -738,7 +683,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nf_tables_updtable(&ctx);
}
@@ -748,39 +693,35 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
return -EINVAL;
}
- err = -EAFNOSUPPORT;
- if (!try_module_get(afi->owner))
- goto err1;
-
err = -ENOMEM;
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (table == NULL)
- goto err2;
+ goto err_kzalloc;
table->name = nla_strdup(name, GFP_KERNEL);
if (table->name == NULL)
- goto err3;
+ goto err_strdup;
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
INIT_LIST_HEAD(&table->flowtables);
+ table->family = family;
table->flags = flags;
+ table->handle = ++table_handle;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err4;
+ goto err_trans;
- list_add_tail_rcu(&table->list, &afi->tables);
+ list_add_tail_rcu(&table->list, &net->nft.tables);
return 0;
-err4:
+err_trans:
kfree(table->name);
-err3:
+err_strdup:
kfree(table);
-err2:
- module_put(afi->owner);
-err1:
+err_kzalloc:
return err;
}
@@ -846,30 +787,28 @@ out:
static int nft_flush(struct nft_ctx *ctx, int family)
{
- struct nft_af_info *afi;
struct nft_table *table, *nt;
const struct nlattr * const *nla = ctx->nla;
int err = 0;
- list_for_each_entry(afi, &ctx->net->nft.af_info, list) {
- if (family != AF_UNSPEC && afi->family != family)
+ list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
+ if (family != AF_UNSPEC && table->family != family)
continue;
- ctx->afi = afi;
- list_for_each_entry_safe(table, nt, &afi->tables, list) {
- if (!nft_is_active_next(ctx->net, table))
- continue;
+ ctx->family = table->family;
- if (nla[NFTA_TABLE_NAME] &&
- nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
- continue;
+ if (!nft_is_active_next(ctx->net, table))
+ continue;
- ctx->table = table;
+ if (nla[NFTA_TABLE_NAME] &&
+ nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
+ continue;
- err = nft_flush_table(ctx);
- if (err < 0)
- goto out;
- }
+ ctx->table = table;
+
+ err = nft_flush_table(ctx);
+ if (err < 0)
+ goto out;
}
out:
return err;
@@ -882,20 +821,23 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
struct nft_table *table;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
- if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+ nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
+ if (family == AF_UNSPEC ||
+ (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
return nft_flush(&ctx, family);
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
+ if (nla[NFTA_TABLE_HANDLE])
+ table = nf_tables_table_lookup_byhandle(net,
+ nla[NFTA_TABLE_HANDLE],
+ genmask);
+ else
+ table = nf_tables_table_lookup(net, nla[NFTA_TABLE_NAME],
+ family, genmask);
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -903,7 +845,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
table->use > 0)
return -EBUSY;
- ctx.afi = afi;
+ ctx.family = family;
ctx.table = table;
return nft_flush_table(&ctx);
@@ -915,7 +857,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
kfree(ctx->table->name);
kfree(ctx->table);
- module_put(ctx->afi->owner);
}
int nft_register_chain_type(const struct nf_chain_type *ctype)
@@ -1116,7 +1057,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
goto err;
err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->afi->family, ctx->table,
+ event, 0, ctx->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -1134,7 +1075,6 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
unsigned int idx = 0, s_idx = cb->args[0];
@@ -1144,31 +1084,30 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- list_for_each_entry_rcu(chain, &table->chains, list) {
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (!nft_is_active(net, chain))
- continue;
- if (nf_tables_fill_chain_info(skb, net,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWCHAIN,
- NLM_F_MULTI,
- afi->family, table, chain) < 0)
- goto done;
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, chain))
+ continue;
+ if (nf_tables_fill_chain_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWCHAIN,
+ NLM_F_MULTI,
+ table->family, table,
+ chain) < 0)
+ goto done;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
}
done:
@@ -1184,7 +1123,6 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
struct sk_buff *skb2;
@@ -1198,11 +1136,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
return netlink_dump_start(nlsk, skb, nlh, &c);
}
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1310,8 +1245,8 @@ struct nft_chain_hook {
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
- struct nft_af_info *afi,
- struct nft_chain_hook *hook, bool create)
+ struct nft_chain_hook *hook, u8 family,
+ bool create)
{
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nf_chain_type *type;
@@ -1328,15 +1263,12 @@ static int nft_chain_parse_hook(struct net *net,
return -EINVAL;
hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
- if (hook->num >= afi->nhooks)
- return -EINVAL;
-
hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
- type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+ type = chain_type[family][NFT_CHAIN_T_DEFAULT];
if (nla[NFTA_CHAIN_TYPE]) {
- type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
- create);
+ type = nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+ family, create);
if (IS_ERR(type))
return PTR_ERR(type);
}
@@ -1353,7 +1285,7 @@ static int nft_chain_parse_hook(struct net *net,
hook->type = type;
hook->dev = NULL;
- if (afi->flags & NFT_AF_NEEDS_DEV) {
+ if (family == NFPROTO_NETDEV) {
char ifname[IFNAMSIZ];
if (!ha[NFTA_HOOK_DEV]) {
@@ -1388,7 +1320,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
- struct nft_af_info *afi = ctx->afi;
struct nft_base_chain *basechain;
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
@@ -1402,7 +1333,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
- err = nft_chain_parse_hook(net, nla, afi, &hook, create);
+ err = nft_chain_parse_hook(net, nla, &hook, family, create);
if (err < 0)
return err;
@@ -1495,7 +1426,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (!nft_is_base_chain(chain))
return -EBUSY;
- err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+ err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
create);
if (err < 0)
return err;
@@ -1574,7 +1505,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
const struct nlattr * uninitialized_var(name);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
u8 policy = NF_ACCEPT;
@@ -1584,11 +1514,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1628,7 +1555,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
}
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
if (chain != NULL) {
if (nlh->nlmsg_flags & NLM_F_EXCL)
@@ -1649,24 +1576,26 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ u64 handle;
u32 use;
int err;
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_CHAIN_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+ } else {
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+ }
if (IS_ERR(chain))
return PTR_ERR(chain);
@@ -1674,7 +1603,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
use = chain->use;
list_for_each_entry(rule, &chain->rules, list) {
@@ -1839,7 +1768,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (err < 0)
return err;
- type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
+ type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
if (IS_ERR(type))
return PTR_ERR(type);
@@ -2062,7 +1991,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
goto err;
err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->afi->family, ctx->table,
+ event, 0, ctx->family, ctx->table,
ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
@@ -2086,7 +2015,6 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_rule_dump_ctx *ctx = cb->data;
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -2097,39 +2025,37 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx && ctx->table &&
- strcmp(ctx->table, table->name) != 0)
+ if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+ continue;
+
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (ctx && ctx->chain &&
+ strcmp(ctx->chain, chain->name) != 0)
continue;
- list_for_each_entry_rcu(chain, &table->chains, list) {
- if (ctx && ctx->chain &&
- strcmp(ctx->chain, chain->name) != 0)
- continue;
-
- list_for_each_entry_rcu(rule, &chain->rules, list) {
- if (!nft_is_active(net, rule))
- goto cont;
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWRULE,
- NLM_F_MULTI | NLM_F_APPEND,
- afi->family, table, chain, rule) < 0)
- goto done;
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
+ if (!nft_is_active(net, rule))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWRULE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family,
+ table, chain, rule) < 0)
+ goto done;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
}
}
@@ -2159,7 +2085,6 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -2203,11 +2128,8 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
return netlink_dump_start(nlsk, skb, nlh, &c);
}
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -2264,7 +2186,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
+ int family = nfmsg->nfgen_family;
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
@@ -2280,11 +2202,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -2323,7 +2242,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return PTR_ERR(old_rule);
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
n = 0;
size = 0;
@@ -2447,18 +2366,14 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
int family = nfmsg->nfgen_family, err = 0;
struct nft_ctx ctx;
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_RULE_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -2469,7 +2384,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
return PTR_ERR(chain);
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
@@ -2636,6 +2551,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
[NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 },
+ [NFTA_SET_HANDLE] = { .type = NLA_U64 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2649,26 +2565,17 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct nft_af_info *afi = NULL;
+ int family = nfmsg->nfgen_family;
struct nft_table *table = NULL;
- if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
- }
-
if (nla[NFTA_SET_TABLE] != NULL) {
- if (afi == NULL)
- return -EAFNOSUPPORT;
-
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE],
- genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE],
+ family, genmask);
if (IS_ERR(table))
return PTR_ERR(table);
}
- nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
return 0;
}
@@ -2688,6 +2595,22 @@ static struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-ENOENT);
}
+static struct nft_set *nf_tables_set_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_set *set;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ list_for_each_entry(set, &table->sets, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == set->handle &&
+ nft_active_genmask(set, genmask))
+ return set;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
const struct nlattr *nla,
u8 genmask)
@@ -2795,7 +2718,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->nfgen_family = ctx->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
@@ -2803,6 +2726,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
+ NFTA_SET_PAD))
+ goto nla_put_failure;
if (set->flags != 0)
if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
goto nla_put_failure;
@@ -2887,10 +2813,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nft_set *set;
unsigned int idx, s_idx = cb->args[0];
- struct nft_af_info *afi;
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
- int cur_family = cb->args[3];
struct nft_ctx *ctx = cb->data, ctx_set;
if (cb->args[1])
@@ -2899,51 +2823,44 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (ctx->afi && ctx->afi != afi)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (ctx->family != NFPROTO_UNSPEC &&
+ ctx->family != table->family)
continue;
- if (cur_family) {
- if (afi->family != cur_family)
+ if (ctx->table && ctx->table != table)
+ continue;
+
+ if (cur_table) {
+ if (cur_table != table)
continue;
- cur_family = 0;
+ cur_table = NULL;
}
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx->table && ctx->table != table)
- continue;
+ idx = 0;
+ list_for_each_entry_rcu(set, &table->sets, list) {
+ if (idx < s_idx)
+ goto cont;
+ if (!nft_is_active(net, set))
+ goto cont;
- if (cur_table) {
- if (cur_table != table)
- continue;
+ ctx_set = *ctx;
+ ctx_set.table = table;
+ ctx_set.family = table->family;
- cur_table = NULL;
+ if (nf_tables_fill_set(skb, &ctx_set, set,
+ NFT_MSG_NEWSET,
+ NLM_F_MULTI) < 0) {
+ cb->args[0] = idx;
+ cb->args[2] = (unsigned long) table;
+ goto done;
}
- idx = 0;
- list_for_each_entry_rcu(set, &table->sets, list) {
- if (idx < s_idx)
- goto cont;
- if (!nft_is_active(net, set))
- goto cont;
-
- ctx_set = *ctx;
- ctx_set.table = table;
- ctx_set.afi = afi;
- if (nf_tables_fill_set(skb, &ctx_set, set,
- NFT_MSG_NEWSET,
- NLM_F_MULTI) < 0) {
- cb->args[0] = idx;
- cb->args[2] = (unsigned long) table;
- cb->args[3] = afi->family;
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
- if (s_idx)
- s_idx = 0;
+ idx++;
}
+ if (s_idx)
+ s_idx = 0;
}
cb->args[1] = 1;
done:
@@ -3041,8 +2958,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
const struct nft_set_ops *ops;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3149,15 +3066,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_SET_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
@@ -3223,6 +3137,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
set->udata = udata;
set->timeout = timeout;
set->gc_int = gc_int;
+ set->handle = nf_tables_alloc_handle(table);
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -3280,7 +3195,10 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+ if (nla[NFTA_SET_HANDLE])
+ set = nf_tables_set_lookup_byhandle(ctx.table, nla[NFTA_SET_HANDLE], genmask);
+ else
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
@@ -3415,19 +3333,15 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct nft_af_info *afi;
+ int family = nfmsg->nfgen_family;
struct nft_table *table;
- afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE],
- genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE],
+ family, genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
return 0;
}
@@ -3532,7 +3446,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nft_set_dump_ctx *dump_ctx = cb->data;
struct net *net = sock_net(skb->sk);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_set *set;
struct nft_set_dump_args args;
@@ -3544,21 +3457,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
int event;
rcu_read_lock();
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (afi != dump_ctx->ctx.afi)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
+ dump_ctx->ctx.family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- if (table != dump_ctx->ctx.table)
- continue;
+ if (table != dump_ctx->ctx.table)
+ continue;
- list_for_each_entry_rcu(set, &table->sets, list) {
- if (set == dump_ctx->set) {
- set_found = true;
- break;
- }
+ list_for_each_entry_rcu(set, &table->sets, list) {
+ if (set == dump_ctx->set) {
+ set_found = true;
+ break;
}
- break;
}
break;
}
@@ -3578,7 +3489,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = afi->family;
+ nfmsg->nfgen_family = table->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
@@ -3641,7 +3552,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
goto nla_put_failure;
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->afi->family;
+ nfmsg->nfgen_family = ctx->family;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
@@ -3998,7 +3909,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
.net = ctx->net,
- .afi = ctx->afi,
+ .family = ctx->family,
.table = ctx->table,
.chain = (struct nft_chain *)binding->chain,
};
@@ -4417,6 +4328,21 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nf_tables_obj_lookup);
+struct nft_object *nf_tables_obj_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla,
+ u32 objtype, u8 genmask)
+{
+ struct nft_object *obj;
+
+ list_for_each_entry(obj, &table->objects, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == obj->handle &&
+ objtype == obj->ops->type->type &&
+ nft_active_genmask(obj, genmask))
+ return obj;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
[NFTA_OBJ_TABLE] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
@@ -4424,6 +4350,7 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
.len = NFT_OBJ_MAXNAMELEN - 1 },
[NFTA_OBJ_TYPE] = { .type = NLA_U32 },
[NFTA_OBJ_DATA] = { .type = NLA_NESTED },
+ [NFTA_OBJ_HANDLE] = { .type = NLA_U64},
};
static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
@@ -4529,7 +4456,6 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
const struct nft_object_type *type;
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_object *obj;
struct nft_ctx ctx;
@@ -4541,11 +4467,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
!nla[NFTA_OBJ_DATA])
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -4563,7 +4486,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
return 0;
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
type = nft_obj_type_get(objtype);
if (IS_ERR(type))
@@ -4575,6 +4498,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err1;
}
obj->table = table;
+ obj->handle = nf_tables_alloc_handle(table);
+
obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
if (!obj->name) {
err = -ENOMEM;
@@ -4621,7 +4546,9 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
- nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
+ nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
+ nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
+ NFTA_OBJ_PAD))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4640,7 +4567,6 @@ struct nft_obj_filter {
static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- const struct nft_af_info *afi;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
struct nft_obj_filter *filter = cb->data;
@@ -4655,38 +4581,37 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- list_for_each_entry_rcu(obj, &table->objects, list) {
- if (!nft_is_active(net, obj))
- goto cont;
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table[0] &&
- strcmp(filter->table, table->name))
- goto cont;
- if (filter &&
- filter->type != NFT_OBJECT_UNSPEC &&
- obj->ops->type->type != filter->type)
- goto cont;
+ list_for_each_entry_rcu(obj, &table->objects, list) {
+ if (!nft_is_active(net, obj))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table[0] &&
+ strcmp(filter->table, table->name))
+ goto cont;
+ if (filter &&
+ filter->type != NFT_OBJECT_UNSPEC &&
+ obj->ops->type->type != filter->type)
+ goto cont;
- if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWOBJ,
- NLM_F_MULTI | NLM_F_APPEND,
- afi->family, table, obj, reset) < 0)
- goto done;
+ if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWOBJ,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family, table,
+ obj, reset) < 0)
+ goto done;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
}
done:
@@ -4738,7 +4663,6 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
int family = nfmsg->nfgen_family;
- const struct nft_af_info *afi;
const struct nft_table *table;
struct nft_object *obj;
struct sk_buff *skb2;
@@ -4769,11 +4693,8 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
!nla[NFTA_OBJ_TYPE])
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -4819,32 +4740,33 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_object *obj;
struct nft_ctx ctx;
u32 objtype;
if (!nla[NFTA_OBJ_TYPE] ||
- !nla[NFTA_OBJ_NAME])
+ (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_OBJ_TABLE], family,
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ if (nla[NFTA_OBJ_HANDLE])
+ obj = nf_tables_obj_lookup_byhandle(table, nla[NFTA_OBJ_HANDLE],
+ objtype, genmask);
+ else
+ obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME],
+ objtype, genmask);
if (IS_ERR(obj))
return PTR_ERR(obj);
if (obj->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nft_delobj(&ctx, obj);
}
@@ -4882,7 +4804,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->afi->family, ctx->report, GFP_KERNEL);
+ ctx->family, ctx->report, GFP_KERNEL);
}
/*
@@ -4910,6 +4832,7 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
[NFTA_FLOWTABLE_NAME] = { .type = NLA_STRING,
.len = NFT_NAME_MAXLEN - 1 },
[NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
+ [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
};
struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
@@ -4927,6 +4850,20 @@ struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nf_tables_flowtable_lookup);
+struct nft_flowtable *
+nf_tables_flowtable_lookup_byhandle(const struct nft_table *table,
+ const struct nlattr *nla, u8 genmask)
+{
+ struct nft_flowtable *flowtable;
+
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
+ nft_active_genmask(flowtable, genmask))
+ return flowtable;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
#define NFT_FLOWTABLE_DEVICE_MAX 8
static int nf_tables_parse_devices(const struct nft_ctx *ctx,
@@ -4993,7 +4930,7 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
return -EINVAL;
hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
- if (hooknum >= ctx->afi->nhooks)
+ if (hooknum != NF_NETDEV_INGRESS)
return -EINVAL;
priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
@@ -5009,6 +4946,8 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
goto err1;
}
+ flowtable->hooknum = hooknum;
+ flowtable->priority = priority;
flowtable->ops = ops;
flowtable->ops_len = n;
@@ -5029,33 +4968,31 @@ err1:
return err;
}
-static const struct nf_flowtable_type *
-__nft_flowtable_type_get(const struct nft_af_info *afi)
+static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
const struct nf_flowtable_type *type;
list_for_each_entry(type, &nf_tables_flowtables, list) {
- if (afi->family == type->family)
+ if (family == type->family)
return type;
}
return NULL;
}
-static const struct nf_flowtable_type *
-nft_flowtable_type_get(const struct nft_af_info *afi)
+static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family)
{
const struct nf_flowtable_type *type;
- type = __nft_flowtable_type_get(afi);
+ type = __nft_flowtable_type_get(family);
if (type != NULL && try_module_get(type->owner))
return type;
#ifdef CONFIG_MODULES
if (type == NULL) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
- request_module("nf-flowtable-%u", afi->family);
+ request_module("nf-flowtable-%u", family);
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- if (__nft_flowtable_type_get(afi))
+ if (__nft_flowtable_type_get(family))
return ERR_PTR(-EAGAIN);
}
#endif
@@ -5067,15 +5004,12 @@ void nft_flow_table_iterate(struct net *net,
void *data)
{
struct nft_flowtable *flowtable;
- const struct nft_af_info *afi;
const struct nft_table *table;
rcu_read_lock();
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- list_for_each_entry_rcu(table, &afi->tables, list) {
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
- iter(&flowtable->data, data);
- }
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ iter(&flowtable->data, data);
}
}
rcu_read_unlock();
@@ -5106,7 +5040,6 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_ctx ctx;
int err, i, k;
@@ -5116,11 +5049,8 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
!nla[NFTA_FLOWTABLE_HOOK])
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+ family, genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -5137,20 +5067,22 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
return 0;
}
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
if (!flowtable)
return -ENOMEM;
flowtable->table = table;
+ flowtable->handle = nf_tables_alloc_handle(table);
+
flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
if (!flowtable->name) {
err = -ENOMEM;
goto err1;
}
- type = nft_flowtable_type_get(afi);
+ type = nft_flowtable_type_get(family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err2;
@@ -5210,26 +5142,28 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_ctx ctx;
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+ family, genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
- genmask);
+ if (nla[NFTA_FLOWTABLE_HANDLE])
+ flowtable = nf_tables_flowtable_lookup_byhandle(table,
+ nla[NFTA_FLOWTABLE_HANDLE],
+ genmask);
+ else
+ flowtable = nf_tables_flowtable_lookup(table,
+ nla[NFTA_FLOWTABLE_NAME],
+ genmask);
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
if (flowtable->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nft_delflowtable(&ctx, flowtable);
}
@@ -5256,7 +5190,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
- nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
+ nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
+ nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
+ NFTA_FLOWTABLE_PAD))
goto nla_put_failure;
nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
@@ -5298,40 +5234,37 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
- const struct nft_af_info *afi;
const struct nft_table *table;
rcu_read_lock();
cb->seq = net->nft.base_seq;
- list_for_each_entry_rcu(afi, &net->nft.af_info, list) {
- if (family != NFPROTO_UNSPEC && family != afi->family)
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ if (family != NFPROTO_UNSPEC && family != table->family)
continue;
- list_for_each_entry_rcu(table, &afi->tables, list) {
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
- if (!nft_is_active(net, flowtable))
- goto cont;
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table[0] &&
- strcmp(filter->table, table->name))
- goto cont;
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ if (!nft_is_active(net, flowtable))
+ goto cont;
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (filter && filter->table[0] &&
+ strcmp(filter->table, table->name))
+ goto cont;
- if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWFLOWTABLE,
- NLM_F_MULTI | NLM_F_APPEND,
- afi->family, flowtable) < 0)
- goto done;
+ if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWFLOWTABLE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family, flowtable) < 0)
+ goto done;
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
- idx++;
- }
+ idx++;
}
}
done:
@@ -5384,7 +5317,6 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
u8 genmask = nft_genmask_cur(net);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
- const struct nft_af_info *afi;
const struct nft_table *table;
struct sk_buff *skb2;
int err;
@@ -5410,17 +5342,14 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
if (!nla[NFTA_FLOWTABLE_NAME])
return -EINVAL;
- afi = nf_tables_afinfo_lookup(net, family, false);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_FLOWTABLE_TABLE], genmask);
+ table = nf_tables_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE],
+ family, genmask);
if (IS_ERR(table))
return PTR_ERR(table);
flowtable = nf_tables_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
genmask);
- if (IS_ERR(table))
+ if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
@@ -5457,7 +5386,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
ctx->seq, event, 0,
- ctx->afi->family, flowtable);
+ ctx->family, flowtable);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -5535,17 +5464,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct nft_flowtable *flowtable;
struct nft_table *table;
- struct nft_af_info *afi;
if (event != NETDEV_UNREGISTER)
return 0;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
- list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(flowtable, &table->flowtables, list) {
- nft_flowtable_event(event, dev, flowtable);
- }
+ list_for_each_entry(table, &dev_net(dev)->nft.tables, list) {
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ nft_flowtable_event(event, dev, flowtable);
}
}
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
@@ -5798,7 +5724,6 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
if (nft_trans_table_update(trans)) {
if (!nft_trans_table_enable(trans)) {
nf_tables_table_disable(net,
- trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
@@ -5960,7 +5885,6 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
if (nft_trans_table_update(trans)) {
if (nft_trans_table_enable(trans)) {
nf_tables_table_disable(net,
- trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
@@ -6563,20 +6487,6 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
}
EXPORT_SYMBOL_GPL(nft_data_dump);
-static int __net_init nf_tables_init_net(struct net *net)
-{
- INIT_LIST_HEAD(&net->nft.af_info);
- INIT_LIST_HEAD(&net->nft.commit_list);
- net->nft.base_seq = 1;
- return 0;
-}
-
-static void __net_exit nf_tables_exit_net(struct net *net)
-{
- WARN_ON_ONCE(!list_empty(&net->nft.af_info));
- WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
-}
-
int __nft_release_basechain(struct nft_ctx *ctx)
{
struct nft_rule *rule, *nr;
@@ -6597,8 +6507,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
-/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
-static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
+static void __nft_release_tables(struct net *net)
{
struct nft_flowtable *flowtable, *nf;
struct nft_table *table, *nt;
@@ -6608,10 +6517,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
struct nft_set *set, *ns;
struct nft_ctx ctx = {
.net = net,
- .afi = afi,
};
- list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+ ctx.family = table->family;
+
list_for_each_entry(chain, &table->chains, list)
nf_tables_unregister_hook(net, table, chain);
list_for_each_entry(flowtable, &table->flowtables, list)
@@ -6652,6 +6562,21 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
}
}
+static int __net_init nf_tables_init_net(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nft.tables);
+ INIT_LIST_HEAD(&net->nft.commit_list);
+ net->nft.base_seq = 1;
+ return 0;
+}
+
+static void __net_exit nf_tables_exit_net(struct net *net)
+{
+ __nft_release_tables(net);
+ WARN_ON_ONCE(!list_empty(&net->nft.tables));
+ WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.exit = nf_tables_exit_net,
@@ -6678,7 +6603,6 @@ static int __init nf_tables_module_init(void)
register_netdevice_notifier(&nf_tables_flowtable_notifier);
- pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
return register_pernet_subsys(&nf_tables_net_ops);
err3:
nf_tables_core_module_exit();
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index 58b9be7480bb..e30c7da09d0d 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -38,40 +38,6 @@ static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_inet __read_mostly = {
- .family = NFPROTO_INET,
- .nhooks = NF_INET_NUMHOOKS,
- .owner = THIS_MODULE,
-};
-
-static int __net_init nf_tables_inet_init_net(struct net *net)
-{
- net->nft.inet = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.inet == NULL)
- return -ENOMEM;
- memcpy(net->nft.inet, &nft_af_inet, sizeof(nft_af_inet));
-
- if (nft_register_afinfo(net, net->nft.inet) < 0)
- goto err;
-
- return 0;
-
-err:
- kfree(net->nft.inet);
- return -ENOMEM;
-}
-
-static void __net_exit nf_tables_inet_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.inet);
- kfree(net->nft.inet);
-}
-
-static struct pernet_operations nf_tables_inet_net_ops = {
- .init = nf_tables_inet_init_net,
- .exit = nf_tables_inet_exit_net,
-};
-
static const struct nf_chain_type filter_inet = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -93,22 +59,11 @@ static const struct nf_chain_type filter_inet = {
static int __init nf_tables_inet_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_inet);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_inet_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_inet);
-
- return ret;
+ return nft_register_chain_type(&filter_inet);
}
static void __exit nf_tables_inet_exit(void)
{
- unregister_pernet_subsys(&nf_tables_inet_net_ops);
nft_unregister_chain_type(&filter_inet);
}
@@ -117,4 +72,4 @@ module_exit(nf_tables_inet_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(1);
+MODULE_ALIAS_NFT_CHAIN(1, "filter");
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 42f6f6d42a6d..4041fafca934 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -38,41 +38,6 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
return nft_do_chain(&pkt, priv);
}
-static struct nft_af_info nft_af_netdev __read_mostly = {
- .family = NFPROTO_NETDEV,
- .nhooks = NF_NETDEV_NUMHOOKS,
- .owner = THIS_MODULE,
- .flags = NFT_AF_NEEDS_DEV,
-};
-
-static int nf_tables_netdev_init_net(struct net *net)
-{
- net->nft.netdev = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.netdev == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.netdev, &nft_af_netdev, sizeof(nft_af_netdev));
-
- if (nft_register_afinfo(net, net->nft.netdev) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.netdev);
- return -ENOMEM;
-}
-
-static void nf_tables_netdev_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.netdev);
- kfree(net->nft.netdev);
-}
-
-static struct pernet_operations nf_tables_netdev_net_ops = {
- .init = nf_tables_netdev_init_net,
- .exit = nf_tables_netdev_exit_net,
-};
-
static const struct nf_chain_type nft_filter_chain_netdev = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -109,7 +74,6 @@ static int nf_tables_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain, *nr;
struct nft_ctx ctx = {
@@ -121,20 +85,18 @@ static int nf_tables_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
- ctx.afi = afi;
- if (afi->family != NFPROTO_NETDEV)
+ list_for_each_entry(table, &ctx.net->nft.tables, list) {
+ if (table->family != NFPROTO_NETDEV)
continue;
- list_for_each_entry(table, &afi->tables, list) {
- ctx.table = table;
- list_for_each_entry_safe(chain, nr, &table->chains, list) {
- if (!nft_is_base_chain(chain))
- continue;
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry_safe(chain, nr, &table->chains, list) {
+ if (!nft_is_base_chain(chain))
+ continue;
- ctx.chain = chain;
- nft_netdev_event(event, dev, &ctx);
- }
+ ctx.chain = chain;
+ nft_netdev_event(event, dev, &ctx);
}
}
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
@@ -154,27 +116,21 @@ static int __init nf_tables_netdev_init(void)
if (ret)
return ret;
- ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
- if (ret)
- goto err1;
-
ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
if (ret)
- goto err2;
+ goto err_register_netdevice_notifier;
return 0;
-err2:
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
-err1:
+err_register_netdevice_notifier:
nft_unregister_chain_type(&nft_filter_chain_netdev);
+
return ret;
}
static void __exit nf_tables_netdev_exit(void)
{
unregister_netdevice_notifier(&nf_tables_netdev_notifier);
- unregister_pernet_subsys(&nf_tables_netdev_net_ops);
nft_unregister_chain_type(&nft_filter_chain_netdev);
}
@@ -183,4 +139,4 @@ module_exit(nf_tables_netdev_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_FAMILY(5); /* NFPROTO_NETDEV */
+MODULE_ALIAS_NFT_CHAIN(5, "filter"); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 733d3e4a30d8..03ead8a9e90c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -37,8 +37,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
rcu_dereference_protected(table[(id)].subsys, \
lockdep_nfnl_is_held((id)))
-static char __initdata nfversion[] = "0.30";
-
static struct {
struct mutex mutex;
const struct nfnetlink_subsystem __rcu *subsys;
@@ -580,13 +578,11 @@ static int __init nfnetlink_init(void)
for (i=0; i<NFNL_SUBSYS_COUNT; i++)
mutex_init(&table[i].mutex);
- pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
return register_pernet_subsys(&nfnetlink_net_ops);
}
static void __exit nfnetlink_exit(void)
{
- pr_info("Removing netfilter NETLINK layer.\n");
unregister_pernet_subsys(&nfnetlink_net_ops);
}
module_init(nfnetlink_init);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c45e6d4358ab..88d427f9f9e6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -527,7 +527,6 @@ static int __init nfnl_acct_init(void)
goto err_out;
}
- pr_info("nfnl_acct: registering with nfnetlink.\n");
ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
if (ret < 0) {
pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
@@ -543,7 +542,6 @@ err_out:
static void __exit nfnl_acct_exit(void)
{
- pr_info("nfnl_acct: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&nfnl_acct_subsys);
unregister_pernet_subsys(&nfnl_acct_ops);
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 32b1c0b44e79..95b04702a655 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -615,8 +615,6 @@ err_out:
static void __exit cttimeout_exit(void)
{
- pr_info("cttimeout: unregistering from nfnetlink.\n");
-
nfnetlink_subsys_unregister(&cttimeout_subsys);
unregister_pernet_subsys(&cttimeout_ops);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e955bec0acc6..7b46aa4c478d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1054,7 +1054,6 @@ static int nful_open(struct inode *inode, struct file *file)
}
static const struct file_operations nful_file_ops = {
- .owner = THIS_MODULE,
.open = nful_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 2db35f2d553d..8bba23160a68 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1477,7 +1477,6 @@ static int nfqnl_open(struct inode *inode, struct file *file)
}
static const struct file_operations nfqnl_file_ops = {
- .owner = THIS_MODULE,
.open = nfqnl_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index dcff0dc8d28b..8e23726b9081 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -144,7 +144,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
{
par->net = ctx->net;
par->table = ctx->table->name;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case AF_INET:
entry->e4.ip.proto = proto;
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
@@ -175,7 +175,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
} else {
par->hook_mask = 0;
}
- par->family = ctx->afi->family;
+ par->family = ctx->family;
par->nft_compat = true;
}
@@ -267,7 +267,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
par.net = ctx->net;
par.target = target;
par.targinfo = info;
- par.family = ctx->afi->family;
+ par.family = ctx->family;
if (par.target->destroy != NULL)
par.target->destroy(&par);
@@ -358,7 +358,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
{
par->net = ctx->net;
par->table = ctx->table->name;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case AF_INET:
entry->e4.ip.proto = proto;
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
@@ -389,7 +389,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
} else {
par->hook_mask = 0;
}
- par->family = ctx->afi->family;
+ par->family = ctx->family;
par->nft_compat = true;
}
@@ -446,7 +446,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
par.net = ctx->net;
par.match = match;
par.matchinfo = info;
- par.family = ctx->afi->family;
+ par.family = ctx->family;
if (par.match->destroy != NULL)
par.match->destroy(&par);
@@ -648,7 +648,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
mt_name = nla_data(tb[NFTA_MATCH_NAME]);
rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
- family = ctx->afi->family;
+ family = ctx->family;
/* Re-use the existing match if it's already loaded. */
list_for_each_entry(nft_match, &nft_match_list, head) {
@@ -733,7 +733,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
tg_name = nla_data(tb[NFTA_TARGET_NAME]);
rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
- family = ctx->afi->family;
+ family = ctx->family;
/* Re-use the existing target if it's already loaded. */
list_for_each_entry(nft_target, &nft_target_list, head) {
@@ -812,8 +812,6 @@ static int __init nft_compat_module_init(void)
goto err_target;
}
- pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n");
-
return ret;
err_target:
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 2647b895f4b0..6ab274b14484 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -405,7 +405,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case NFPROTO_IPV4:
len = FIELD_SIZEOF(struct nf_conntrack_tuple,
src.u3.ip);
@@ -456,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nf_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
if (err < 0)
return err;
@@ -550,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
- err = nf_ct_netns_get(ctx->net, ctx->afi->family);
+ err = nf_ct_netns_get(ctx->net, ctx->family);
if (err < 0)
goto err1;
@@ -564,7 +564,7 @@ err1:
static void nft_ct_get_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
- nf_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->family);
}
static void nft_ct_set_destroy(const struct nft_ctx *ctx,
@@ -573,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
struct nft_ct *priv = nft_expr_priv(expr);
__nft_ct_set_destroy(ctx, priv);
- nf_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->family);
}
static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -734,7 +734,7 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
struct nft_ct_helper_obj *priv = nft_obj_data(obj);
struct nf_conntrack_helper *help4, *help6;
char name[NF_CT_HELPER_NAME_LEN];
- int family = ctx->afi->family;
+ int family = ctx->family;
if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
return -EINVAL;
@@ -753,14 +753,14 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
switch (family) {
case NFPROTO_IPV4:
- if (ctx->afi->family == NFPROTO_IPV6)
+ if (ctx->family == NFPROTO_IPV6)
return -EINVAL;
help4 = nf_conntrack_helper_try_module_get(name, family,
priv->l4proto);
break;
case NFPROTO_IPV6:
- if (ctx->afi->family == NFPROTO_IPV4)
+ if (ctx->family == NFPROTO_IPV4)
return -EINVAL;
help6 = nf_conntrack_helper_try_module_get(name, family,
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index ec0fd78231d8..fc83e29d6634 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -164,7 +164,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
}
priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
- err = nft_validate_register_load(priv->sreg_key, set->klen);;
+ err = nft_validate_register_load(priv->sreg_key, set->klen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index dd38785dfed9..4503b8dcf9c0 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -151,7 +151,7 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
priv->flowtable = flowtable;
flowtable->use++;
- return nf_ct_netns_get(ctx->net, ctx->afi->family);
+ return nf_ct_netns_get(ctx->net, ctx->family);
}
static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
@@ -160,7 +160,7 @@ static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
struct nft_flow_offload *priv = nft_expr_priv(expr);
priv->flowtable->use--;
- nf_ct_netns_put(ctx->net, ctx->afi->family);
+ nf_ct_netns_put(ctx->net, ctx->family);
}
static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 6f6e64423643..a27be36dc0af 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -112,7 +112,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
- err = nf_logger_find_get(ctx->afi->family, li->type);
+ err = nf_logger_find_get(ctx->family, li->type);
if (err < 0)
goto err1;
@@ -133,7 +133,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
- nf_logger_put(ctx->afi->family, li->type);
+ nf_logger_put(ctx->family, li->type);
}
static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 6ac03d4266c9..9d8655bc1bea 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -73,7 +73,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
}
}
- return nf_ct_netns_get(ctx->net, ctx->afi->family);
+ return nf_ct_netns_get(ctx->net, ctx->family);
}
EXPORT_SYMBOL_GPL(nft_masq_init);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 1a91e676f13e..8fb91940e2e7 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -339,7 +339,7 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
if (priv->key != NFT_META_SECPATH)
return 0;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case NFPROTO_NETDEV:
hooks = 1 << NF_NETDEV_INGRESS;
break;
@@ -370,7 +370,7 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
if (priv->key != NFT_META_PKTTYPE)
return 0;
- switch (ctx->afi->family) {
+ switch (ctx->family) {
case NFPROTO_BRIDGE:
hooks = 1 << NF_BR_PRE_ROUTING;
break;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index ed548d06b6dd..1f36954c2ba9 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -142,7 +142,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (family != ctx->afi->family)
+ if (family != ctx->family)
return -EOPNOTSUPP;
switch (family) {
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 1e66538bf0ff..c64cbe78dee7 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -75,7 +75,7 @@ int nft_redir_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- return nf_ct_netns_get(ctx->net, ctx->afi->family);
+ return nf_ct_netns_get(ctx->net, ctx->family);
}
EXPORT_SYMBOL_GPL(nft_redir_init);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 10c19a3f4cbd..0b56bf05c169 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1082,10 +1082,10 @@ struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
{
struct xt_table *t = xt_find_table_lock(net, af, name);
-#ifdef CONFIG_MODULE
+#ifdef CONFIG_MODULES
if (IS_ERR(t)) {
int err = request_module("%stable_%s", xt_prefix[af], name);
- if (err)
+ if (err < 0)
return ERR_PTR(err);
t = xt_find_table_lock(net, af, name);
}
@@ -1362,7 +1362,6 @@ static int xt_table_open(struct inode *inode, struct file *file)
}
static const struct file_operations xt_table_ops = {
- .owner = THIS_MODULE,
.open = xt_table_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1498,7 +1497,6 @@ static int xt_match_open(struct inode *inode, struct file *file)
}
static const struct file_operations xt_match_ops = {
- .owner = THIS_MODULE,
.open = xt_match_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1551,7 +1549,6 @@ static int xt_target_open(struct inode *inode, struct file *file)
}
static const struct file_operations xt_target_ops = {
- .owner = THIS_MODULE,
.open = xt_target_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5da8746f7b88..ca6847403ca2 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -353,7 +353,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
static bool select_all(const struct xt_hashlimit_htable *ht,
const struct dsthash_ent *he)
{
- return 1;
+ return true;
}
static bool select_gc(const struct xt_hashlimit_htable *ht,
@@ -1266,7 +1266,6 @@ static int dl_proc_open(struct inode *inode, struct file *file)
}
static const struct file_operations dl_file_ops_v2 = {
- .owner = THIS_MODULE,
.open = dl_proc_open_v2,
.read = seq_read,
.llseek = seq_lseek,
@@ -1274,7 +1273,6 @@ static const struct file_operations dl_file_ops_v2 = {
};
static const struct file_operations dl_file_ops_v1 = {
- .owner = THIS_MODULE,
.open = dl_proc_open_v1,
.read = seq_read,
.llseek = seq_lseek,
@@ -1282,7 +1280,6 @@ static const struct file_operations dl_file_ops_v1 = {
};
static const struct file_operations dl_file_ops = {
- .owner = THIS_MODULE,
.open = dl_proc_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c
index 000e70377f85..7ca64a50db04 100644
--- a/net/netfilter/xt_ipcomp.c
+++ b/net/netfilter/xt_ipcomp.c
@@ -58,7 +58,7 @@ static bool comp_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
pr_debug("Dropping evil IPComp tinygram.\n");
par->hotdrop = true;
- return 0;
+ return false;
}
return spi_match(compinfo->spis[0], compinfo->spis[1],
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 54cbf5b9864c..2ad445c1d27c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2424,6 +2424,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
while (skb->len >= nlmsg_total_size(0)) {
int msglen;
+ memset(&extack, 0, sizeof(extack));
nlh = nlmsg_hdr(skb);
err = 0;
@@ -2438,7 +2439,6 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
goto ack;
- memset(&extack, 0, sizeof(extack));
err = cb(skb, nlh, &extack);
if (err == -EINTR)
goto skip;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 86d6e9d2cf00..f5d293416f46 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -122,7 +122,8 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
}
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
- u32 prio, struct tcf_chain *chain)
+ u32 prio, struct tcf_chain *chain,
+ struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
int err;
@@ -148,6 +149,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
module_put(tp->ops->owner);
err = -EAGAIN;
} else {
+ NL_SET_ERR_MSG(extack, "TC classifier not found");
err = -ENOENT;
}
goto errout;
@@ -935,7 +937,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
- u32 parent, void *fh, bool unicast, bool *last)
+ u32 parent, void *fh, bool unicast, bool *last,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -947,11 +950,12 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
}
- err = tp->ops->delete(tp, fh, last);
+ err = tp->ops->delete(tp, fh, last, extack);
if (err) {
kfree_skb(skb);
return err;
@@ -960,8 +964,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (unicast)
return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
+ return err;
}
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
@@ -1021,8 +1028,10 @@ replay:
if (prio == 0) {
switch (n->nlmsg_type) {
case RTM_DELTFILTER:
- if (protocol || t->tcm_handle || tca[TCA_KIND])
+ if (protocol || t->tcm_handle || tca[TCA_KIND]) {
+ NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
return -ENOENT;
+ }
break;
case RTM_NEWTFILTER:
/* If no priority is provided by the user,
@@ -1035,6 +1044,7 @@ replay:
}
/* fall-through */
default:
+ NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
return -ENOENT;
}
}
@@ -1063,23 +1073,31 @@ replay:
parent = q->handle;
} else {
q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
- if (!q)
+ if (!q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
return -EINVAL;
+ }
}
/* Is it classful? */
cops = q->ops->cl_ops;
- if (!cops)
+ if (!cops) {
+ NL_SET_ERR_MSG(extack, "Qdisc not classful");
return -EINVAL;
+ }
- if (!cops->tcf_block)
+ if (!cops->tcf_block) {
+ NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
return -EOPNOTSUPP;
+ }
/* Do we search for filter, attached to class? */
if (TC_H_MIN(parent)) {
cl = cops->find(q, parent);
- if (cl == 0)
+ if (cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
return -ENOENT;
+ }
}
/* And the last stroke */
@@ -1097,12 +1115,14 @@ replay:
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
+ NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
err = -EINVAL;
goto errout;
}
chain = tcf_chain_get(block, chain_index,
n->nlmsg_type == RTM_NEWTFILTER);
if (!chain) {
+ NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
goto errout;
}
@@ -1118,6 +1138,7 @@ replay:
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, prio_allocate);
if (IS_ERR(tp)) {
+ NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
goto errout;
}
@@ -1126,12 +1147,14 @@ replay:
/* Proto-tcf does not exist, create new one */
if (tca[TCA_KIND] == NULL || !protocol) {
+ NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
err = -EINVAL;
goto errout;
}
if (n->nlmsg_type != RTM_NEWTFILTER ||
!(n->nlmsg_flags & NLM_F_CREATE)) {
+ NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout;
}
@@ -1140,13 +1163,14 @@ replay:
prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, chain);
+ protocol, prio, chain, extack);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
goto errout;
}
tp_created = 1;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
goto errout;
}
@@ -1165,6 +1189,7 @@ replay:
if (n->nlmsg_type != RTM_NEWTFILTER ||
!(n->nlmsg_flags & NLM_F_CREATE)) {
+ NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout;
}
@@ -1176,13 +1201,15 @@ replay:
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
tcf_proto_destroy(tp);
+ NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
}
break;
case RTM_DELTFILTER:
err = tfilter_del_notify(net, skb, n, tp, block,
- q, parent, fh, false, &last);
+ q, parent, fh, false, &last,
+ extack);
if (err)
goto errout;
if (last) {
@@ -1193,15 +1220,19 @@ replay:
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, block, q, parent,
fh, RTM_NEWTFILTER, true);
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
goto errout;
default:
+ NL_SET_ERR_MSG(extack, "Invalid netlink message type");
err = -EINVAL;
goto errout;
}
}
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
- n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
+ n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
+ extack);
if (err == 0) {
if (tp_created)
tcf_chain_tp_insert(chain, &chain_info, tp);
@@ -1392,7 +1423,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
+ struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -1425,8 +1457,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
}
#else
if ((exts->action && tb[exts->action]) ||
- (exts->police && tb[exts->police]))
+ (exts->police && tb[exts->police])) {
+ NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
return -EOPNOTSUPP;
+ }
#endif
return 0;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 5f169ded347e..6088be65d167 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -130,7 +130,8 @@ static void basic_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f = arg;
@@ -152,11 +153,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct basic_filter *f, unsigned long base,
struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
if (err < 0)
return err;
@@ -175,7 +177,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -221,7 +224,8 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
fnew->handle = idr_index;
}
- err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (err < 0) {
if (!fold)
idr_remove_ext(&head->handle_idr, fnew->handle);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index cf72aefcf98d..988ad45d78b8 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -186,10 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
return 0;
}
+static u32 cls_bpf_flags(u32 flags)
+{
+ return flags & CLS_BPF_SUPPORTED_GEN_FLAGS;
+}
+
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
- if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+ if (prog && oldprog &&
+ cls_bpf_flags(prog->gen_flags) !=
+ cls_bpf_flags(oldprog->gen_flags))
return -EINVAL;
if (prog && tc_skip_hw(prog->gen_flags))
@@ -295,7 +302,8 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
__cls_bpf_delete_prog(prog);
}
-static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -403,7 +411,8 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, bool ovr)
+ struct nlattr **tb, struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
bool is_bpf, is_ebpf, have_exts = false;
u32 gen_flags = 0;
@@ -414,7 +423,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr);
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
if (ret < 0)
return ret;
@@ -452,7 +461,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *oldprog = *arg;
@@ -500,7 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
prog->handle = handle;
}
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
+ ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (ret < 0)
goto errout_idr;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 309d5899265f..1b54fbfca414 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -91,7 +91,8 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -121,7 +122,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
+ extack);
if (err < 0)
goto errout;
@@ -154,7 +156,8 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
}
}
-static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 28cd6fb52c16..64c24b488058 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -401,7 +401,7 @@ static void flow_destroy_filter(struct rcu_head *head)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *fold, *fnew;
@@ -454,7 +454,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err2;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
+ extack);
if (err < 0)
goto err2;
@@ -574,7 +575,8 @@ err1:
return err;
}
-static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f = arg;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f61df19b1026..c6ac4a612c4a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -526,13 +526,14 @@ static void fl_set_key_ip(struct nlattr **tb,
}
static int fl_set_key(struct net *net, struct nlattr **tb,
- struct fl_flow_key *key, struct fl_flow_key *mask)
+ struct fl_flow_key *key, struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
{
__be16 ethertype;
int ret = 0;
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FLOWER_INDEV]) {
- int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+ int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
if (err < 0)
return err;
key->indev_ifindex = err;
@@ -827,11 +828,12 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
if (err < 0)
return err;
@@ -840,7 +842,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
tcf_bind_filter(tp, &f->res, base);
}
- err = fl_set_key(net, tb, &f->key, &mask->key);
+ err = fl_set_key(net, tb, &f->key, &mask->key, extack);
if (err)
return err;
@@ -853,7 +855,7 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = *arg;
@@ -916,7 +918,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
}
- err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+ err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (err)
goto errout_idr;
@@ -983,7 +986,8 @@ errout_tb:
return err;
}
-static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = arg;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 20f0de1a960a..94d159a8869a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -172,7 +172,8 @@ static void fw_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = arg;
@@ -218,13 +219,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
static int fw_set_parms(struct net *net, struct tcf_proto *tp,
struct fw_filter *f, struct nlattr **tb,
- struct nlattr **tca, unsigned long base, bool ovr)
+ struct nlattr **tca, unsigned long base, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
u32 mask;
int err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
+ extack);
if (err < 0)
return err;
@@ -236,7 +239,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FW_INDEV]) {
int ret;
- ret = tcf_change_indev(net, tb[TCA_FW_INDEV]);
+ ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
if (ret < 0)
return ret;
f->ifindex = ret;
@@ -257,7 +260,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr)
+ bool ovr, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
@@ -296,7 +299,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -345,7 +348,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
f->id = handle;
f->tp = tp;
- err = fw_set_parms(net, tp, f, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index d0e57c86636f..f67d3d7fcf40 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -142,11 +142,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
if (err < 0)
return err;
@@ -160,7 +161,7 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -198,12 +199,13 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
new->handle = handle;
new->flags = flags;
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr);
+ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+ extack);
if (err)
goto err_set_parms;
if (!tc_skip_hw(new->flags)) {
- err = mall_replace_hw_filter(tp, new, (unsigned long) new);
+ err = mall_replace_hw_filter(tp, new, (unsigned long)new);
if (err)
goto err_replace_hw_filter;
}
@@ -223,7 +225,8 @@ err_exts_init:
return err;
}
-static int mall_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a1f2b1b7c014..55467c30d524 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -316,7 +316,8 @@ static void route4_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = arg;
@@ -389,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct route4_filter *f,
u32 handle, struct route4_head *head,
struct nlattr **tb, struct nlattr *est, int new,
- bool ovr)
+ bool ovr, struct netlink_ext_ack *extack)
{
u32 id = 0, to = 0, nhandle = 0x8000;
struct route4_filter *fp;
@@ -397,7 +398,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
if (err < 0)
return err;
@@ -471,7 +472,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -515,7 +517,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
}
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], new, ovr);
+ tca[TCA_RATE], new, ovr, extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index cf325625c99d..5cc0df690cff 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -350,7 +350,8 @@ static void rsvp_destroy(struct tcf_proto *tp)
kfree_rcu(data, rcu);
}
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_filter *nfp, *f = arg;
@@ -486,7 +487,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- void **arg, bool ovr)
+ void **arg, bool ovr, struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
struct rsvp_filter *f, *nfp;
@@ -511,7 +512,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
if (err < 0)
goto errout2;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 67467ae24c97..01a163e0b6aa 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -193,7 +193,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = arg;
@@ -246,7 +247,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
{
bool last;
- return tcindex_delete(tp, arg, &last);
+ return tcindex_delete(tp, arg, &last, NULL);
}
static void __tcindex_destroy(struct rcu_head *head)
@@ -322,7 +323,7 @@ static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_filter_result cr;
@@ -334,7 +335,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
if (err < 0)
goto errout;
@@ -520,7 +521,8 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -540,7 +542,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], ovr);
+ tca[TCA_RATE], ovr, extack);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 020d328d0afd..57113e936155 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -672,7 +672,8 @@ static void u32_destroy(struct tcf_proto *tp)
tp->data = NULL;
}
-static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
+static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
+ struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = arg;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -688,13 +689,16 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
goto out;
}
- if (root_ht == ht)
+ if (root_ht == ht) {
+ NL_SET_ERR_MSG_MOD(extack, "Not allowed to delete root node");
return -EINVAL;
+ }
if (ht->refcnt == 1) {
ht->refcnt--;
u32_destroy_hnode(tp, ht);
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
return -EBUSY;
}
@@ -765,11 +769,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct tc_u_hnode *ht,
struct tc_u_knode *n, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+ struct nlattr *est, bool ovr,
+ struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
if (err < 0)
return err;
@@ -777,14 +782,18 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
- if (TC_U32_KEY(handle))
+ if (TC_U32_KEY(handle)) {
+ NL_SET_ERR_MSG_MOD(extack, "u32 Link handle must be a hash table");
return -EINVAL;
+ }
if (handle) {
ht_down = u32_lookup_ht(ht->tp_c, handle);
- if (ht_down == NULL)
+ if (!ht_down) {
+ NL_SET_ERR_MSG_MOD(extack, "Link hash table not found");
return -EINVAL;
+ }
ht_down->refcnt++;
}
@@ -802,7 +811,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_U32_INDEV]) {
int ret;
- ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
+ ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
if (ret < 0)
return -EINVAL;
n->ifindex = ret;
@@ -893,7 +902,8 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -907,28 +917,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
size_t size;
#endif
- if (opt == NULL)
- return handle ? -EINVAL : 0;
+ if (!opt) {
+ if (handle) {
+ NL_SET_ERR_MSG_MOD(extack, "Filter handle requires options");
+ return -EINVAL;
+ } else {
+ return 0;
+ }
+ }
- err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
+ err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, extack);
if (err < 0)
return err;
if (tb[TCA_U32_FLAGS]) {
flags = nla_get_u32(tb[TCA_U32_FLAGS]);
- if (!tc_flags_valid(flags))
+ if (!tc_flags_valid(flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
return -EINVAL;
+ }
}
n = *arg;
if (n) {
struct tc_u_knode *new;
- if (TC_U32_KEY(n->handle) == 0)
+ if (TC_U32_KEY(n->handle) == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Key node id cannot be zero");
return -EINVAL;
+ }
- if (n->flags != flags)
+ if (n->flags != flags) {
+ NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
return -EINVAL;
+ }
new = u32_init_knode(tp, n);
if (!new)
@@ -936,7 +958,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
err = u32_set_parms(net, tp, base,
rtnl_dereference(n->ht_up), new, tb,
- tca[TCA_RATE], ovr);
+ tca[TCA_RATE], ovr, extack);
if (err) {
u32_destroy_key(tp, new, false);
@@ -962,10 +984,14 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_U32_DIVISOR]) {
unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
- if (--divisor > 0x100)
+ if (--divisor > 0x100) {
+ NL_SET_ERR_MSG_MOD(extack, "Exceeded maximum 256 hash buckets");
return -EINVAL;
- if (TC_U32_KEY(handle))
+ }
+ if (TC_U32_KEY(handle)) {
+ NL_SET_ERR_MSG_MOD(extack, "Divisor can only be used on a hash table");
return -EINVAL;
+ }
ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
if (ht == NULL)
return -ENOBUFS;
@@ -1011,20 +1037,26 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
htid = ht->handle;
} else {
ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
- if (ht == NULL)
+ if (!ht) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified hash table not found");
return -EINVAL;
+ }
}
} else {
ht = rtnl_dereference(tp->root);
htid = ht->handle;
}
- if (ht->divisor < TC_U32_HASH(htid))
+ if (ht->divisor < TC_U32_HASH(htid)) {
+ NL_SET_ERR_MSG_MOD(extack, "Specified hash table buckets exceed configured value");
return -EINVAL;
+ }
if (handle) {
- if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
+ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
+ NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
return -EINVAL;
+ }
handle = htid | TC_U32_NODE(handle);
err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
handle, handle + 1,
@@ -1035,6 +1067,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
handle = gen_new_kid(ht, htid);
if (tb[TCA_U32_SEL] == NULL) {
+ NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
err = -EINVAL;
goto erridr;
}
@@ -1083,7 +1116,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
+ err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr,
+ extack);
if (err == 0) {
struct tc_u_knode __rcu **ins;
struct tc_u_knode *pins;
diff --git a/net/socket.c b/net/socket.c
index fbfae1ed3ff5..1536515b6437 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2613,15 +2613,6 @@ out_fs:
core_initcall(sock_init); /* early initcall */
-static int __init jit_init(void)
-{
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
- bpf_jit_enable = 1;
-#endif
- return 0;
-}
-pure_initcall(jit_init);
-
#ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq)
{
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index e07ee3ae0023..736719c8314e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
crypto_info = &ctx->crypto_send;
/* Currently we don't support set crypto info more than one time */
- if (TLS_CRYPTO_INFO_READY(crypto_info))
+ if (TLS_CRYPTO_INFO_READY(crypto_info)) {
+ rc = -EBUSY;
goto out;
+ }
rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
if (rc) {
@@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
case TLS_CIPHER_AES_GCM_128: {
if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
rc = -EINVAL;
- goto out;
+ goto err_crypto_info;
}
rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
optlen - sizeof(*crypto_info));
@@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
}
default:
rc = -EINVAL;
- goto out;
+ goto err_crypto_info;
}
/* currently SW is default, we will have ethtool in future */
@@ -454,6 +456,15 @@ static int tls_init(struct sock *sk)
struct tls_context *ctx;
int rc = 0;
+ /* The TLS ulp is currently supported only for TCP sockets
+ * in ESTABLISHED state.
+ * Supporting sockets in LISTEN state will require us
+ * to modify the accept implementation to clone rather than
+ * share the ulp context.
+ */
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -ENOTSUPP;
+
/* allocate tls context */
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9773571b6a34..61f394d369bf 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -681,18 +681,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
}
default:
rc = -EINVAL;
- goto out;
+ goto free_priv;
}
ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
ctx->tag_size = tag_size;
ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
ctx->iv_size = iv_size;
- ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
- GFP_KERNEL);
+ ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
if (!ctx->iv) {
rc = -ENOMEM;
- goto out;
+ goto free_priv;
}
memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
@@ -740,7 +739,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
if (!rc)
- goto out;
+ return 0;
free_aead:
crypto_free_aead(sw_ctx->aead_send);
@@ -751,6 +750,9 @@ free_rec_seq:
free_iv:
kfree(ctx->iv);
ctx->iv = NULL;
+free_priv:
+ kfree(ctx->priv_ctx);
+ ctx->priv_ctx = NULL;
out:
return rc;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b48eb6d104c9..ab0c687d0c44 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9835,7 +9835,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
*/
if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
rdev->ops->get_station) {
- struct station_info sinfo;
+ struct station_info sinfo = {};
u8 *mac_addr;
mac_addr = wdev->current_bss->pub.bssid;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7ca04a7de85a..05186a47878f 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- /* we are under RTNL - globally locked - so can use a static struct */
- static struct station_info sinfo;
+ struct station_info sinfo = {};
u8 addr[ETH_ALEN];
int err;
diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c
index 12e1024069c2..0c12048ac79f 100644
--- a/samples/bpf/xdp2skb_meta_kern.c
+++ b/samples/bpf/xdp2skb_meta_kern.c
@@ -35,15 +35,17 @@ int _xdp_mark(struct xdp_md *ctx)
void *data, *data_end;
int ret;
- /* Reserve space in-front data pointer for our meta info.
+ /* Reserve space in-front of data pointer for our meta info.
* (Notice drivers not supporting data_meta will fail here!)
*/
ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
if (ret < 0)
return XDP_ABORTED;
- /* For some unknown reason, these ctx pointers must be read
- * after bpf_xdp_adjust_meta, else verifier will reject prog.
+ /* Notice: Kernel-side verifier requires that loading of
+ * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(),
+ * as pkt-data pointers are invalidated. Helpers that require
+ * this are determined/marked by bpf_helper_changes_pkt_data()
*/
data = (void *)(unsigned long)ctx->data;
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index c969141bfa8b..211db8ded0de 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -1,6 +1,7 @@
-/* XDP monitor tool, based on tracepoints
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
*
- * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * XDP monitor tool, based on tracepoints
*/
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
@@ -118,3 +119,92 @@ int trace_xdp_exception(struct xdp_exception_ctx *ctx)
return 0;
}
+
+/* Common stats data record shared with _user.c */
+struct datarec {
+ u64 processed;
+ u64 dropped;
+ u64 info;
+};
+#define MAX_CPUS 64
+
+struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = MAX_CPUS,
+};
+
+struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_enqueue_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int to_cpu; // offset:28; size:4; signed:1;
+};
+
+/* Accumulate xdp_cpumap_enqueue tracepoint counters, keyed by destination
+ * CPU, into the cpumap_enqueue_cnt map.
+ */
+SEC("tracepoint/xdp/xdp_cpumap_enqueue")
+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
+{
+	struct datarec *stats;
+	u32 dest_cpu = ctx->to_cpu;
+
+	/* The counter map only has MAX_CPUS slots */
+	if (dest_cpu >= MAX_CPUS)
+		return 1;
+
+	stats = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &dest_cpu);
+	if (!stats)
+		return 0;
+
+	stats->processed += ctx->processed;
+	stats->dropped += ctx->drops;
+
+	/* Count one bulk event so userspace can derive the average bulk size */
+	if (ctx->processed > 0)
+		stats->info++;
+
+	return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_kthread_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int sched; // offset:28; size:4; signed:1;
+};
+
+/* Accumulate xdp_cpumap_kthread tracepoint counters into the single-entry
+ * cpumap_kthread_cnt map.
+ */
+SEC("tracepoint/xdp/xdp_cpumap_kthread")
+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
+{
+	u32 slot = 0;
+	struct datarec *stats;
+
+	stats = bpf_map_lookup_elem(&cpumap_kthread_cnt, &slot);
+	if (!stats)
+		return 0;
+
+	stats->processed += ctx->processed;
+	stats->dropped += ctx->drops;
+
+	/* Tally the events where the kthread reported a schedule call */
+	if (ctx->sched != 0)
+		stats->info += 1;
+
+	return 0;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index eaba165b3549..eec14520d513 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -1,4 +1,5 @@
-/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__=
"XDP monitor tool, based on tracepoints\n"
@@ -40,6 +41,9 @@ static const struct option long_options[] = {
{0, 0, NULL, 0 }
};
+/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
+#define EXIT_FAIL_MEM 5
+
static void usage(char *argv[])
{
int i;
@@ -108,23 +112,93 @@ static const char *action2str(int action)
return NULL;
}
+/* Common stats data record shared with _kern.c */
+struct datarec {
+ __u64 processed;
+ __u64 dropped;
+ __u64 info;
+};
+#define MAX_CPUS 64
+
+/* Userspace structs for collection of stats from maps */
struct record {
- __u64 counter;
__u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+struct u64rec {
+ __u64 processed;
+};
+struct record_u64 {
+ /* record for _kern side __u64 values */
+ __u64 timestamp;
+ struct u64rec total;
+ struct u64rec *cpu;
};
struct stats_record {
- struct record xdp_redir[REDIR_RES_MAX];
- struct record xdp_exception[XDP_ACTION_MAX];
+ struct record_u64 xdp_redirect[REDIR_RES_MAX];
+ struct record_u64 xdp_exception[XDP_ACTION_MAX];
+ struct record xdp_cpumap_kthread;
+ struct record xdp_cpumap_enqueue[MAX_CPUS];
};
-static void stats_print_headers(bool err_only)
+/* Read one key of a per-CPU map holding struct datarec values into @rec:
+ * every CPU's counters are copied to rec->cpu[] and their sums stored in
+ * rec->total.  Returns false (leaving @rec untouched) if the lookup fails.
+ */
+static bool map_collect_record(int fd, __u32 key, struct record *rec)
{
- if (err_only)
- printf("\n%s\n", __doc_err_only__);
+	/* A per-CPU map lookup yields one value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct datarec values[nr_cpus];
+	struct datarec sum = { 0 };
+	unsigned int cpu;
+
+	if (bpf_map_lookup_elem(fd, &key, values)) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+		return false;
+	}
+
+	/* Timestamp is taken right after the map contents were read */
+	rec->timestamp = gettime();
- printf("%-14s %-11s %-10s %-18s %-9s\n",
- "ACTION", "result", "pps ", "pps-human-readable", "measure-period");
+
+	/* Keep the per-CPU values and build the totals in one pass */
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		rec->cpu[cpu] = values[cpu];
+		sum.processed += values[cpu].processed;
+		sum.dropped += values[cpu].dropped;
+		sum.info += values[cpu].info;
+	}
+	rec->total = sum;
+
+	return true;
+}
+
+/* Read one key of a per-CPU map holding plain __u64 counters into @rec:
+ * each CPU's value goes to rec->cpu[] and the sum to rec->total.
+ * Returns false (leaving @rec untouched) if the lookup fails.
+ */
+static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
+{
+	/* A per-CPU map lookup yields one value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct u64rec values[nr_cpus];
+	__u64 total = 0;
+	unsigned int cpu;
+
+	if (bpf_map_lookup_elem(fd, &key, values)) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+		return false;
+	}
+
+	/* Timestamp is taken right after the map contents were read */
+	rec->timestamp = gettime();
+
+	/* Keep the per-CPU values and build the total in one pass */
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		rec->cpu[cpu] = values[cpu];
+		total += values[cpu].processed;
+	}
+	rec->total.processed = total;
+
+	return true;
}
static double calc_period(struct record *r, struct record *p)
@@ -139,77 +213,203 @@ static double calc_period(struct record *r, struct record *p)
return period_;
}
-static double calc_pps(struct record *r, struct record *p, double period)
+/* Time elapsed between two record_u64 samples, scaled by NANOSEC_PER_SEC
+ * (presumably nanoseconds to seconds); 0 when the timestamps are equal.
+ */
+static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
+{
+	__u64 delta = r->timestamp - p->timestamp;
+
+	if (delta == 0)
+		return 0;
+
+	return (double) delta / NANOSEC_PER_SEC;
+}
+
+/* Rate of change of the "processed" counter between samples @p and @r over
+ * @period; 0 unless @period is positive.
+ */
+static double calc_pps(struct datarec *r, struct datarec *p, double period)
+{
+	__u64 delta;
+
+	if (!(period > 0))
+		return 0;
+
+	delta = r->processed - p->processed;
+	return delta / period;
+}
+
+/* Rate of change of a u64rec "processed" counter between samples @p and @r
+ * over @period; 0 unless @period is positive.
+ */
+static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
+{
+	__u64 delta;
+
+	if (!(period > 0))
+		return 0;
+
+	delta = r->processed - p->processed;
+	return delta / period;
+}
+
+/* Rate of change of the "dropped" counter between samples @p and @r over
+ * @period; 0 unless @period is positive.
+ */
+static double calc_drop(struct datarec *r, struct datarec *p, double period)
+{
+	__u64 delta;
+
+	if (!(period > 0))
+		return 0;
+
+	delta = r->dropped - p->dropped;
+	return delta / period;
+}
+
+static double calc_info(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
- packets = r->counter - p->counter;
+ packets = r->info - p->info;
pps = packets / period;
}
return pps;
}
-static void stats_print(struct stats_record *rec,
- struct stats_record *prev,
+/* Print one report comparing the current sample (stats_rec) against the
+ * previous one (stats_prev): a header line, then per-CPU and total rates
+ * for xdp_redirect results, xdp_exception actions, cpumap enqueue and
+ * cpumap kthread counters.  With err_only set, the redirect section
+ * starts at REDIR_ERROR instead of index 0.
+ */
+static void stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
bool err_only)
{
- double period = 0, pps = 0;
- struct record *r, *p;
- int i = 0;
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int rec_i = 0, i, to_cpu;
+ double t = 0, pps = 0;
- char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n";
+ /* Header */
+ printf("%-15s %-7s %-12s %-12s %-9s\n",
+ "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
/* tracepoint: xdp:xdp_redirect_* */
if (err_only)
- i = REDIR_ERROR;
-
- for (; i < REDIR_RES_MAX; i++) {
- r = &rec->xdp_redir[i];
- p = &prev->xdp_redir[i];
-
- if (p->timestamp) {
- period = calc_period(r, p);
- pps = calc_pps(r, p, period);
+ rec_i = REDIR_ERROR;
+
+ for (; rec_i < REDIR_RES_MAX; rec_i++) {
+ struct record_u64 *rec, *prev;
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+ rec = &stats_rec->xdp_redirect[rec_i];
+ prev = &stats_prev->xdp_redirect[rec_i];
+ t = calc_period_u64(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct u64rec *r = &rec->cpu[i];
+ struct u64rec *p = &prev->cpu[i];
+
+ pps = calc_pps_u64(r, p, t);
+ if (pps > 0)
+ printf(fmt1, "XDP_REDIRECT", i,
+ rec_i ? 0.0: pps, rec_i ? pps : 0.0,
+ err2str(rec_i));
}
- printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period);
+ pps = calc_pps_u64(&rec->total, &prev->total, t);
+ printf(fmt2, "XDP_REDIRECT", "total",
+ rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
}
/* tracepoint: xdp:xdp_exception */
- for (i = 0; i < XDP_ACTION_MAX; i++) {
- r = &rec->xdp_exception[i];
- p = &prev->xdp_exception[i];
- if (p->timestamp) {
- period = calc_period(r, p);
- pps = calc_pps(r, p, period);
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ struct record_u64 *rec, *prev;
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
+
+ rec = &stats_rec->xdp_exception[rec_i];
+ prev = &stats_prev->xdp_exception[rec_i];
+ t = calc_period_u64(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct u64rec *r = &rec->cpu[i];
+ struct u64rec *p = &prev->cpu[i];
+
+ /* rec_i indexes XDP actions here, so label the row with
+ * action2str(), the same way the "total" row below does.
+ */
+ pps = calc_pps_u64(r, p, t);
+ if (pps > 0)
+ printf(fmt1, "Exception", i,
+ 0.0, pps, action2str(rec_i));
}
+ pps = calc_pps_u64(&rec->total, &prev->total, t);
if (pps > 0)
- printf(fmt, action2str(i), "Exception",
- pps, pps, period);
+ printf(fmt2, "Exception", "total",
+ 0.0, pps, action2str(rec_i));
}
- printf("\n");
-}
-static __u64 get_key32_value64_percpu(int fd, __u32 key)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus];
- __u64 sum = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return 0;
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+ char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+ char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
+ struct record *rec, *prev;
+ char *info_str = "";
+ double drop, info;
+
+ rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
+ prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ if (info > 0) {
+ info_str = "bulk-average";
+ info = pps / info; /* calc average bulk size */
+ }
+ if (pps > 0)
+ printf(fmt1, "cpumap-enqueue",
+ i, to_cpu, pps, drop, info, info_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ if (pps > 0) {
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ if (info > 0) {
+ info_str = "bulk-average";
+ info = pps / info; /* calc average bulk size */
+ }
+ printf(fmt2, "cpumap-enqueue",
+ "sum", to_cpu, pps, drop, info, info_str);
+ }
}
- /* Sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- sum += values[i];
+ /* cpumap kthread stats */
+ {
+ char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
+ char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
+ struct record *rec, *prev;
+ double drop, info;
+ char *i_str = "";
+
+ rec = &stats_rec->xdp_cpumap_kthread;
+ prev = &stats_prev->xdp_cpumap_kthread;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop(r, p, t);
+ info = calc_info(r, p, t);
+ if (info > 0)
+ i_str = "sched";
+ if (pps > 0)
+ printf(fmt1, "cpumap-kthread",
+ i, pps, drop, info, i_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop(&rec->total, &prev->total, t);
+ info = calc_info(&rec->total, &prev->total, t);
+ if (info > 0)
+ i_str = "sched-sum";
+ printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
- return sum;
+
+ printf("\n");
}
static bool stats_collect(struct stats_record *rec)
@@ -222,25 +422,109 @@ static bool stats_collect(struct stats_record *rec)
*/
fd = map_data[0].fd; /* map0: redirect_err_cnt */
- for (i = 0; i < REDIR_RES_MAX; i++) {
- rec->xdp_redir[i].timestamp = gettime();
- rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
- }
+ for (i = 0; i < REDIR_RES_MAX; i++)
+ map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
fd = map_data[1].fd; /* map1: exception_cnt */
for (i = 0; i < XDP_ACTION_MAX; i++) {
- rec->xdp_exception[i].timestamp = gettime();
- rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i);
+ map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
}
+ fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
+ for (i = 0; i < MAX_CPUS; i++)
+ map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
+
+ fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
+ map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
+
return true;
}
+/* Allocate a zero-filled array holding one record of @record_size bytes
+ * per possible CPU.  Never returns NULL: on allocation failure an error
+ * is printed and the process exits with EXIT_FAIL_MEM.
+ */
+static void *alloc_rec_per_cpu(int record_size)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	void *array;
+
+	/* calloc() hands back zeroed memory, so the result is checked
+	 * before anything touches it.
+	 */
+	array = calloc(nr_cpus, record_size);
+	if (!array) {
+		fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+		exit(EXIT_FAIL_MEM);
+	}
+	return array;
+}
+
+/* Allocate a zeroed stats_record together with the per-CPU arrays of every
+ * record it contains.  Never returns NULL: on allocation failure an error
+ * is printed and the process exits with EXIT_FAIL_MEM.  Release the result
+ * with free_stats_record().
+ */
+static struct stats_record *alloc_stats_record(void)
+{
+	struct stats_record *rec;
+	int rec_sz;
+	int i;
+
+	/* Alloc main stats_record structure; check it before first use */
+	rec = calloc(1, sizeof(*rec));
+	if (!rec) {
+		fprintf(stderr, "Mem alloc error\n");
+		exit(EXIT_FAIL_MEM);
+	}
+
+	/* Alloc stats stored per CPU for each record */
+	rec_sz = sizeof(struct u64rec);
+	for (i = 0; i < REDIR_RES_MAX; i++)
+		rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	for (i = 0; i < XDP_ACTION_MAX; i++)
+		rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	rec_sz = sizeof(struct datarec);
+	rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
+
+	for (i = 0; i < MAX_CPUS; i++)
+		rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
+
+	return rec;
+}
+
+/* Release a stats_record: every per-CPU array it owns, then the record. */
+static void free_stats_record(struct stats_record *r)
+{
+	int idx;
+
+	for (idx = 0; idx < REDIR_RES_MAX; idx++) {
+		free(r->xdp_redirect[idx].cpu);
+	}
+
+	for (idx = 0; idx < XDP_ACTION_MAX; idx++) {
+		free(r->xdp_exception[idx].cpu);
+	}
+
+	free(r->xdp_cpumap_kthread.cpu);
+
+	for (idx = 0; idx < MAX_CPUS; idx++) {
+		free(r->xdp_cpumap_enqueue[idx].cpu);
+	}
+
+	free(r);
+}
+
+/* Exchange the two stats_record pointers that @a and @b point at */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+	struct stats_record *held = *a;
+
+	*a = *b;
+	*b = held;
+}
+
static void stats_poll(int interval, bool err_only)
{
- struct stats_record rec, prev;
+ struct stats_record *rec, *prev;
- memset(&rec, 0, sizeof(rec));
+ rec = alloc_stats_record();
+ prev = alloc_stats_record();
+ stats_collect(rec);
+
+ if (err_only)
+ printf("\n%s\n", __doc_err_only__);
/* Trick to pretty printf with thousands separators use %' */
setlocale(LC_NUMERIC, "en_US");
@@ -258,13 +542,15 @@ static void stats_poll(int interval, bool err_only)
fflush(stdout);
while (1) {
- memcpy(&prev, &rec, sizeof(rec));
- stats_collect(&rec);
- stats_print_headers(err_only);
- stats_print(&rec, &prev, err_only);
+ swap(&prev, &rec);
+ stats_collect(rec);
+ stats_print(rec, prev, err_only);
fflush(stdout);
sleep(interval);
}
+
+ free_stats_record(rec);
+ free_stats_record(prev);
}
static void print_bpf_prog_info(void)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index cb8997ed0149..47cddf32aeba 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -265,12 +265,18 @@ else
objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
endif
+ifdef CONFIG_MODVERSIONS
+objtool_o = $(@D)/.tmp_$(@F)
+else
+objtool_o = $(@)
+endif
+
# 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
# 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file
# 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file
cmd_objtool = $(if $(patsubst y%,, \
$(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \
- $(__objtool_obj) $(objtool_args) "$(@)";)
+ $(__objtool_obj) $(objtool_args) "$(objtool_o)";)
objtool_obj = $(if $(patsubst y%,, \
$(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \
$(__objtool_obj))
@@ -286,16 +292,16 @@ objtool_dep = $(objtool_obj) \
define rule_cc_o_c
$(call echo-cmd,checksrc) $(cmd_checksrc) \
$(call cmd_and_fixdep,cc_o_c) \
- $(cmd_modversions_c) \
$(cmd_checkdoc) \
$(call echo-cmd,objtool) $(cmd_objtool) \
+ $(cmd_modversions_c) \
$(call echo-cmd,record_mcount) $(cmd_record_mcount)
endef
define rule_as_o_S
$(call cmd_and_fixdep,as_o_S) \
- $(cmd_modversions_S) \
- $(call echo-cmd,objtool) $(cmd_objtool)
+ $(call echo-cmd,objtool) $(cmd_objtool) \
+ $(cmd_modversions_S)
endef
# List module undefined symbols (or empty line if not enabled)
diff --git a/scripts/decodecode b/scripts/decodecode
index 438120da1361..5ea071099330 100755
--- a/scripts/decodecode
+++ b/scripts/decodecode
@@ -59,6 +59,14 @@ disas() {
${CROSS_COMPILE}strip $1.o
fi
+ if [ "$ARCH" = "arm64" ]; then
+ if [ $width -eq 4 ]; then
+ type=inst
+ fi
+
+ ${CROSS_COMPILE}strip $1.o
+ fi
+
${CROSS_COMPILE}objdump $OBJDUMPFLAGS -S $1.o | \
grep -v "/tmp\|Disassembly\|\.text\|^$" > $1.dis 2>&1
}
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index 1bf949c43b76..f6ab3ccf698f 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -96,6 +96,8 @@ def get_thread_info(task):
thread_info_addr = task.address + ia64_task_size
thread_info = thread_info_addr.cast(thread_info_ptr_type)
else:
+ if task.type.fields()[0].type == thread_info_type.get_type():
+ return task['thread_info']
thread_info = task['stack'].cast(thread_info_ptr_type)
return thread_info.dereference()
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 30044bc4f389..58c2bab4ef6e 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -172,7 +172,8 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
{
char *ptr, *pptr, *tmp;
off_t off = 0;
- int ret, flen, proglen, pass, ulen = 0;
+ unsigned int proglen;
+ int ret, flen, pass, ulen = 0;
regmatch_t pmatch[1];
unsigned long base;
regex_t regex;
@@ -199,7 +200,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
}
ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
- ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx",
+ ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx",
&flen, &proglen, &pass, &base);
if (ret != 4) {
regfree(&regex);
@@ -239,7 +240,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
}
assert(ulen == proglen);
- printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
+ printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
proglen, pass, flen);
printf("%lx + <x>:\n", base);
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 6601c95a9258..0b482c0070e0 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -34,6 +34,7 @@
/* Author: Jakub Kicinski <kubakici@wp.pl> */
#include <errno.h>
+#include <fcntl.h>
#include <fts.h>
#include <libgen.h>
#include <mntent.h>
@@ -433,6 +434,77 @@ ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
return if_indextoname(ifindex, buf);
}
+static int read_sysfs_hex_int(char *path)
+{
+ char vendor_id_buf[8];
+ int len;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ p_err("Can't open %s: %s", path, strerror(errno));
+ return -1;
+ }
+
+ len = read(fd, vendor_id_buf, sizeof(vendor_id_buf));
+ close(fd);
+ if (len < 0) {
+ p_err("Can't read %s: %s", path, strerror(errno));
+ return -1;
+ }
+ if (len >= (int)sizeof(vendor_id_buf)) {
+ p_err("Value in %s too long", path);
+ return -1;
+ }
+
+ vendor_id_buf[len] = 0;
+
+ return strtol(vendor_id_buf, NULL, 0);
+}
+
+static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
+{
+ char full_path[64];
+
+ snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s",
+ devname, entry_name);
+
+ return read_sysfs_hex_int(full_path);
+}
+
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
+{
+ char devname[IF_NAMESIZE];
+ int vendor_id;
+ int device_id;
+
+ if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
+ p_err("Can't get net device name for ifindex %d: %s", ifindex,
+ strerror(errno));
+ return NULL;
+ }
+
+ vendor_id = read_sysfs_netdev_hex_int(devname, "vendor");
+ if (vendor_id < 0) {
+ p_err("Can't get device vendor id for %s", devname);
+ return NULL;
+ }
+
+ switch (vendor_id) {
+ case 0x19ee:
+ device_id = read_sysfs_netdev_hex_int(devname, "device");
+ if (device_id != 0x4000 &&
+ device_id != 0x6000 &&
+ device_id != 0x6003)
+ p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
+ return "NFP-6xxx";
+ default:
+ p_err("Can't get bfd arch name for device vendor id 0x%04x",
+ vendor_id);
+ return NULL;
+ }
+}
+
void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
{
char name[IF_NAMESIZE];
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 57d32e8a1391..87439320ef70 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -76,7 +76,8 @@ static int fprintf_json(void *out, const char *fmt, ...)
return 0;
}
-void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+ const char *arch)
{
disassembler_ftype disassemble;
struct disassemble_info info;
@@ -100,6 +101,19 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
else
init_disassemble_info(&info, stdout,
(fprintf_ftype) fprintf);
+
+ /* For offloaded programs, use the BFD arch named by 'arch' instead. */
+ if (arch) {
+ const bfd_arch_info_type *inf = bfd_scan_arch(arch);
+
+ if (inf) {
+ bfdf->arch_info = inf;
+ } else {
+ p_err("No libbfd support for %s", arch);
+ return;
+ }
+ }
+
info.arch = bfd_get_arch(bfdf);
info.mach = bfd_get_mach(bfdf);
info.buffer = image;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 65b526fe6e7e..b8e9584d6246 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -121,7 +121,10 @@ int do_cgroup(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
-void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes);
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+ const char *arch);
void print_hex_data_json(uint8_t *data, size_t len);
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
+
#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 8d7db9d6b9cd..f95fa67bb498 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -66,6 +66,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
[BPF_MAP_TYPE_DEVMAP] = "devmap",
[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
+ [BPF_MAP_TYPE_CPUMAP] = "cpumap",
};
static unsigned int get_possible_cpus(void)
@@ -428,6 +429,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_name(json_wtr, "flags");
jsonw_printf(json_wtr, "%#x", info->map_flags);
+
+ print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
jsonw_uint_field(json_wtr, "bytes_key", info->key_size);
jsonw_uint_field(json_wtr, "bytes_value", info->value_size);
jsonw_uint_field(json_wtr, "max_entries", info->max_entries);
@@ -469,7 +473,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (*info->name)
printf("name %s ", info->name);
- printf("flags 0x%x\n", info->map_flags);
+ printf("flags 0x%x", info->map_flags);
+ print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+ printf("\n");
printf("\tkey %uB value %uB max_entries %u",
info->key_size, info->value_size, info->max_entries);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 099e21cf1b5c..e8e2baaf93c2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -776,7 +776,17 @@ static int do_dump(int argc, char **argv)
}
} else {
if (member_len == &info.jited_prog_len) {
- disasm_print_insn(buf, *member_len, opcodes);
+ const char *name = NULL;
+
+ if (info.ifindex) {
+ name = ifindex_to_bfd_name_ns(info.ifindex,
+ info.netns_dev,
+ info.netns_ino);
+ if (!name)
+ goto err_free;
+ }
+
+ disasm_print_insn(buf, *member_len, opcodes, name);
} else {
kernel_syms_load(&dd);
if (json_output)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69f96af4a569..af1f49ad8b88 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -900,6 +900,9 @@ struct xdp_md {
__u32 data;
__u32 data_end;
__u32 data_meta;
+ /* Below access go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
};
enum sk_action {
@@ -935,6 +938,9 @@ struct bpf_map_info {
__u32 max_entries;
__u32 map_flags;
char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
} __attribute__((aligned(8)));
/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -956,6 +962,12 @@ struct bpf_sock_ops {
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
};
/* List of known BPF sock_ops operators.
@@ -1010,7 +1022,8 @@ struct bpf_perf_event_value {
#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
struct bpf_cgroup_dev_ctx {
- __u32 access_type; /* (access << 16) | type */
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
__u32 major;
__u32 minor;
};
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 24460155c82c..c1c338661699 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -26,6 +26,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <errno.h>
#include "elf.h"
#include "warn.h"
@@ -358,7 +359,8 @@ struct elf *elf_open(const char *name, int flags)
elf->fd = open(name, flags);
if (elf->fd == -1) {
- perror("open");
+ fprintf(stderr, "objtool: Can't open '%s': %s\n",
+ name, strerror(errno));
goto err;
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 541d9d7fad5a..1e09d77f1948 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -3,3 +3,10 @@ test_maps
test_lru_map
test_lpm_map
test_tag
+FEATURE-DUMP.libbpf
+fixdep
+test_align
+test_dev_cgroup
+test_progs
+test_verifier_log
+feature
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a8aa7e251c8e..3a44b655d852 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
- test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
+ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
+ sample_map_ret0.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py
diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c
new file mode 100644
index 000000000000..0756303676ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_map_ret0.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") htab = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+/* Sample program which should always load for testing control paths. */
+SEC(".text") int func()
+{
+ __u64 key64 = 0;
+ __u32 key = 0;
+ long *value;
+
+ value = bpf_map_lookup_elem(&htab, &key);
+ if (!value)
+ return 1;
+ value = bpf_map_lookup_elem(&array, &key64);
+ if (!value)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index f61480641b6e..081510853c6d 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -521,6 +521,126 @@ static void test_lpm_delete(void)
close(map_fd);
}
+static void test_lpm_get_next_key(void)
+{
+ struct bpf_lpm_trie_key *key_p, *next_key_p;
+ size_t key_size;
+ __u32 value = 0;
+ int map_fd;
+
+ key_size = sizeof(*key_p) + sizeof(__u32);
+ key_p = alloca(key_size);
+ next_key_p = alloca(key_size);
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd >= 0);
+
+ /* empty tree. get_next_key should return ENOENT */
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
+ errno == ENOENT);
+
+ /* get and verify the first key; getting a second one should fail. */
+ key_p->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168);
+
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* a key with no exact match should yield the first one in post order */
+ key_p->prefixlen = 8;
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ /* add one more element (total two) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total three) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total four) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* a key with no exact match should return the first one in post order */
+ key_p->prefixlen = 22;
+ inet_pton(AF_INET, "192.168.1.0", key_p->data);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 0);
+
+ close(map_fd);
+}
+
int main(void)
{
struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
@@ -545,6 +665,8 @@ int main(void)
test_lpm_delete();
+ test_lpm_get_next_key();
+
printf("test_lpm: OK\n");
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index e3c750f17cb8..833b9c1ec450 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -20,6 +20,7 @@ import os
import pprint
import random
import string
+import struct
import subprocess
import time
@@ -156,6 +157,14 @@ def bpftool_prog_list(expected=None, ns=""):
(len(progs), expected))
return progs
+def bpftool_map_list(expected=None, ns=""):
+ _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+ if expected is not None:
+ if len(maps) != expected:
+ fail(True, "%d BPF maps loaded, expected %d" %
+ (len(maps), expected))
+ return maps
+
def bpftool_prog_list_wait(expected=0, n_retry=20):
for i in range(n_retry):
nprogs = len(bpftool_prog_list())
@@ -164,6 +173,14 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
time.sleep(0.05)
raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+def bpftool_map_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nmaps = len(bpftool_map_list())
+ if nmaps == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
def ip(args, force=False, JSON=True, ns="", fail=True):
if force:
args = "-force " + args
@@ -193,6 +210,26 @@ def mknetns(n_retry=10):
return name
return None
+def int2str(fmt, val):
+ ret = []
+ for b in struct.pack(fmt, val):
+ ret.append(int(b))
+ return " ".join(map(lambda x: str(x), ret))
+
+def str2int(strtab):
+ inttab = []
+ for i in strtab:
+ inttab.append(int(i, 16))
+ ba = bytearray(inttab)
+ if len(strtab) == 4:
+ fmt = "I"
+ elif len(strtab) == 8:
+ fmt = "Q"
+ else:
+ raise Exception("String array of len %d can't be unpacked to an int" %
+ (len(strtab)))
+ return struct.unpack(fmt, ba)[0]
+
class DebugfsDir:
"""
Class for accessing DebugFS directories as a dictionary.
@@ -311,13 +348,13 @@ class NetdevSim:
return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
fail=fail)
- def set_xdp(self, bpf, mode, force=False, fail=True):
+ def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True):
return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
- force=force, fail=fail)
+ force=force, JSON=JSON, fail=fail)
- def unset_xdp(self, mode, force=False, fail=True):
+ def unset_xdp(self, mode, force=False, JSON=True, fail=True):
return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
- force=force, fail=fail)
+ force=force, JSON=JSON, fail=fail)
def ip_link_show(self, xdp):
_, link = ip("link show dev %s" % (self['ifname']))
@@ -390,12 +427,16 @@ class NetdevSim:
################################################################################
def clean_up():
+ global files, netns, devs
+
for dev in devs:
dev.remove()
for f in files:
cmd("rm -f %s" % (f))
for ns in netns:
cmd("ip netns delete %s" % (ns))
+ files = []
+ netns = []
def pin_prog(file_name, idx=0):
progs = bpftool_prog_list(expected=(idx + 1))
@@ -405,16 +446,31 @@ def pin_prog(file_name, idx=0):
return file_name, bpf_pinned(file_name)
-def check_dev_info(other_ns, ns, pin_file=None, removed=False):
- if removed:
- bpftool_prog_list(expected=0)
- ret, err = bpftool("prog show pin %s" % (pin_file), fail=False)
- fail(ret == 0, "Showing prog with removed device did not fail")
- fail(err["error"].find("No such device") == -1,
- "Showing prog with removed device expected ENODEV, error is %s" %
- (err["error"]))
- return
- progs = bpftool_prog_list(expected=int(not removed), ns=ns)
+def pin_map(file_name, idx=0, expected=1):
+ maps = bpftool_map_list(expected=expected)
+ m = maps[idx]
+ bpftool("map pin id %d %s" % (m["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+ bpftool_prog_list(expected=0)
+ ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+ fail(ret == 0, "Showing prog with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing prog with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+ bpftool_map_list(expected=0)
+ ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+ fail(ret == 0, "Showing map with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing map with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+ progs = bpftool_prog_list(expected=1, ns=ns)
prog = progs[0]
fail("dev" not in prog.keys(), "Device parameters not reported")
@@ -423,16 +479,17 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False):
fail("ns_dev" not in dev.keys(), "Device parameters not reported")
fail("ns_inode" not in dev.keys(), "Device parameters not reported")
- if not removed and not other_ns:
+ if not other_ns:
fail("ifname" not in dev.keys(), "Ifname not reported")
fail(dev["ifname"] != sim["ifname"],
"Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
else:
fail("ifname" in dev.keys(), "Ifname is reported for other ns")
- if removed:
- fail(dev["ifindex"] != 0, "Device perameters not zero on removed")
- fail(dev["ns_dev"] != 0, "Device perameters not zero on removed")
- fail(dev["ns_inode"] != 0, "Device perameters not zero on removed")
+
+ maps = bpftool_map_list(expected=2, ns=ns)
+ for m in maps:
+ fail("dev" not in m.keys(), "Device parameters not reported")
+ fail(dev != m["dev"], "Map's device different than program's")
# Parse command line
parser = argparse.ArgumentParser()
@@ -464,7 +521,7 @@ if out.find("/sys/kernel/debug type debugfs") == -1:
cmd("mount -t debugfs none /sys/kernel/debug")
# Check samples are compiled
-samples = ["sample_ret0.o"]
+samples = ["sample_ret0.o", "sample_map_ret0.o"]
for s in samples:
ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
skip(ret != 0, "sample %s/%s not found, please compile it" %
@@ -739,8 +796,9 @@ try:
bpftool_prog_list_wait(expected=0)
sim = NetdevSim()
- sim.set_ethtool_tc_offloads(True)
- sim.set_xdp(obj, "offload")
+ map_obj = bpf_obj("sample_map_ret0.o")
+ start_test("Test loading program with maps...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
start_test("Test bpftool bound info reporting (own ns)...")
check_dev_info(False, "")
@@ -757,11 +815,111 @@ try:
sim.set_ns("")
check_dev_info(False, "")
- pin_file, _ = pin_prog("/sys/fs/bpf/tmp")
+ prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+ map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
sim.remove()
start_test("Test bpftool bound info reporting (removed dev)...")
- check_dev_info(True, "", pin_file=pin_file, removed=True)
+ check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+
+ start_test("Test map update (no flags)...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ maps = bpftool_map_list(expected=2)
+ array = maps[0] if maps[0]["type"] == "array" else maps[1]
+ htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, _ = bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "added too many entries")
+
+ start_test("Test map update (exists)...")
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, err = bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "updated non-existing key")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map update (noexist)...")
+ for m in maps:
+ for i in range(2):
+ ret, err = bpftool("map update id %d key %s value %s noexist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)),
+ fail=False)
+ fail(ret == 0, "updated existing key")
+ fail(err["error"].find("File exists") == -1,
+ "expected EEXIST, error is '%s'" % (err["error"]))
+
+ start_test("Test map dump...")
+ for m in maps:
+ _, entries = bpftool("map dump id %d" % (m["id"]))
+ for i in range(2): # entries were written above as key i -> value i * 3
+ key = str2int(entries[i]["key"])
+ fail(key != i, "expected key %d, got %d" % (i, key))
+ val = str2int(entries[i]["value"])
+ fail(val != i * 3, "expected value %d, got %d" % (i * 3, val))
+
+ start_test("Test map getnext...")
+ for m in maps:
+ _, entry = bpftool("map getnext id %d" % (m["id"]))
+ key = str2int(entry["next_key"])
+ fail(key != 0, "next key %d, expected %d" % (key, 0))
+ _, entry = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 0)))
+ key = str2int(entry["next_key"])
+ fail(key != 1, "next key %d, expected %d" % (key, 1))
+ ret, err = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 1)), fail=False)
+ fail(ret == 0, "got next key past the end of map")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map delete (htab)...")
+ for i in range(2):
+ bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+ start_test("Test map delete (array)...")
+ for i in range(2):
+ ret, err = bpftool("map delete id %d key %s" %
+ (htab["id"], int2str("I", i)), fail=False)
+ fail(ret == 0, "removed entry from an array")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map remove...")
+ sim.unset_xdp("offload")
+ bpftool_map_list_wait(expected=0)
+ sim.remove()
+
+ sim = NetdevSim()
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ sim.remove()
+ bpftool_map_list_wait(expected=0)
+
+ start_test("Test map creation fail path...")
+ sim = NetdevSim()
+ sim.dfs["bpf_map_accept"] = "N"
+ ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+ fail(ret == 0,
+ "netdevsim didn't refuse to create a map with offload disabled")
print("%s: OK" % (os.path.basename(__file__)))
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 960179882a1c..fb82d29ee863 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -29,6 +29,7 @@
#include <linux/filter.h>
#include <linux/bpf_perf_event.h>
#include <linux/bpf.h>
+#include <linux/if_ether.h>
#include <bpf/bpf.h>
@@ -49,6 +50,8 @@
#define MAX_INSNS 512
#define MAX_FIXUPS 8
#define MAX_NR_MAPS 4
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
@@ -62,6 +65,7 @@ struct bpf_test {
int fixup_map_in_map[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
+ uint32_t retval;
enum {
UNDEF,
ACCEPT,
@@ -95,6 +99,94 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = -3,
+ },
+ {
+ "DIV32 by 0, zero check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 by 0, zero check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 by 0, zero check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 by 0, zero check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 by 0, zero check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD64 by 0, zero check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "empty prog",
+ .insns = {
+ },
+ .errstr = "last insn is not an exit or jmp",
+ .result = REJECT,
+ },
+ {
+ "only exit insn",
+ .insns = {
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
},
{
"unreachable",
@@ -210,6 +302,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = 1,
},
{
"test8 ld_imm64",
@@ -517,6 +610,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
+ .retval = POINTER_VALUE,
},
{
"check valid spill/fill, skb mark",
@@ -803,6 +897,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "R1 pointer comparison",
.result_unpriv = REJECT,
.result = ACCEPT,
+ .retval = -ENOENT,
},
{
"jump test 4",
@@ -1823,6 +1918,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = 0xfaceb00c,
},
{
"PTR_TO_STACK store/load - bad alignment on off",
@@ -1881,6 +1977,7 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
.result_unpriv = REJECT,
.errstr_unpriv = "R0 leaks addr",
+ .retval = POINTER_VALUE,
},
{
"unpriv: add const to pointer",
@@ -2054,6 +2151,7 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_hash_recalc),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
@@ -2594,6 +2692,29 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "context stores via ST",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_ST stores into R1 context is not allowed",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "context stores via XADD",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
+ BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
"direct packet access: test1",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -2818,6 +2939,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
},
{
"direct packet access: test12 (and, good access)",
@@ -2842,6 +2964,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
},
{
"direct packet access: test13 (branches, good access)",
@@ -2872,6 +2995,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
},
{
"direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
@@ -2895,6 +3019,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
},
{
"direct packet access: test15 (spill with xadd)",
@@ -3181,6 +3306,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
},
{
"direct packet access: test28 (marking on <=, bad access)",
@@ -4313,7 +4439,8 @@ static struct bpf_test tests[] = {
.fixup_map1 = { 2 },
.errstr_unpriv = "R2 leaks addr into mem",
.result_unpriv = REJECT,
- .result = ACCEPT,
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
},
{
"leak pointer into ctx 2",
@@ -4327,7 +4454,8 @@ static struct bpf_test tests[] = {
},
.errstr_unpriv = "R10 leaks addr into mem",
.result_unpriv = REJECT,
- .result = ACCEPT,
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
},
{
"leak pointer into ctx 3",
@@ -5798,6 +5926,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 0 /* csum_diff of 64-byte packet */,
},
{
"helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
@@ -6166,6 +6295,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
+ .retval = 42 /* ultimate return value */,
},
{
"ld_ind: check calling conv, r1",
@@ -6237,6 +6367,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = 1,
},
{
"check bpf_perf_event_data->sample_period byte load permitted",
@@ -6708,7 +6839,7 @@ static struct bpf_test tests[] = {
BPF_JMP_IMM(BPF_JA, 0, 0, -7),
},
.fixup_map1 = { 4 },
- .errstr = "unbounded min value",
+ .errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
},
{
@@ -7224,6 +7355,7 @@ static struct bpf_test tests[] = {
},
.fixup_map1 = { 3 },
.result = ACCEPT,
+ .retval = POINTER_VALUE,
.result_unpriv = REJECT,
.errstr_unpriv = "R0 leaks addr as return value"
},
@@ -7244,6 +7376,7 @@ static struct bpf_test tests[] = {
},
.fixup_map1 = { 3 },
.result = ACCEPT,
+ .retval = POINTER_VALUE,
.result_unpriv = REJECT,
.errstr_unpriv = "R0 leaks addr as return value"
},
@@ -7685,6 +7818,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = TEST_DATA_LEN,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
@@ -8610,6 +8744,127 @@ static struct bpf_test tests[] = {
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "check deducing bounds from const, 1", /* constant R0, no-op signed compare, then scalar minus pointer */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* R0 = 1 */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0), /* if R0 s>= 1 goto +0: both outcomes continue at the next insn */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), /* R0 -= R1; R1 is a pointer according to errstr below */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below */
+ .errstr = "R0 tried to subtract pointer from scalar",
+ },
+ {
+ "check deducing bounds from const, 2", /* R0 == 1 checked from both sides, then pointer minus scalar */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* R0 = 1 */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), /* if R0 s>= 1 skip the exit below */
+ BPF_EXIT_INSN(), /* reached only when the s>= check is not taken */
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1), /* if R0 s<= 1 skip the exit below */
+ BPF_EXIT_INSN(), /* reached only when the s<= check is not taken */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), /* R1 -= R0, executed only after both branches were taken */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT, /* no errstr: the program is expected to load */
+ },
+ {
+ "check deducing bounds from const, 3", /* like test 1, but with constant 0 and a signed <= compare */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* R0 = 0 */
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), /* if R0 s<= 0 goto +0: both outcomes continue at the next insn */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), /* R0 -= R1; R1 is a pointer according to errstr below */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below */
+ .errstr = "R0 tried to subtract pointer from scalar",
+ },
+ {
+ "check deducing bounds from const, 4", /* like test 2, but with constant 0 and the compares in s<=, s>= order */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* R0 = 0 */
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), /* if R0 s<= 0 skip the exit below */
+ BPF_EXIT_INSN(), /* reached only when the s<= check is not taken */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), /* if R0 s>= 0 skip the exit below */
+ BPF_EXIT_INSN(), /* reached only when the s>= check is not taken */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), /* R1 -= R0, executed only after both branches were taken */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT, /* no errstr: the program is expected to load */
+ },
+ {
+ "check deducing bounds from const, 5", /* subtraction sits on the fall-through path of the compare */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* R0 = 0 */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), /* if R0 s>= 0 jump over the subtraction, straight to exit */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), /* fall-through path only: R0 -= R1 (R1 is a pointer per errstr) */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below */
+ .errstr = "R0 tried to subtract pointer from scalar",
+ },
+ {
+ "check deducing bounds from const, 6", /* subtraction sits on the taken path of the compare */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* R0 = 0 */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), /* if R0 s>= 0 skip the exit below */
+ BPF_EXIT_INSN(), /* reached only when the branch is not taken */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), /* taken path: R0 -= R1 (R1 is a pointer per errstr) */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below */
+ .errstr = "R0 tried to subtract pointer from scalar",
+ },
+ {
+ "check deducing bounds from const, 7", /* pointer R1 adjusted by constant R0, then dereferenced */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, ~0), /* R0 = ~0, an all-ones immediate (-1 as a signed int) */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), /* if R0 s>= 0 goto +0: both outcomes continue at the next insn */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), /* R1 -= R0; errstr below calls R1 the ctx pointer */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)), /* word load from the adjusted R1 at the offset of 'mark' */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below */
+ .errstr = "dereference of modified ctx ptr",
+ },
+ {
+ "check deducing bounds from const, 8", /* like test 7, but ADD on the fall-through path of the compare */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, ~0), /* R0 = ~0, an all-ones immediate (-1 as a signed int) */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), /* if R0 s>= 0 jump over the add, straight to the load */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), /* fall-through path only: R1 += R0 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)), /* word load through R1 at the offset of 'mark' */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below */
+ .errstr = "dereference of modified ctx ptr",
+ },
+ {
+ "check deducing bounds from const, 9", /* like test 1, but with constant 0 */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* R0 = 0 */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), /* if R0 s>= 0 goto +0: both outcomes continue at the next insn */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), /* R0 -= R1; R1 is a pointer according to errstr below */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below */
+ .errstr = "R0 tried to subtract pointer from scalar",
+ },
+ {
+ "check deducing bounds from const, 10", /* R0 is negated after the compare, before the subtraction */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* R0 = 0 */
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), /* if R0 s<= 0 goto +0: both outcomes continue at the next insn */
+ /* Marks reg as unknown. */
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0), /* negate R0 */
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), /* R0 -= R1; errstr below calls R1 the ctx pointer */
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT, /* expected to be rejected with the message below, not the "subtract pointer from scalar" one */
+ .errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+ },
+ {
"bpf_exit with invalid return code. test1",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
@@ -8705,6 +8960,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
.result_unpriv = REJECT,
.result = ACCEPT,
+ .retval = 1,
},
{
"calls: overlapping caller/callee",
@@ -8900,6 +9156,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_ACT,
.result = ACCEPT,
+ .retval = TEST_DATA_LEN,
},
{
"calls: callee using args1",
@@ -8912,6 +9169,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "allowed for root only",
.result_unpriv = REJECT,
.result = ACCEPT,
+ .retval = POINTER_VALUE,
},
{
"calls: callee using wrong args2",
@@ -8942,6 +9200,7 @@ static struct bpf_test tests[] = {
.errstr_unpriv = "allowed for root only",
.result_unpriv = REJECT,
.result = ACCEPT,
+ .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
},
{
"calls: callee changing pkt pointers",
@@ -8990,6 +9249,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
+ .retval = TEST_DATA_LEN + TEST_DATA_LEN,
},
{
"calls: calls with stack arith",
@@ -9008,6 +9268,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
+ .retval = 42,
},
{
"calls: calls with misaligned stack access",
@@ -9041,6 +9302,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
+ .retval = 43,
},
{
"calls: calls control flow, jump test 2",
@@ -9533,6 +9795,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_XDP,
.result = ACCEPT,
+ .retval = 42,
},
{
"calls: write into callee stack frame",
@@ -10144,6 +10407,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = POINTER_VALUE,
},
{
"calls: pkt_ptr spill into caller stack 2",
@@ -10209,6 +10473,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
+ .retval = 1,
},
{
"calls: pkt_ptr spill into caller stack 4",
@@ -10242,6 +10507,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
+ .retval = 1,
},
{
"calls: pkt_ptr spill into caller stack 5",
@@ -10650,10 +10916,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
int fd_prog, expected_ret, reject_from_alignment;
struct bpf_insn *prog = test->insns;
int prog_len = probe_filter_length(prog);
+ char data_in[TEST_DATA_LEN] = {};
int prog_type = test->prog_type;
int map_fds[MAX_NR_MAPS];
const char *expected_err;
- int i;
+ uint32_t retval;
+ int i, err;
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
@@ -10696,6 +10964,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
}
}
+ if (fd_prog >= 0) {
+ err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in),
+ NULL, NULL, &retval, NULL);
+ if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
+ printf("Unexpected bpf_prog_test_run error\n");
+ goto fail_log;
+ }
+ if (!err && retval != test->retval &&
+ test->retval != POINTER_VALUE) {
+ printf("FAIL retval %d != %d\n", retval, test->retval);
+ goto fail_log;
+ }
+ }
(*passes)++;
printf("OK%s\n", reject_from_alignment ?
" (NOTE: reject due to unknown alignment)" : "");