diff options
author | Martin KaFai Lau <martin.lau@kernel.org> | 2023-10-24 16:05:02 -0700 |
---|---|---|
committer | Martin KaFai Lau <martin.lau@kernel.org> | 2023-10-24 16:07:48 -0700 |
commit | 22360fad5889cbefe1eca695b0cc0273ab280b56 (patch) | |
tree | 33ca776f10feda7e38956dc26edf74fd1bbc88da /tools | |
parent | 42d31dd601fa43b9afdf069d1ba410b2306a4c76 (diff) | |
parent | ace15f91e569172dac71ae0aeb3a2e76d1ce1b17 (diff) |
Merge branch 'Add bpf programmable net device'
Daniel Borkmann says:
====================
This work adds a BPF programmable device which can operate in L3 or L2
mode where the BPF program is part of the xmit routine. It's program
management is done via bpf_mprog and it comes with BPF link support.
For details see patch 1 and following. Thanks!
v3 -> v4:
- Moved netkit_release_all() into ndo_uninit (Stan)
- Two small commit msg corrections (Toke)
- Added Acked/Reviewed-by
v2 -> v3:
- Remove setting dev->min_mtu to ETH_MIN_MTU (Andrew)
- Do not populate ethtool info->version (Andrew)
- Populate netdev private data before register_netdevice (Andrew)
- Use strscpy for ifname template (Jakub)
- Use GFP_KERNEL_ACCOUNT for link kzalloc (Jakub)
- Carry and dump link attach type for bpftool (Toke)
v1 -> v2:
- Rename from meta (Toke, Andrii, Alexei)
- Reuse skb_scrub_packet (Stan)
- Remove IFF_META and use netdev_ops (Toke)
- Add comment to multicast handler (Toke)
- Remove silly version info (Toke)
- Fix attach_type_name (Quentin)
- Rework libbpf link attach api to be similar
as tcx (Andrii)
- Move flags last for bpf_netkit_opts (Andrii)
- Rebased to bpf_mprog query api changes
- Folded link support patch into main one
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/bpf/bpftool/Documentation/bpftool-net.rst | 8 | ||||
-rw-r--r-- | tools/bpf/bpftool/link.c | 9 | ||||
-rw-r--r-- | tools/bpf/bpftool/net.c | 7 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 14 | ||||
-rw-r--r-- | tools/include/uapi/linux/if_link.h | 141 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.c | 16 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 5 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 39 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.h | 15 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/Makefile | 19 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/config | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/netlink_helpers.c | 358 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/netlink_helpers.h | 46 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/tc_helpers.h | 4 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/tc_netkit.c | 687 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/test_tc_link.c | 13 |
17 files changed, 1373 insertions, 10 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 5e2abd3de5ab8..dd3f9469765b2 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -37,7 +37,7 @@ DESCRIPTION **bpftool net { show | list }** [ **dev** *NAME* ] List bpf program attachments in the kernel networking subsystem. - Currently, device driver xdp attachments, tcx and old-style tc + Currently, device driver xdp attachments, tcx, netkit and old-style tc classifier/action attachments, flow_dissector as well as netfilter attachments are implemented, i.e., for program types **BPF_PROG_TYPE_XDP**, **BPF_PROG_TYPE_SCHED_CLS**, @@ -52,11 +52,11 @@ DESCRIPTION bpf programs, users should consult other tools, e.g., iproute2. The current output will start with all xdp program attachments, followed by - all tcx, then tc class/qdisc bpf program attachments, then flow_dissector - and finally netfilter programs. Both xdp programs and tcx/tc programs are + all tcx, netkit, then tc class/qdisc bpf program attachments, then flow_dissector + and finally netfilter programs. Both xdp programs and tcx/netkit/tc programs are ordered based on ifindex number. If multiple bpf programs attached to the same networking device through **tc**, the order will be first - all bpf programs attached to tcx, then tc classes, then all bpf programs + all bpf programs attached to tcx, netkit, then tc classes, then all bpf programs attached to non clsact qdiscs, and finally all bpf programs attached to root and clsact qdisc. diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 4b1407b050565..a1528cde81abf 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -451,6 +451,10 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) show_link_ifindex_json(info->tcx.ifindex, json_wtr); show_link_attach_type_json(info->tcx.attach_type, json_wtr); break; + case BPF_LINK_TYPE_NETKIT: + show_link_ifindex_json(info->netkit.ifindex, json_wtr); + show_link_attach_type_json(info->netkit.attach_type, json_wtr); + break; case BPF_LINK_TYPE_XDP: show_link_ifindex_json(info->xdp.ifindex, json_wtr); break; @@ -791,6 +795,11 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) show_link_ifindex_plain(info->tcx.ifindex); show_link_attach_type_plain(info->tcx.attach_type); break; + case BPF_LINK_TYPE_NETKIT: + printf("\n\t"); + show_link_ifindex_plain(info->netkit.ifindex); + show_link_attach_type_plain(info->netkit.attach_type); + break; case BPF_LINK_TYPE_XDP: printf("\n\t"); show_link_ifindex_plain(info->xdp.ifindex); diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 66a8ce8ae0127..968714b4c3d45 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -79,6 +79,8 @@ static const char * const attach_type_strings[] = { static const char * const attach_loc_strings[] = { [BPF_TCX_INGRESS] = "tcx/ingress", [BPF_TCX_EGRESS] = "tcx/egress", + [BPF_NETKIT_PRIMARY] = "netkit/primary", + [BPF_NETKIT_PEER] = "netkit/peer", }; const size_t net_attach_type_size = ARRAY_SIZE(attach_type_strings); @@ -506,6 +508,9 @@ static void show_dev_tc_bpf(struct ip_devname_ifindex *dev) { __show_dev_tc_bpf(dev, BPF_TCX_INGRESS); __show_dev_tc_bpf(dev, BPF_TCX_EGRESS); + + __show_dev_tc_bpf(dev, BPF_NETKIT_PRIMARY); + __show_dev_tc_bpf(dev, BPF_NETKIT_PEER); } static int show_dev_tc_bpf_classic(int sock, unsigned int nl_pid, @@ -926,7 +931,7 @@ static int do_help(int argc, char **argv) " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n" " " HELP_SPEC_OPTIONS " }\n" "\n" - "Note: Only xdp, tcx, tc, flow_dissector and netfilter attachments\n" + "Note: Only xdp, tcx, tc, netkit, flow_dissector and netfilter attachments\n" " are currently supported.\n" " For progs attached to cgroups, use \"bpftool cgroup\"\n" " to dump program attachments. For program types\n" diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7ba61b75bc0ef..0f6cdf52b1dab 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1052,6 +1052,8 @@ enum bpf_attach_type { BPF_CGROUP_UNIX_RECVMSG, BPF_CGROUP_UNIX_GETPEERNAME, BPF_CGROUP_UNIX_GETSOCKNAME, + BPF_NETKIT_PRIMARY, + BPF_NETKIT_PEER, __MAX_BPF_ATTACH_TYPE }; @@ -1071,6 +1073,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETFILTER = 10, BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, + BPF_LINK_TYPE_NETKIT = 13, MAX_BPF_LINK_TYPE, }; @@ -1656,6 +1659,13 @@ union bpf_attr { __u32 flags; __u32 pid; } uprobe_multi; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } netkit; }; } link_create; @@ -6576,6 +6586,10 @@ struct bpf_link_info { __u32 ifindex; __u32 attach_type; } tcx; + struct { + __u32 ifindex; + __u32 attach_type; + } netkit; }; } __attribute__((aligned(8))); diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 39e659c83cfd2..a0aa05a28cf29 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -211,6 +211,9 @@ struct rtnl_link_stats { * @rx_nohandler: Number of packets received on the interface * but dropped by the networking stack because the device is * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. */ struct rtnl_link_stats64 { __u64 rx_packets; @@ -243,6 +246,23 @@ struct rtnl_link_stats64 { __u64 rx_compressed; __u64 tx_compressed; __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; +}; + +/* Subset of link stats useful for in-HW collection. Meaning of the fields is as + * for struct rtnl_link_stats64. + */ +struct rtnl_hw_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; }; /* The struct should be in sync with struct ifmap */ @@ -350,7 +370,13 @@ enum { IFLA_GRO_MAX_SIZE, IFLA_TSO_MAX_SIZE, IFLA_TSO_MAX_SEGS, + IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */ + + IFLA_DEVLINK_PORT, + IFLA_GSO_IPV4_MAX_SIZE, + IFLA_GRO_IPV4_MAX_SIZE, + IFLA_DPLL_PIN, __IFLA_MAX }; @@ -539,6 +565,12 @@ enum { IFLA_BRPORT_MRP_IN_OPEN, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + IFLA_BRPORT_LOCKED, + IFLA_BRPORT_MAB, + IFLA_BRPORT_MCAST_N_GROUPS, + IFLA_BRPORT_MCAST_MAX_GROUPS, + IFLA_BRPORT_NEIGH_VLAN_SUPPRESS, + IFLA_BRPORT_BACKUP_NHID, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -716,7 +748,79 @@ enum ipvlan_mode { #define IPVLAN_F_PRIVATE 0x01 #define IPVLAN_F_VEPA 0x02 +/* Tunnel RTM header */ +struct tunnel_msg { + __u8 family; + __u8 flags; + __u16 reserved2; + __u32 ifindex; +}; + +/* netkit section */ +enum netkit_action { + NETKIT_NEXT = -1, + NETKIT_PASS = 0, + NETKIT_DROP = 2, + NETKIT_REDIRECT = 7, +}; + +enum netkit_mode { + NETKIT_L2, + NETKIT_L3, +}; + +enum { + IFLA_NETKIT_UNSPEC, + IFLA_NETKIT_PEER_INFO, + IFLA_NETKIT_PRIMARY, + IFLA_NETKIT_POLICY, + IFLA_NETKIT_PEER_POLICY, + IFLA_NETKIT_MODE, + __IFLA_NETKIT_MAX, +}; +#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) + /* VXLAN section */ + +/* include statistics in the dump */ +#define TUNNEL_MSG_FLAG_STATS 0x01 + +#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS + +/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */ +enum { + VNIFILTER_ENTRY_STATS_UNSPEC, + VNIFILTER_ENTRY_STATS_RX_BYTES, + VNIFILTER_ENTRY_STATS_RX_PKTS, + VNIFILTER_ENTRY_STATS_RX_DROPS, + VNIFILTER_ENTRY_STATS_RX_ERRORS, + VNIFILTER_ENTRY_STATS_TX_BYTES, + VNIFILTER_ENTRY_STATS_TX_PKTS, + VNIFILTER_ENTRY_STATS_TX_DROPS, + VNIFILTER_ENTRY_STATS_TX_ERRORS, + VNIFILTER_ENTRY_STATS_PAD, + __VNIFILTER_ENTRY_STATS_MAX +}; +#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1) + +enum { + VXLAN_VNIFILTER_ENTRY_UNSPEC, + VXLAN_VNIFILTER_ENTRY_START, + VXLAN_VNIFILTER_ENTRY_END, + VXLAN_VNIFILTER_ENTRY_GROUP, + VXLAN_VNIFILTER_ENTRY_GROUP6, + VXLAN_VNIFILTER_ENTRY_STATS, + __VXLAN_VNIFILTER_ENTRY_MAX +}; +#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) + +enum { + VXLAN_VNIFILTER_UNSPEC, + VXLAN_VNIFILTER_ENTRY, + __VXLAN_VNIFILTER_MAX +}; +#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1) + enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, @@ -748,6 +852,8 @@ enum { IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, + IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ + IFLA_VXLAN_LOCALBYPASS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -781,6 +887,7 @@ enum { IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, IFLA_GENEVE_DF, + IFLA_GENEVE_INNER_PROTO_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) @@ -826,6 +933,8 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, + IFLA_GTP_CREATE_SOCKETS, + IFLA_GTP_RESTART_COUNT, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) @@ -1162,6 +1271,17 @@ enum { #define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) +enum { + IFLA_STATS_GETSET_UNSPEC, + IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with + * a filter mask for the corresponding group. + */ + IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */ + __IFLA_STATS_GETSET_MAX, +}; + +#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1) + /* These are embedded into IFLA_STATS_LINK_XSTATS: * [IFLA_STATS_LINK_XSTATS] * -> [LINK_XSTATS_TYPE_xxx] @@ -1179,10 +1299,21 @@ enum { enum { IFLA_OFFLOAD_XSTATS_UNSPEC, IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. A nest */ + IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */ __IFLA_OFFLOAD_XSTATS_MAX }; #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) +enum { + IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC, + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */ + __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX, +}; +#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \ + (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1) + /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) @@ -1281,4 +1412,14 @@ enum { #define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) +/* DSA section */ + +enum { + IFLA_DSA_UNSPEC, + IFLA_DSA_MASTER, + __IFLA_DSA_MAX, +}; + +#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index b0f1913763a33..9dc9625651dcf 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -810,6 +810,22 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, tcx)) return libbpf_err(-EINVAL); break; + case BPF_NETKIT_PRIMARY: + case BPF_NETKIT_PEER: + relative_fd = OPTS_GET(opts, netkit.relative_fd, 0); + relative_id = OPTS_GET(opts, netkit.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.netkit.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + attr.link_create.netkit.relative_fd = relative_fd; + } + attr.link_create.netkit.expected_revision = OPTS_GET(opts, netkit.expected_revision, 0); + if (!OPTS_ZEROED(opts, netkit)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 74c2887cfd24a..d0f53772bdc02 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -415,6 +415,11 @@ struct bpf_link_create_opts { __u32 relative_id; __u64 expected_revision; } tcx; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } netkit; }; size_t :0; }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a295f6acf98fe..e067be95da3c0 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -126,6 +126,8 @@ static const char * const attach_type_name[] = { [BPF_TCX_INGRESS] = "tcx_ingress", [BPF_TCX_EGRESS] = "tcx_egress", [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", + [BPF_NETKIT_PRIMARY] = "netkit_primary", + [BPF_NETKIT_PEER] = "netkit_peer", }; static const char * const link_type_name[] = { @@ -142,6 +144,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_NETFILTER] = "netfilter", [BPF_LINK_TYPE_TCX] = "tcx", [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", + [BPF_LINK_TYPE_NETKIT] = "netkit", }; static const char * const map_type_name[] = { @@ -8915,6 +8918,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), + SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), @@ -12126,6 +12131,40 @@ bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); } +struct bpf_link * +bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, + const struct bpf_netkit_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_netkit_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + /* validate we don't have unexpected combinations of non-zero fields */ + if (!ifindex) { + pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.netkit.relative_fd = relative_fd; + link_create_opts.netkit.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); +} + struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 4753784385453..6cd9c501624f5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -800,6 +800,21 @@ LIBBPF_API struct bpf_link * bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, const struct bpf_tcx_opts *opts); +struct bpf_netkit_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_netkit_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, + const struct bpf_netkit_opts *opts); + struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index cc973b678a39b..b52dc28dc8af5 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -398,6 +398,7 @@ LIBBPF_1.3.0 { bpf_object__unpin; bpf_prog_detach_opts; bpf_program__attach_netfilter; + bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; ring__avail_data_size; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4225f975fce3f..9c27b67bc7b13 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -585,11 +585,20 @@ endef # Define test_progs test runner. TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs -TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ - network_helpers.c testing_helpers.c \ - btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c disasm.c \ - json_writer.c unpriv_helpers.c \ +TRUNNER_EXTRA_SOURCES := test_progs.c \ + cgroup_helpers.c \ + trace_helpers.c \ + network_helpers.c \ + testing_helpers.c \ + btf_helpers.c \ + cap_helpers.c \ + unpriv_helpers.c \ + netlink_helpers.c \ + test_loader.c \ + xsk.c \ + disasm.c \ + json_writer.c \ + flow_dissector_load.h \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 02dd4409200ee..3ec5927ec3e53 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -71,6 +71,7 @@ CONFIG_NETFILTER_SYNPROXY=y CONFIG_NETFILTER_XT_CONNMARK=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NETKIT=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_DEFRAG_IPV4=y diff --git a/tools/testing/selftests/bpf/netlink_helpers.c b/tools/testing/selftests/bpf/netlink_helpers.c new file mode 100644 index 0000000000000..caf36eb1d0323 --- /dev/null +++ b/tools/testing/selftests/bpf/netlink_helpers.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Taken & modified from iproute2's libnetlink.c + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <time.h> +#include <sys/socket.h> + +#include "netlink_helpers.h" + +static int rcvbuf = 1024 * 1024; + +void rtnl_close(struct rtnl_handle *rth) +{ + if (rth->fd >= 0) { + close(rth->fd); + rth->fd = -1; + } +} + +int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions, + int protocol) +{ + socklen_t addr_len; + int sndbuf = 32768; + int one = 1; + + memset(rth, 0, sizeof(*rth)); + rth->proto = protocol; + rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); + if (rth->fd < 0) { + perror("Cannot open netlink socket"); + return -1; + } + if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF, + &sndbuf, sizeof(sndbuf)) < 0) { + perror("SO_SNDBUF"); + goto err; + } + if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF, + &rcvbuf, sizeof(rcvbuf)) < 0) { + perror("SO_RCVBUF"); + goto err; + } + + /* Older kernels may no support extended ACK reporting */ + setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + memset(&rth->local, 0, sizeof(rth->local)); + rth->local.nl_family = AF_NETLINK; + rth->local.nl_groups = subscriptions; + + if (bind(rth->fd, (struct sockaddr *)&rth->local, + sizeof(rth->local)) < 0) { + perror("Cannot bind netlink socket"); + goto err; + } + addr_len = sizeof(rth->local); + if (getsockname(rth->fd, (struct sockaddr *)&rth->local, + &addr_len) < 0) { + perror("Cannot getsockname"); + goto err; + } + if (addr_len != sizeof(rth->local)) { + fprintf(stderr, "Wrong address length %d\n", addr_len); + goto err; + } + if (rth->local.nl_family != AF_NETLINK) { + fprintf(stderr, "Wrong address family %d\n", + rth->local.nl_family); + goto err; + } + rth->seq = time(NULL); + return 0; +err: + rtnl_close(rth); + return -1; +} + +int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) +{ + return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE); +} + +static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags) +{ + int len; + + do { + len = recvmsg(fd, msg, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + if (len < 0) { + fprintf(stderr, "netlink receive error %s (%d)\n", + strerror(errno), errno); + return -errno; + } + if (len == 0) { + fprintf(stderr, "EOF on netlink\n"); + return -ENODATA; + } + return len; +} + +static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer) +{ + struct iovec *iov = msg->msg_iov; + char *buf; + int len; + + iov->iov_base = NULL; + iov->iov_len = 0; + + len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC); + if (len < 0) + return len; + if (len < 32768) + len = 32768; + buf = malloc(len); + if (!buf) { + fprintf(stderr, "malloc error: not enough buffer\n"); + return -ENOMEM; + } + iov->iov_base = buf; + iov->iov_len = len; + len = __rtnl_recvmsg(fd, msg, 0); + if (len < 0) { + free(buf); + return len; + } + if (answer) + *answer = buf; + else + free(buf); + return len; +} + +static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err, + nl_ext_ack_fn_t errfn) +{ + fprintf(stderr, "RTNETLINK answers: %s\n", + strerror(-err->error)); +} + +static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov, + size_t iovlen, struct nlmsghdr **answer, + bool show_rtnl_err, nl_ext_ack_fn_t errfn) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct iovec riov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + unsigned int seq = 0; + struct nlmsghdr *h; + int i, status; + char *buf; + + for (i = 0; i < iovlen; i++) { + h = iov[i].iov_base; + h->nlmsg_seq = seq = ++rtnl->seq; + if (answer == NULL) + h->nlmsg_flags |= NLM_F_ACK; + } + status = sendmsg(rtnl->fd, &msg, 0); + if (status < 0) { + perror("Cannot talk to rtnetlink"); + return -1; + } + /* change msg to use the response iov */ + msg.msg_iov = &riov; + msg.msg_iovlen = 1; + i = 0; + while (1) { +next: + status = rtnl_recvmsg(rtnl->fd, &msg, &buf); + ++i; + if (status < 0) + return status; + if (msg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "Sender address length == %d!\n", + msg.msg_namelen); + exit(1); + } + for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) { + int len = h->nlmsg_len; + int l = len - sizeof(*h); + + if (l < 0 || len > status) { + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Truncated message!\n"); + free(buf); + return -1; + } + fprintf(stderr, + "Malformed message: len=%d!\n", + len); + exit(1); + } + if (nladdr.nl_pid != 0 || + h->nlmsg_pid != rtnl->local.nl_pid || + h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) { + /* Don't forget to skip that message. */ + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + continue; + } + if (h->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + int error = err->error; + + if (l < sizeof(struct nlmsgerr)) { + fprintf(stderr, "ERROR truncated\n"); + free(buf); + return -1; + } + if (error) { + errno = -error; + if (rtnl->proto != NETLINK_SOCK_DIAG && + show_rtnl_err) + rtnl_talk_error(h, err, errfn); + } + if (i < iovlen) { + free(buf); + goto next; + } + if (error) { + free(buf); + return -i; + } + if (answer) + *answer = (struct nlmsghdr *)buf; + else + free(buf); + return 0; + } + if (answer) { + *answer = (struct nlmsghdr *)buf; + return 0; + } + fprintf(stderr, "Unexpected reply!\n"); + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + } + free(buf); + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Message truncated!\n"); + continue; + } + if (status) { + fprintf(stderr, "Remnant of size %d!\n", status); + exit(1); + } + } +} + +static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer, bool show_rtnl_err, + nl_ext_ack_fn_t errfn) +{ + struct iovec iov = { + .iov_base = n, + .iov_len = n->nlmsg_len, + }; + + return __rtnl_talk_iov(rtnl, &iov, 1, answer, show_rtnl_err, errfn); +} + +int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer) +{ + return __rtnl_talk(rtnl, n, answer, true, NULL); +} + +int addattr(struct nlmsghdr *n, int maxlen, int type) +{ + return addattr_l(n, maxlen, type, NULL, 0); +} + +int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u8)); +} + +int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u16)); +} + +int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u32)); +} + +int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u64)); +} + +int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *str) +{ + return addattr_l(n, maxlen, type, str, strlen(str)+1); +} + +int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, + int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { + fprintf(stderr, "%s: Message exceeded bound of %d\n", + __func__, maxlen); + return -1; + } + rta = NLMSG_TAIL(n); + rta->rta_type = type; + rta->rta_len = len; + if (alen) + memcpy(RTA_DATA(rta), data, alen); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + return 0; +} + +int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len) +{ + if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) { + fprintf(stderr, "%s: Message exceeded bound of %d\n", + __func__, maxlen); + return -1; + } + + memcpy(NLMSG_TAIL(n), data, len); + memset((void *) NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len); + return 0; +} + +struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type) +{ + struct rtattr *nest = NLMSG_TAIL(n); + + addattr_l(n, maxlen, type, NULL, 0); + return nest; +} + +int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest) +{ + nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest; + return n->nlmsg_len; +} diff --git a/tools/testing/selftests/bpf/netlink_helpers.h b/tools/testing/selftests/bpf/netlink_helpers.h new file mode 100644 index 0000000000000..68116818a47e5 --- /dev/null +++ b/tools/testing/selftests/bpf/netlink_helpers.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef NETLINK_HELPERS_H +#define NETLINK_HELPERS_H + +#include <string.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +struct rtnl_handle { + int fd; + struct sockaddr_nl local; + struct sockaddr_nl peer; + __u32 seq; + __u32 dump; + int proto; + FILE *dump_fp; +#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01 +#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02 +#define RTNL_HANDLE_F_STRICT_CHK 0x04 + int flags; +}; + +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) + +typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off, + const struct nlmsghdr *inner_nlh); + +int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) + __attribute__((warn_unused_result)); +void rtnl_close(struct rtnl_handle *rth); +int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer) + __attribute__((warn_unused_result)); + +int addattr(struct nlmsghdr *n, int maxlen, int type); +int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data); +int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data); +int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data); +int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data); +int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data); +int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen); +int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len); +struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type); +int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest); +#endif /* NETLINK_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 67f985f7d2157..924d0e25320c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -4,6 +4,10 @@ #define TC_HELPERS #include <test_progs.h> +#ifndef loopback +# define loopback 1 +#endif + static inline __u32 id_from_prog_fd(int fd) { struct bpf_prog_info prog_info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c new file mode 100644 index 0000000000000..15ee7b2fc4106 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -0,0 +1,687 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <uapi/linux/if_link.h> +#include <net/if.h> +#include <test_progs.h> + +#define netkit_peer "nk0" +#define netkit_name "nk1" + +#define ping_addr_neigh 0x0a000002 /* 10.0.0.2 */ +#define ping_addr_noneigh 0x0a000003 /* 10.0.0.3 */ + +#include "test_tc_link.skel.h" +#include "netlink_helpers.h" +#include "tc_helpers.h" + +#define ICMP_ECHO 8 + +struct icmphdr { + __u8 type; + __u8 code; + __sum16 checksum; + struct { + __be16 id; + __be16 sequence; + } echo; +}; + +struct iplink_req { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[1024]; +}; + +static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, + bool same_netns) +{ + struct rtnl_handle rth = { .fd = -1 }; + struct iplink_req req = {}; + struct rtattr *linkinfo, *data; + const char *type = "netkit"; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + req.i.ifi_family = AF_UNSPEC; + + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, netkit_name, + strlen(netkit_name)); + linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); + addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); + data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + addattr_nest_end(&req.n, data); + addattr_nest_end(&req.n, linkinfo); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + *ifindex = if_nametoindex(netkit_name); + + ASSERT_GT(*ifindex, 0, "retrieve_ifindex"); + ASSERT_OK(system("ip netns add foo"), "create netns"); + ASSERT_OK(system("ip link set dev " netkit_name " up"), + "up primary"); + ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"), + "addr primary"); + if (same_netns) { + ASSERT_OK(system("ip link set dev " netkit_peer " up"), + "up peer"); + ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"), + "addr peer"); + } else { + ASSERT_OK(system("ip link set " netkit_peer " netns foo"), + "move peer"); + ASSERT_OK(system("ip netns exec foo ip link set dev " + netkit_peer " up"), "up peer"); + ASSERT_OK(system("ip netns exec foo ip addr add dev " + netkit_peer " 10.0.0.2/24"), "addr peer"); + } + return err; +} + +static void destroy_netkit(void) +{ + ASSERT_OK(system("ip link del dev " netkit_name), "del primary"); + ASSERT_OK(system("ip netns del foo"), "delete netns"); + ASSERT_EQ(if_nametoindex(netkit_name), 0, netkit_name "_ifindex"); +} + +static int __send_icmp(__u32 dest) +{ + struct sockaddr_in addr; + struct icmphdr icmp; + int sock, ret; + + ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); + if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)")) + return ret; + + sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); + if (!ASSERT_GE(sock, 0, "icmp_socket")) + return -errno; + + ret = setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, + netkit_name, strlen(netkit_name) + 1); + if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)")) + goto out; + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(dest); + + memset(&icmp, 0, sizeof(icmp)); + icmp.type = ICMP_ECHO; + icmp.echo.id = 1234; + icmp.echo.sequence = 1; + + ret = sendto(sock, &icmp, sizeof(icmp), 0, + (struct sockaddr *)&addr, sizeof(addr)); + if (!ASSERT_GE(ret, 0, "icmp_sendto")) + ret = -errno; + else + ret = 0; +out: + close(sock); + return ret; +} + +static int send_icmp(void) +{ + return __send_icmp(ping_addr_neigh); +} + +void serial_test_tc_netkit_basic(void) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[2], link_ids[2]; + __u32 pid1, pid2, lid1, lid2; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + BPF_NETKIT_PRIMARY), 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, + BPF_NETKIT_PEER), 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + ASSERT_NEQ(lid1, lid2, "link_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PEER, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + destroy_netkit(); +} + +static void serial_test_tc_netkit_multi_links_target(int mode, int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[3], link_ids[3]; + __u32 pid1, pid2, lid1, lid2; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + target), 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, + target), 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, target, 1); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + ASSERT_NEQ(lid1, lid2, "link_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, target, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_multi_links(void) +{ + serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PEER); + serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PEER); +} + +static void serial_test_tc_netkit_multi_opts_target(int mode, int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 pid1, pid2, fd1, fd2; + __u32 prog_ids[3]; + struct test_tc_link *skel; + int err, ifindex; + + err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + + pid1 = id_from_prog_fd(fd1); + pid2 = id_from_prog_fd(fd2); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + err = bpf_prog_attach_opts(fd1, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count_ifindex(ifindex, target, 1); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_fd1; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd2, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_fd1; + + assert_mprog_count_ifindex(ifindex, target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_fd2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + +cleanup_fd2: + err = bpf_prog_detach_opts(fd2, ifindex, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count_ifindex(ifindex, target, 1); +cleanup_fd1: + err = bpf_prog_detach_opts(fd1, ifindex, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count_ifindex(ifindex, target, 0); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, target, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_multi_opts(void) +{ + serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PEER); + serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PEER); +} + +void serial_test_tc_netkit_device(void) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[2], link_ids[2]; + __u32 pid1, pid2, lid1; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex, ifindex2; + + err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, + &ifindex, true); + if (err) + return; + + ifindex2 = if_nametoindex(netkit_peer); + ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2"); + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + BPF_NETKIT_PRIMARY), 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, + BPF_NETKIT_PEER), 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, + BPF_NETKIT_PRIMARY), 0, "tc3_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PRIMARY, &optq); + ASSERT_EQ(err, -EACCES, "prog_query_should_fail"); + + err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PEER, &optq); + ASSERT_EQ(err, -EACCES, "prog_query_should_fail"); + + link = bpf_program__attach_netkit(skel->progs.tc2, ifindex2, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + link = bpf_program__attach_netkit(skel->progs.tc3, ifindex2, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + destroy_netkit(); +} + +static void serial_test_tc_netkit_neigh_links_target(int mode, int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[2], link_ids[2]; + __u32 pid1, lid1; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + BPF_NETKIT_PRIMARY), 0, "tc1_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + + assert_mprog_count_ifindex(ifindex, target, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, target, 1); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(__send_icmp(ping_addr_noneigh), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true /* L2: ARP */, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, mode == NETKIT_L3, "seen_eth"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, target, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_neigh_links(void) +{ + serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY); +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index 30e7124c49a11..992400acb9572 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -1,7 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Isovalent */ #include <stdbool.h> + #include <linux/bpf.h> +#include <linux/if_ether.h> + +#include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> char LICENSE[] SEC("license") = "GPL"; @@ -12,10 +16,19 @@ bool seen_tc3; bool seen_tc4; bool seen_tc5; bool seen_tc6; +bool seen_eth; SEC("tc/ingress") int tc1(struct __sk_buff *skb) { + struct ethhdr eth = {}; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IP)) + goto out; + if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth))) + goto out; + seen_eth = eth.h_proto == bpf_htons(ETH_P_IP); +out: seen_tc1 = true; return TCX_NEXT; } |