From c359821ac65b7cda0e24f75f6ffd465c3f771204 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Jan 2022 22:14:19 -0800 Subject: libbpf: streamline low-level XDP APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce 4 new netlink-based XDP APIs for attaching, detaching, and querying XDP programs: - bpf_xdp_attach; - bpf_xdp_detach; - bpf_xdp_query; - bpf_xdp_query_id. These APIs replace bpf_set_link_xdp_fd, bpf_set_link_xdp_fd_opts, bpf_get_link_xdp_id, and bpf_get_link_xdp_info APIs ([0]). The latter don't follow a consistent naming pattern and some of them use non-extensible approaches (e.g., struct xdp_link_info which can't be modified without breaking libbpf ABI). The approach I took with these low-level XDP APIs is similar to what we did with low-level TC APIs. There is a nice duality of bpf_tc_attach vs bpf_xdp_attach, and so on. I left bpf_xdp_attach() to support detaching when -1 is specified for prog_fd for generality and convenience, but bpf_xdp_detach() is preferred due to clearer naming and associated semantics. Both bpf_xdp_attach() and bpf_xdp_detach() accept the same opts struct allowing to specify expected old_prog_fd. While doing the refactoring, I noticed that old APIs require users to specify opts with old_fd == -1 to declare "don't care about already attached XDP prog fd" condition. Otherwise, FD 0 is assumed, which is essentially never an intended behavior. So I made this behavior consistent with other kernel and libbpf APIs, in which zero FD means "no FD". This seems to be more in line with the latest thinking in BPF land and should cause less user confusion, hopefully. For querying, I left two APIs, both more generic bpf_xdp_query() allowing to query multiple IDs and attach mode, but also a specialization of it, bpf_xdp_query_id(), which returns only requested prog_id. Uses of prog_id returning bpf_get_link_xdp_id() were so prevalent across selftests and samples, that it seemed a very common use case and using bpf_xdp_query() for doing it felt very cumbersome with a highly branches if/else chain based on flags and attach mode. Old APIs are scheduled for deprecation in libbpf 0.8 release. [0] Closes: https://github.com/libbpf/libbpf/issues/309 Signed-off-by: Andrii Nakryiko Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20220120061422.2710637-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/netlink.c | 117 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 33 deletions(-) (limited to 'tools/lib/bpf/netlink.c') diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 39f25e09b51e..c39c37f99d5c 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -217,6 +217,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); } +int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + int old_prog_fd, err; + + if (!OPTS_VALID(opts, bpf_xdp_attach_opts)) + return libbpf_err(-EINVAL); + + old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + if (old_prog_fd) + flags |= XDP_FLAGS_REPLACE; + else + old_prog_fd = -1; + + err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags); + return libbpf_err(err); +} + +int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + return bpf_xdp_attach(ifindex, -1, flags, opts); +} + int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts) { @@ -303,69 +325,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } -int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags) +int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) { - struct xdp_id_md xdp_id = {}; - __u32 mask; - int ret; struct libbpf_nla_req req = { .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .nh.nlmsg_type = RTM_GETLINK, .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, .ifinfo.ifi_family = AF_PACKET, }; + struct xdp_id_md xdp_id = {}; + int err; - if (flags & ~XDP_FLAGS_MASK || !info_size) + if (!OPTS_VALID(opts, bpf_xdp_query_opts)) + return libbpf_err(-EINVAL); + + if (xdp_flags & ~XDP_FLAGS_MASK) return libbpf_err(-EINVAL); /* Check whether the single {HW,DRV,SKB} mode is set */ - flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); - mask = flags - 1; - if (flags && flags & mask) + xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE; + if (xdp_flags & (xdp_flags - 1)) return libbpf_err(-EINVAL); xdp_id.ifindex = ifindex; - xdp_id.flags = flags; + xdp_id.flags = xdp_flags; - ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, + err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, get_xdp_info, &xdp_id); - if (!ret) { - size_t sz = min(info_size, sizeof(xdp_id.info)); + if (err) + return libbpf_err(err); - memcpy(info, &xdp_id.info, sz); - memset((void *) info + sz, 0, info_size - sz); - } + OPTS_SET(opts, prog_id, xdp_id.info.prog_id); + OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id); + OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id); + OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id); + OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode); - return libbpf_err(ret); + return 0; } -static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags) { - flags &= XDP_FLAGS_MODES; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + size_t sz; + int err; + + if (!info_size) + return libbpf_err(-EINVAL); - if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) - return info->prog_id; - if (flags & XDP_FLAGS_DRV_MODE) - return info->drv_prog_id; - if (flags & XDP_FLAGS_HW_MODE) - return info->hw_prog_id; - if (flags & XDP_FLAGS_SKB_MODE) - return info->skb_prog_id; + err = bpf_xdp_query(ifindex, flags, &opts); + if (err) + return libbpf_err(err); + + /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts + * layout after sz field + */ + sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode)); + memcpy(info, &opts.prog_id, sz); + memset((void *)info + sz, 0, info_size - sz); return 0; } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) { - struct xdp_link_info info; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret; - ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); - if (!ret) - *prog_id = get_xdp_id(&info, flags); + ret = bpf_xdp_query(ifindex, flags, &opts); + if (ret) + return libbpf_err(ret); + + flags &= XDP_FLAGS_MODES; - return libbpf_err(ret); + if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags) + *prog_id = opts.prog_id; + else if (flags & XDP_FLAGS_DRV_MODE) + *prog_id = opts.drv_prog_id; + else if (flags & XDP_FLAGS_HW_MODE) + *prog_id = opts.hw_prog_id; + else if (flags & XDP_FLAGS_SKB_MODE) + *prog_id = opts.skb_prog_id; + else + *prog_id = 0; + + return 0; +} + + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + return bpf_xdp_query_id(ifindex, flags, prog_id); } typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); -- cgit v1.2.3 From 9c3de619e13ee6693ec5ac74f50b7aa89056a70e Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Sat, 12 Feb 2022 00:48:19 +0100 Subject: libbpf: Use dynamically allocated buffer when receiving netlink messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When receiving netlink messages, libbpf was using a statically allocated stack buffer of 4k bytes. This happened to work fine on systems with a 4k page size, but on systems with larger page sizes it can lead to truncated messages. The user-visible impact of this was that libbpf would insist no XDP program was attached to some interfaces because that bit of the netlink message got chopped off. Fix this by switching to a dynamically allocated buffer; we borrow the approach from iproute2 of using recvmsg() with MSG_PEEK|MSG_TRUNC to get the actual size of the pending message before receiving it, adjusting the buffer as necessary. While we're at it, also add retries on interrupted system calls around the recvmsg() call. v2: - Move peek logic to libbpf_netlink_recv(), don't double free on ENOMEM. Fixes: 8bbb77b7c7a2 ("libbpf: Add various netlink helpers") Reported-by: Zhiqian Guan Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20220211234819.612288-1-toke@redhat.com --- tools/lib/bpf/netlink.c | 55 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) (limited to 'tools/lib/bpf/netlink.c') diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index c39c37f99d5c..a598061f6fea 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -87,29 +87,75 @@ enum { NL_DONE, }; +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) +{ + int len; + + do { + len = recvmsg(sock, mhdr, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) + return -errno; + return len; +} + +static int alloc_iov(struct iovec *iov, int len) +{ + void *nbuf; + + nbuf = realloc(iov->iov_base, len); + if (!nbuf) + return -ENOMEM; + + iov->iov_base = nbuf; + iov->iov_len = len; + return 0; +} + static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, void *cookie) { + struct iovec iov = {}; + struct msghdr mhdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; bool multipart = true; struct nlmsgerr *err; struct nlmsghdr *nh; - char buf[4096]; int len, ret; + ret = alloc_iov(&iov, 4096); + if (ret) + goto done; + while (multipart) { start: multipart = false; - len = recv(sock, buf, sizeof(buf), 0); + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); + if (len < 0) { + ret = len; + goto done; + } + + if (len > iov.iov_len) { + ret = alloc_iov(&iov, len); + if (ret) + goto done; + } + + len = netlink_recvmsg(sock, &mhdr, 0); if (len < 0) { - ret = -errno; + ret = len; goto done; } if (len == 0) break; - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { ret = -LIBBPF_ERRNO__WRNGPID; @@ -151,6 +197,7 @@ start: } ret = 0; done: + free(iov.iov_base); return ret; } -- cgit v1.2.3 From 1b8c924a05934d2e758ec7da7bd217ef8ebd80ce Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 16 Feb 2022 23:39:58 -0800 Subject: libbpf: Fix memleak in libbpf_netlink_recv() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure that libbpf_netlink_recv() frees dynamically allocated buffer in all code paths. Fixes: 9c3de619e13e ("libbpf: Use dynamically allocated buffer when receiving netlink messages") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20220217073958.276959-1-andrii@kernel.org --- tools/lib/bpf/netlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools/lib/bpf/netlink.c') diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index a598061f6fea..cbc8967d5402 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -176,7 +176,8 @@ start: libbpf_nla_dump_errormsg(nh); goto done; case NLMSG_DONE: - return 0; + ret = 0; + goto done; default: break; } @@ -188,9 +189,10 @@ start: case NL_NEXT: goto start; case NL_DONE: - return 0; + ret = 0; + goto done; default: - return ret; + goto done; } } } -- cgit v1.2.3