-rw-r--r--  Documentation/bpf/graph_ds_impl.rst                                    266
-rw-r--r--  Documentation/bpf/other.rst                                              3
-rw-r--r--  include/linux/bpf.h                                                     20
-rw-r--r--  include/uapi/linux/bpf.h                                                11
-rw-r--r--  kernel/bpf/btf.c                                                       186
-rw-r--r--  kernel/bpf/helpers.c                                                    94
-rw-r--r--  kernel/bpf/syscall.c                                                    28
-rw-r--r--  kernel/bpf/verifier.c                                                  420
-rw-r--r--  tools/include/uapi/linux/bpf.h                                          11
-rw-r--r--  tools/testing/selftests/bpf/bpf_experimental.h                          24
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/linked_list.c                    49
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/rbtree.c                        117
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree.c                             176
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c    52
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c   49
-rw-r--r--  tools/testing/selftests/bpf/progs/rbtree_fail.c                        322
16 files changed, 1708 insertions, 120 deletions
diff --git a/Documentation/bpf/graph_ds_impl.rst b/Documentation/bpf/graph_ds_impl.rst
new file mode 100644
index 000000000000..8bbf1815efe7
--- /dev/null
+++ b/Documentation/bpf/graph_ds_impl.rst
@@ -0,0 +1,266 @@
+=========================
+BPF Graph Data Structures
+=========================
+
+This document describes implementation details of new-style "graph" data
+structures (linked_list, rbtree), with particular focus on the verifier's
+implementation of semantics specific to those data structures.
+
+Although no specific verifier code is referred to in this document, the document
+assumes that the reader has general knowledge of BPF verifier internals, BPF
+maps, and BPF program writing.
+
+Note that the intent of this document is to describe the current state of
+these graph data structures. **No guarantees** of stability for either
+semantics or APIs are made or implied here.
+
+.. contents::
+ :local:
+ :depth: 2
+
+Introduction
+------------
+
+The BPF map API has historically been the main way to expose data structures
+of various types for use within BPF programs. Some data structures fit naturally
+with the map API (HASH, ARRAY), others less so. Consequently, programs
+interacting with the latter group of data structures can be hard to parse
+for kernel programmers without previous BPF experience.
+
+Luckily, some restrictions which necessitated the use of BPF map semantics are
+no longer relevant. With the introduction of kfuncs, kptrs, and the any-context
+BPF allocator, it is now possible to implement BPF data structures whose API
+and semantics more closely match those exposed to the rest of the kernel.
+
+Two such data structures - linked_list and rbtree - have many verification
+details in common. Because both have "root"s ("head" for linked_list) and
+"node"s, the verifier code and this document refer to common functionality
+as "graph_api", "graph_root", "graph_node", etc.
+
+Unless otherwise stated, examples and semantics below apply to both graph data
+structures.
+
+Unstable API
+------------
+
+Data structures implemented using the BPF map API have historically used BPF
+helper functions - either standard map API helpers like ``bpf_map_update_elem``
+or map-specific helpers. The new-style graph data structures instead use kfuncs
+to define their manipulation helpers. Because there are no stability guarantees
+for kfuncs, the API and semantics for these data structures can be evolved in
+a way that breaks backwards compatibility if necessary.
+
+Root and node types for the new data structures are opaquely defined in the
+``uapi/linux/bpf.h`` header.
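+
+For reference, those opaque definitions - added to both the kernel and tools
+copies of ``uapi/linux/bpf.h`` by this patch - are just appropriately sized
+and aligned blobs of bytes, e.g. for rbtree:
+
+.. code-block:: c
+
+ struct bpf_rb_root {
+ __u64 :64;
+ __u64 :64;
+ } __attribute__((aligned(8)));
+
+ struct bpf_rb_node {
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+ } __attribute__((aligned(8)));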
+
+Locking
+-------
+
+The new-style data structures are intrusive and are defined similarly to their
+vanilla kernel counterparts:
+
+.. code-block:: c
+
+ struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+ };
+
+ struct bpf_spin_lock glock;
+ struct bpf_rb_root groot __contains(node_data, node);
+
+The "root" type for both linked_list and rbtree expects to be in a map_value
+which also contains a ``bpf_spin_lock`` - in the above example both global
+variables are placed in a single-value arraymap. The verifier considers this
+spin_lock to be associated with the ``bpf_rb_root`` by virtue of both being in
+the same map_value and will enforce that the correct lock is held when
+verifying BPF programs that manipulate the tree. Since this lock checking
+happens at verification time, there is no runtime penalty.
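+
+For example, assuming the declarations above, the verifier rejects a program
+that touches ``groot`` without holding ``glock`` (a sketch; as in the examples
+below, the ``less`` callback argument of ``bpf_rbtree_add`` is elided):
+
+.. code-block:: c
+
+ struct node_data *n = bpf_obj_new(typeof(*n));
+
+ if (!n)
+ return 0;
+
+ bpf_rbtree_add(&groot, n); /* REJECTED: glock is not held */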
+
+Non-owning references
+---------------------
+
+**Motivation**
+
+Consider the following BPF code:
+
+.. code-block:: c
+
+ struct node_data *n = bpf_obj_new(typeof(*n)); /* ACQUIRED */
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* PASSED */
+
+ bpf_spin_unlock(&lock);
+
+From the verifier's perspective, the pointer ``n`` returned from ``bpf_obj_new``
+has type ``PTR_TO_BTF_ID | MEM_ALLOC``, with a ``btf_id`` of
+``struct node_data`` and a nonzero ``ref_obj_id``. Because it holds ``n``, the
+program owns the lifetime of the pointee (the object pointed to by ``n``).
+The BPF program must pass off ownership before exiting - either via
+``bpf_obj_drop``, which ``free``'s the object, or by adding it to ``tree`` with
+``bpf_rbtree_add``.
+
+(``ACQUIRED`` and ``PASSED`` comments in the example denote statements where
+"ownership is acquired" and "ownership is passed", respectively)
+
+What should the verifier do with ``n`` after ownership is passed off? If the
+object was ``free``'d with ``bpf_obj_drop`` the answer is obvious: the verifier
+should reject programs which attempt to access ``n`` after ``bpf_obj_drop`` as
+the object is no longer valid. The underlying memory may have been reused for
+some other allocation, unmapped, etc.
+
+When ownership is passed to ``tree`` via ``bpf_rbtree_add`` the answer is less
+obvious. The verifier could enforce the same semantics as for ``bpf_obj_drop``,
+but that would result in programs with useful, common coding patterns being
+rejected, e.g.:
+
+.. code-block:: c
+
+ int x;
+ struct node_data *n = bpf_obj_new(typeof(*n)); /* ACQUIRED */
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* PASSED */
+ x = n->data;
+ n->data = 42;
+
+ bpf_spin_unlock(&lock);
+
+Both the read from and write to ``n->data`` would be rejected. The verifier
+can do better, though, by taking advantage of two details:
+
+ * Graph data structure APIs can only be used when the ``bpf_spin_lock``
+ associated with the graph root is held
+
+ * Both graph data structures have pointer stability
+
+ * Because graph nodes are allocated with ``bpf_obj_new`` and
+ adding / removing from the root involves fiddling with the
+ ``bpf_{list,rb}_node`` field of the node struct, a graph node will
+ remain at the same address after either operation.
+
+Because the associated ``bpf_spin_lock`` must be held by any program adding
+or removing, if we're in the critical section bounded by that lock, we know
+that no other program can add or remove until the end of the critical section.
+This combined with pointer stability means that, until the critical section
+ends, we can safely access the graph node through ``n`` even after it was used
+to pass ownership.
+
+The verifier considers such a reference a *non-owning reference*. The ref
+returned by ``bpf_obj_new`` is accordingly considered an *owning reference*.
+Both terms currently only have meaning in the context of graph nodes and the
+graph API.
+
+**Details**
+
+Let's enumerate the properties of both types of references.
+
+*owning reference*
+
+ * This reference controls the lifetime of the pointee
+
+ * Ownership of pointee must be 'released' by passing it to some graph API
+ kfunc, or via ``bpf_obj_drop``, which ``free``'s the pointee
+
+ * If not released before program ends, verifier considers program invalid
+
+ * Access to the pointee's memory will not page fault
+
+*non-owning reference*
+
+ * This reference does not own the pointee
+
+ * It cannot be used to add the graph node to a graph root, nor ``free``'d via
+ ``bpf_obj_drop``
+
+ * No explicit control of lifetime, but can infer valid lifetime based on
+ non-owning ref existence (see explanation below)
+
+ * Access to the pointee's memory will not page fault
+
+From the verifier's perspective non-owning references can only exist
+between spin_lock and spin_unlock. Why? After spin_unlock another program
+can do arbitrary operations on the data structure, like removing and
+``free``-ing via ``bpf_obj_drop``. A non-owning ref to some chunk of memory
+that was removed, ``free``'d, and reused via ``bpf_obj_new`` would point to
+an entirely different thing. Or the memory could go away.
+
+To prevent this logic violation all non-owning references are invalidated by the
+verifier after a critical section ends. This is necessary to ensure the "will
+not page fault" property of non-owning references. So if the verifier hasn't
+invalidated a non-owning ref, accessing it will not page fault.
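+
+A sketch of the resulting behavior, reusing ``tree`` and ``lock`` from the
+examples above:
+
+.. code-block:: c
+
+ struct node_data *n;
+ long x;
+
+ bpf_spin_lock(&lock);
+ n = bpf_rbtree_first(&tree); /* n is non-owning (or NULL) */
+ bpf_spin_unlock(&lock); /* all non-owning refs invalidated */
+
+ if (n)
+ x = n->data; /* REJECTED: n is no longer valid */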
+
+Currently ``bpf_obj_drop`` is not allowed in the critical section, so
+if there's a valid non-owning ref, we must be in a critical section, and can
+conclude that the ref's memory hasn't been dropped-and-``free``'d or
+dropped-and-reused.
+
+Any reference to a node that is in an rbtree _must_ be non-owning, since
+the tree has control of the pointee's lifetime. Similarly, any ref to a node
+that isn't in an rbtree _must_ be owning. This results in a nice property:
+graph API add / remove implementations don't need to check if a node
+has already been added (or already removed), as the ownership model
+allows the verifier to prevent such a state from being valid by simply checking
+types.
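+
+For example, a double add is caught purely by reference type (sketch):
+
+.. code-block:: c
+
+ struct node_data *n = bpf_obj_new(typeof(*n)); /* owning */
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* n becomes non-owning */
+ bpf_rbtree_add(&tree, n); /* REJECTED: owning ref required */
+
+ bpf_spin_unlock(&lock);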
+
+However, pointer aliasing poses an issue for the above "nice property".
+Consider the following example:
+
+.. code-block:: c
+
+ struct node_data *n, *m, *o, *p;
+ n = bpf_obj_new(typeof(*n)); /* 1 */
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* 2 */
+ m = bpf_rbtree_first(&tree); /* 3 */
+
+ o = bpf_rbtree_remove(&tree, n); /* 4 */
+ p = bpf_rbtree_remove(&tree, m); /* 5 */
+
+ bpf_spin_unlock(&lock);
+
+ bpf_obj_drop(o);
+ bpf_obj_drop(p); /* 6 */
+
+Assume the tree is empty before this program runs. If we track verifier state
+changes here using the numbers in the comments above:
+
+ 1) n is an owning reference
+
+ 2) n is a non-owning reference, it's been added to the tree
+
+ 3) n and m are non-owning references, they both point to the same node
+
+ 4) o is an owning reference, n and m non-owning, all point to same node
+
+ 5) o and p are owning, n and m non-owning, all point to the same node
+
+ 6) a double-free has occurred, since o and p point to same node and o was
+ ``free``'d in previous statement
+
+States 4 and 5 violate our "nice property", as there are non-owning refs to
+a node which is not in an rbtree. Statement 5 will try to remove a node which
+has already been removed as a result of this violation. State 6 is a dangerous
+double-free.
+
+At a minimum we should prevent state 6 from being possible. If we can't also
+prevent state 5 then we must abandon our "nice property" and check whether a
+node has already been removed at runtime.
+
+We prevent both by generalizing the "invalidate non-owning references" behavior
+of ``bpf_spin_unlock`` and doing similar invalidation after
+``bpf_rbtree_remove``. The logic here is that any graph API kfunc which:
+
+ * takes an arbitrary node argument
+
+ * removes it from the data structure
+
+ * returns an owning reference to the removed node
+
+May result in a state where some other non-owning reference points to the same
+node. So ``remove``-type kfuncs must be considered a non-owning reference
+invalidation point as well.
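+
+Applied to the aliasing example above, this invalidation means statement 4
+kills ``n`` and ``m``, so the dangerous statements 5 and 6 can no longer pass
+verification (sketch):
+
+.. code-block:: c
+
+ n = bpf_obj_new(typeof(*n));
+
+ bpf_spin_lock(&lock);
+
+ bpf_rbtree_add(&tree, n); /* n non-owning */
+ m = bpf_rbtree_first(&tree); /* m non-owning */
+
+ o = bpf_rbtree_remove(&tree, n); /* o owning; n and m invalidated */
+ p = bpf_rbtree_remove(&tree, m); /* REJECTED: m was invalidated */
+
+ bpf_spin_unlock(&lock);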
diff --git a/Documentation/bpf/other.rst b/Documentation/bpf/other.rst
index 3d61963403b4..7e6b12018802 100644
--- a/Documentation/bpf/other.rst
+++ b/Documentation/bpf/other.rst
@@ -6,4 +6,5 @@ Other
:maxdepth: 1
ringbuf
- llvm_reloc
\ No newline at end of file
+ llvm_reloc
+ graph_ds_impl
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8b5d0b4c4ada..be34f7deb6c3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -181,7 +181,10 @@ enum btf_field_type {
BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
BPF_LIST_HEAD = (1 << 4),
BPF_LIST_NODE = (1 << 5),
- BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD,
+ BPF_RB_ROOT = (1 << 6),
+ BPF_RB_NODE = (1 << 7),
+ BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
+ BPF_RB_NODE | BPF_RB_ROOT,
};
struct btf_field_kptr {
@@ -285,6 +288,10 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
return "bpf_list_head";
case BPF_LIST_NODE:
return "bpf_list_node";
+ case BPF_RB_ROOT:
+ return "bpf_rb_root";
+ case BPF_RB_NODE:
+ return "bpf_rb_node";
default:
WARN_ON_ONCE(1);
return "unknown";
@@ -305,6 +312,10 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_list_head);
case BPF_LIST_NODE:
return sizeof(struct bpf_list_node);
+ case BPF_RB_ROOT:
+ return sizeof(struct bpf_rb_root);
+ case BPF_RB_NODE:
+ return sizeof(struct bpf_rb_node);
default:
WARN_ON_ONCE(1);
return 0;
@@ -325,6 +336,10 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_list_head);
case BPF_LIST_NODE:
return __alignof__(struct bpf_list_node);
+ case BPF_RB_ROOT:
+ return __alignof__(struct bpf_rb_root);
+ case BPF_RB_NODE:
+ return __alignof__(struct bpf_rb_node);
default:
WARN_ON_ONCE(1);
return 0;
@@ -435,6 +450,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
void bpf_timer_cancel_and_free(void *timer);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock);
+void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
+ struct bpf_spin_lock *spin_lock);
+
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 17afd2b35ee5..1503f61336b6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6917,6 +6917,17 @@ struct bpf_list_node {
__u64 :64;
} __attribute__((aligned(8)));
+struct bpf_rb_root {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
+struct bpf_rb_node {
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 740bdb045b14..6582735ef1fc 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3324,12 +3324,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf,
return NULL;
}
-static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
- const struct btf_type *t, int comp_idx,
- u32 off, int sz, struct btf_field_info *info)
+static int
+btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
+ const struct btf_type *t, int comp_idx, u32 off,
+ int sz, struct btf_field_info *info,
+ enum btf_field_type head_type)
{
+ const char *node_field_name;
const char *value_type;
- const char *list_node;
s32 id;
if (!__btf_type_is_struct(t))
@@ -3339,26 +3341,32 @@ static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
if (!value_type)
return -EINVAL;
- list_node = strstr(value_type, ":");
- if (!list_node)
+ node_field_name = strstr(value_type, ":");
+ if (!node_field_name)
return -EINVAL;
- value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN);
+ value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
if (!value_type)
return -ENOMEM;
id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
kfree(value_type);
if (id < 0)
return id;
- list_node++;
- if (str_is_empty(list_node))
+ node_field_name++;
+ if (str_is_empty(node_field_name))
return -EINVAL;
- info->type = BPF_LIST_HEAD;
+ info->type = head_type;
info->off = off;
info->graph_root.value_btf_id = id;
- info->graph_root.node_name = list_node;
+ info->graph_root.node_name = node_field_name;
return BTF_FIELD_FOUND;
}
+#define field_mask_test_name(field_type, field_type_str) \
+ if (field_mask & field_type && !strcmp(name, field_type_str)) { \
+ type = field_type; \
+ goto end; \
+ }
+
static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
int *align, int *sz)
{
@@ -3382,18 +3390,11 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
goto end;
}
}
- if (field_mask & BPF_LIST_HEAD) {
- if (!strcmp(name, "bpf_list_head")) {
- type = BPF_LIST_HEAD;
- goto end;
- }
- }
- if (field_mask & BPF_LIST_NODE) {
- if (!strcmp(name, "bpf_list_node")) {
- type = BPF_LIST_NODE;
- goto end;
- }
- }
+ field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
+ field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
+ field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
+ field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
+
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & BPF_KPTR) {
type = BPF_KPTR_REF;
@@ -3406,6 +3407,8 @@ end:
return type;
}
+#undef field_mask_test_name
+
static int btf_find_struct_field(const struct btf *btf,
const struct btf_type *t, u32 field_mask,
struct btf_field_info *info, int info_cnt)
@@ -3438,6 +3441,7 @@ static int btf_find_struct_field(const struct btf *btf,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
ret = btf_find_struct(btf, member_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3451,8 +3455,11 @@ static int btf_find_struct_field(const struct btf *btf,
return ret;
break;
case BPF_LIST_HEAD:
- ret = btf_find_list_head(btf, t, member_type, i, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
+ case BPF_RB_ROOT:
+ ret = btf_find_graph_root(btf, t, member_type,
+ i, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp,
+ field_type);
if (ret < 0)
return ret;
break;
@@ -3499,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3512,8 +3520,11 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
return ret;
break;
case BPF_LIST_HEAD:
- ret = btf_find_list_head(btf, var, var_type, -1, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
+ case BPF_RB_ROOT:
+ ret = btf_find_graph_root(btf, var, var_type,
+ -1, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp,
+ field_type);
if (ret < 0)
return ret;
break;
@@ -3615,8 +3626,11 @@ end_btf:
return ret;
}
-static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
- struct btf_field_info *info)
+static int btf_parse_graph_root(const struct btf *btf,
+ struct btf_field *field,
+ struct btf_field_info *info,
+ const char *node_type_name,
+ size_t node_type_align)
{
const struct btf_type *t, *n = NULL;
const struct btf_member *member;
@@ -3638,13 +3652,13 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
n = btf_type_by_id(btf, member->type);
if (!__btf_type_is_struct(n))
return -EINVAL;
- if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off)))
+ if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off)))
return -EINVAL;
offset = __btf_member_bit_offset(n, member);
if (offset % 8)
return -EINVAL;
offset /= 8;
- if (offset % __alignof__(struct bpf_list_node))
+ if (offset % node_type_align)
return -EINVAL;
field->graph_root.btf = (struct btf *)btf;
@@ -3656,6 +3670,20 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
return 0;
}
+static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
+{
+ return btf_parse_graph_root(btf, field, info, "bpf_list_node",
+ __alignof__(struct bpf_list_node));
+}
+
+static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
+{
+ return btf_parse_graph_root(btf, field, info, "bpf_rb_node",
+ __alignof__(struct bpf_rb_node));
+}
+
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size)
{
@@ -3718,7 +3746,13 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
if (ret < 0)
goto end;
break;
+ case BPF_RB_ROOT:
+ ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]);
+ if (ret < 0)
+ goto end;
+ break;
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
break;
default:
ret = -EFAULT;
@@ -3727,8 +3761,33 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->cnt++;
}
- /* bpf_list_head requires bpf_spin_lock */
- if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) {
+ /* bpf_{list_head, rb_root} require bpf_spin_lock */
+ if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
+ btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* need collection identity for non-owning refs before allowing this
+ *
+ * Consider a node type w/ both list and rb_node fields:
+ * struct node {
+ * struct bpf_list_node l;
+ * struct bpf_rb_node r;
+ * }
+ *
+ * Used like so:
+ * struct node *n = bpf_obj_new(....);
+ * bpf_list_push_front(&list_head, &n->l);
+ * bpf_rbtree_remove(&rb_root, &n->r);
+ *
+ * It should not be possible to rbtree_remove the node since it hasn't
+ * been added to a tree. But push_front converts n to a non-owning
+ * reference, and rbtree_remove accepts the non-owning reference to
+ * a type w/ bpf_rb_node field.
+ */
+ if (btf_record_has_field(rec, BPF_LIST_NODE) &&
+ btf_record_has_field(rec, BPF_RB_NODE)) {
ret = -EINVAL;
goto end;
}
@@ -3739,22 +3798,28 @@ end:
return ERR_PTR(ret);
}
+#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
+#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
+
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
{
int i;
- /* There are two owning types, kptr_ref and bpf_list_head. The former
- * only supports storing kernel types, which can never store references
- * to program allocated local types, atleast not yet. Hence we only need
- * to ensure that bpf_list_head ownership does not form cycles.
+ /* There are three types that signify ownership of some other type:
+ * kptr_ref, bpf_list_head, bpf_rb_root.
+ * kptr_ref only supports storing kernel types, which can't store
+ * references to program allocated local types.
+ *
+ * Hence we only need to ensure that bpf_{list_head,rb_root} ownership
+ * does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
u32 btf_id;
- if (!(rec->fields[i].type & BPF_LIST_HEAD))
+ if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id);
@@ -3762,39 +3827,47 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
return -EFAULT;
rec->fields[i].graph_root.value_rec = meta->record;
- if (!(rec->field_mask & BPF_LIST_NODE))
+ /* We need to set value_rec for all root types, but no need
+ * to check ownership cycle for a type unless it's also a
+ * node type.
+ */
+ if (!(rec->field_mask & GRAPH_NODE_MASK))
continue;
/* We need to ensure ownership acyclicity among all types. The
* proper way to do it would be to topologically sort all BTF
* IDs based on the ownership edges, since there can be multiple
- * bpf_list_head in a type. Instead, we use the following
- * reasoning:
+ * bpf_{list_head,rb_node} in a type. Instead, we use the
+ * following reasoning:
*
* - A type can only be owned by another type in user BTF if it
- * has a bpf_list_node.
+ * has a bpf_{list,rb}_node. Let's call these node types.
* - A type can only _own_ another type in user BTF if it has a
- * bpf_list_head.
+ * bpf_{list_head,rb_root}. Let's call these root types.
*
- * We ensure that if a type has both bpf_list_head and
- * bpf_list_node, its element types cannot be owning types.
+ * We ensure that if a type is both a root and node, its
+ * element types cannot be root types.
*
* To ensure acyclicity:
*
- * When A only has bpf_list_head, ownership chain can be:
+ * When A is a root type but not a node, its ownership
+ * chain can be:
* A -> B -> C
* Where:
- * - B has both bpf_list_head and bpf_list_node.
- * - C only has bpf_list_node.
+ * - A is a root, e.g. has bpf_rb_root.
+ * - B is both a root and node, e.g. has bpf_rb_node and
+ * bpf_list_head.
+ * - C is only a node, e.g. has bpf_list_node.
*
- * When A has both bpf_list_head and bpf_list_node, some other
- * type already owns it in the BTF domain, hence it can not own
- * another owning type through any of the bpf_list_head edges.
+ * When A is both a root and node, some other type already
+ * owns it in the BTF domain, hence it can not own
+ * another root type through any of the ownership edges.
* A -> B
* Where:
- * - B only has bpf_list_node.
+ * - A is both a root and node.
+ * - B is only a node.
*/
- if (meta->record->field_mask & BPF_LIST_HEAD)
+ if (meta->record->field_mask & GRAPH_ROOT_MASK)
return -ELOOP;
}
return 0;
@@ -5256,6 +5329,8 @@ static const char *alloc_obj_fields[] = {
"bpf_spin_lock",
"bpf_list_head",
"bpf_list_node",
+ "bpf_rb_root",
+ "bpf_rb_node",
};
static struct btf_struct_metas *
@@ -5329,7 +5404,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
- record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size);
+ record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
+ BPF_RB_ROOT | BPF_RB_NODE, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2dae44581922..5b278a38ae58 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1772,6 +1772,46 @@ unlock:
}
}
+/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
+ * 'rb_node *', so field name of rb_node within containing struct is not
+ * needed.
+ *
+ * Since bpf_rb_tree's node type has a corresponding struct btf_field with
+ * graph_root.node_offset, it's not necessary to know field name
+ * or type of node struct
+ */
+#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
+ for (pos = rb_first_postorder(root); \
+ pos && ({ n = rb_next_postorder(pos); 1; }); \
+ pos = n)
+
+void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
+ struct bpf_spin_lock *spin_lock)
+{
+ struct rb_root_cached orig_root, *root = rb_root;
+ struct rb_node *pos, *n;
+ void *obj;
+
+ BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
+ BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
+
+ __bpf_spin_lock_irqsave(spin_lock);
+ orig_root = *root;
+ *root = RB_ROOT_CACHED;
+ __bpf_spin_unlock_irqrestore(spin_lock);
+
+ bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
+ obj = pos;
+ obj -= field->graph_root.node_offset;
+
+ bpf_obj_free_fields(field->graph_root.value_rec, obj);
+
+ migrate_disable();
+ bpf_mem_free(&bpf_global_ma, obj);
+ migrate_enable();
+ }
+}
+
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");
@@ -1844,6 +1884,56 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
return __bpf_list_del(head, true);
}
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+ struct bpf_rb_node *node)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+ struct rb_node *n = (struct rb_node *)node;
+
+ rb_erase_cached(n, r);
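+ /* RB_CLEAR_NODE marks the erased node as no longer in a tree */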
+ RB_CLEAR_NODE(n);
+ return (struct bpf_rb_node *)n;
+}
+
+/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
+ * program
+ */
+static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ void *less)
+{
+ struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
+ bpf_callback_t cb = (bpf_callback_t)less;
+ struct rb_node *parent = NULL;
+ bool leftmost = true;
+
+ while (*link) {
+ parent = *link;
+ if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node((struct rb_node *)node, parent, link);
+ rb_insert_color_cached((struct rb_node *)node,
+ (struct rb_root_cached *)root, leftmost);
+}
+
+__bpf_kfunc void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+{
+ __bpf_rbtree_add(root, node, (void *)less);
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+
+ return (struct bpf_rb_node *)rb_first_cached(r);
+}
+
/**
* bpf_task_acquire - Acquire a reference to a task. A task acquired by this
* kfunc which is not stored in a map as a kptr, must be released by calling
@@ -2068,6 +2158,10 @@ BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_rbtree_add)
+BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
+
#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cda8d00f3762..e3fcdc9836a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -537,9 +537,6 @@ void btf_record_free(struct btf_record *rec)
return;
for (i = 0; i < rec->cnt; i++) {
switch (rec->fields[i].type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
if (rec->fields[i].kptr.module)
@@ -548,7 +545,11 @@ void btf_record_free(struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
- /* Nothing to release for bpf_list_head */
+ case BPF_RB_ROOT:
+ case BPF_RB_NODE:
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ /* Nothing to release */
break;
default:
WARN_ON_ONCE(1);
@@ -581,9 +582,6 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
new_rec->cnt = 0;
for (i = 0; i < rec->cnt; i++) {
switch (fields[i].type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
btf_get(fields[i].kptr.btf);
@@ -594,7 +592,11 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
- /* Nothing to acquire for bpf_list_head */
+ case BPF_RB_ROOT:
+ case BPF_RB_NODE:
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ /* Nothing to acquire */
break;
default:
ret = -EFAULT;
@@ -674,7 +676,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
continue;
bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
break;
+ case BPF_RB_ROOT:
+ if (WARN_ON_ONCE(rec->spin_lock_off < 0))
+ continue;
+ bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
+ break;
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
break;
default:
WARN_ON_ONCE(1);
@@ -1010,7 +1018,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return -EINVAL;
map->record = btf_parse_fields(btf, value_type,
- BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD,
+ BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
+ BPF_RB_ROOT,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -1058,6 +1067,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
}
break;
case BPF_LIST_HEAD:
+ case BPF_RB_ROOT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f176bc15c879..21e08c111702 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -191,6 +191,7 @@ struct bpf_verifier_stack_elem {
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
+static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
@@ -1642,6 +1643,16 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
reg->type &= ~PTR_MAYBE_NULL;
}
+static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
+ struct btf_field_graph_root *ds_head)
+{
+ __mark_reg_known_zero(&regs[regno]);
+ regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[regno].btf = ds_head->btf;
+ regs[regno].btf_id = ds_head->value_btf_id;
+ regs[regno].off = ds_head->node_offset;
+}
+
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
return type_is_pkt_pointer(reg->type);
@@ -6837,6 +6848,10 @@ skip_type_check:
meta->ret_btf_id = reg->btf_id;
break;
case ARG_PTR_TO_SPIN_LOCK:
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
+ return -EACCES;
+ }
if (meta->func_id == BPF_FUNC_spin_lock) {
err = process_spin_lock(env, regno, true);
if (err)
@@ -7420,6 +7435,8 @@ static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx);
+static bool is_callback_calling_kfunc(u32 btf_id);
+
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
@@ -7474,10 +7491,18 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
* interested in validating only BPF helpers that can call subprogs as
* callbacks
*/
- if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) {
- verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n",
- func_id_name(insn->imm), insn->imm);
- return -EFAULT;
+ if (set_callee_state_cb != set_callee_state) {
+ if (bpf_pseudo_kfunc_call(insn) &&
+ !is_callback_calling_kfunc(insn->imm)) {
+ verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ } else if (!bpf_pseudo_kfunc_call(insn) &&
+ !is_callback_calling_function(insn->imm)) { /* helper */
+ verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ }
}
if (insn->code == (BPF_JMP | BPF_CALL) &&
@@ -7742,6 +7767,63 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
return 0;
}
+static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx)
+{
+ /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
+ *
+ * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset
+ * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
+ * by this point, so look at 'root'
+ */
+ struct btf_field *field;
+
+ field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
+ BPF_RB_ROOT);
+ if (!field || !field->graph_root.value_btf_id)
+ return -EFAULT;
+
+ mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
+ ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
+ mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
+ ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
+
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ callee->in_callback_fn = true;
+ callee->callback_ret_range = tnum_range(0, 1);
+ return 0;
+}
+
+static bool is_rbtree_lock_required_kfunc(u32 btf_id);
+
+/* Are we currently verifying the callback for a rbtree helper that must
+ * be called with lock held? If so, no need to complain about unreleased
+ * lock
+ */
+static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
+{
+ struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_insn *insn = env->prog->insnsi;
+ struct bpf_func_state *callee;
+ int kfunc_btf_id;
+
+ if (!state->curframe)
+ return false;
+
+ callee = state->frame[state->curframe];
+
+ if (!callee->in_callback_fn)
+ return false;
+
+ kfunc_btf_id = insn[callee->callsite].imm;
+ return is_rbtree_lock_required_kfunc(kfunc_btf_id);
+}
+
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
@@ -8510,6 +8592,7 @@ struct bpf_kfunc_call_arg_meta {
bool r0_rdonly;
u32 ret_btf_id;
u64 r0_size;
+ u32 subprogno;
struct {
u64 value;
bool found;
@@ -8521,6 +8604,9 @@ struct bpf_kfunc_call_arg_meta {
struct {
struct btf_field *field;
} arg_list_head;
+ struct {
+ struct btf_field *field;
+ } arg_rbtree_root;
};
static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
@@ -8632,12 +8718,16 @@ enum {
KF_ARG_DYNPTR_ID,
KF_ARG_LIST_HEAD_ID,
KF_ARG_LIST_NODE_ID,
+ KF_ARG_RB_ROOT_ID,
+ KF_ARG_RB_NODE_ID,
};
BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr_kern)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
+BTF_ID(struct, bpf_rb_root)
+BTF_ID(struct, bpf_rb_node)
static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
const struct btf_param *arg, int type)
@@ -8671,6 +8761,28 @@ static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
}
+static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
+}
+
+static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
+}
+
+static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
+ const struct btf_param *arg)
+{
+ const struct btf_type *t;
+
+ t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
+ if (!t)
+ return false;
+
+ return true;
+}
+
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
const struct btf *btf,
@@ -8730,6 +8842,9 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
KF_ARG_PTR_TO_MEM,
KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
+ KF_ARG_PTR_TO_CALLBACK,
+ KF_ARG_PTR_TO_RB_ROOT,
+ KF_ARG_PTR_TO_RB_NODE,
};
enum special_kfunc_type {
@@ -8743,6 +8858,9 @@ enum special_kfunc_type {
KF_bpf_rdonly_cast,
KF_bpf_rcu_read_lock,
KF_bpf_rcu_read_unlock,
+ KF_bpf_rbtree_remove,
+ KF_bpf_rbtree_add,
+ KF_bpf_rbtree_first,
};
BTF_SET_START(special_kfunc_set)
@@ -8754,6 +8872,9 @@ BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
+BTF_ID(func, bpf_rbtree_remove)
+BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_first)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -8767,6 +8888,9 @@ BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
+BTF_ID(func, bpf_rbtree_remove)
+BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_first)
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -8828,6 +8952,12 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_LIST_NODE;
+ if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_RB_ROOT;
+
+ if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_RB_NODE;
+
if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
if (!btf_type_is_struct(ref_t)) {
verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -8837,6 +8967,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
return KF_ARG_PTR_TO_BTF_ID;
}
+ if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_CALLBACK;
+
if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
arg_mem_size = true;
@@ -9084,95 +9217,203 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
btf_id == special_kfunc_list[KF_bpf_list_pop_back];
}
-static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg, u32 regno,
- struct bpf_kfunc_call_arg_meta *meta)
+static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_first];
+}
+
+static bool is_bpf_graph_api_kfunc(u32 btf_id)
+{
+ return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id);
+}
+
+static bool is_callback_calling_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add];
+}
+
+static bool is_rbtree_lock_required_kfunc(u32 btf_id)
{
+ return is_bpf_rbtree_api_kfunc(btf_id);
+}
+
+static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
+ enum btf_field_type head_field_type,
+ u32 kfunc_btf_id)
+{
+ bool ret;
+
+ switch (head_field_type) {
+ case BPF_LIST_HEAD:
+ ret = is_bpf_list_api_kfunc(kfunc_btf_id);
+ break;
+ case BPF_RB_ROOT:
+ ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
+ break;
+ default:
+ verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
+ btf_field_type_name(head_field_type));
+ return false;
+ }
+
+ if (!ret)
+ verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
+ btf_field_type_name(head_field_type));
+ return ret;
+}
+
+static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
+ enum btf_field_type node_field_type,
+ u32 kfunc_btf_id)
+{
+ bool ret;
+
+ switch (node_field_type) {
+ case BPF_LIST_NODE:
+ ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]);
+ break;
+ case BPF_RB_NODE:
+ ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]);
+ break;
+ default:
+ verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
+ btf_field_type_name(node_field_type));
+ return false;
+ }
+
+ if (!ret)
+ verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
+ btf_field_type_name(node_field_type));
+ return ret;
+}
+
+static int
+__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta,
+ enum btf_field_type head_field_type,
+ struct btf_field **head_field)
+{
+ const char *head_type_name;
struct btf_field *field;
struct btf_record *rec;
- u32 list_head_off;
+ u32 head_off;
- if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) {
- verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n");
+ if (meta->btf != btf_vmlinux) {
+ verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
return -EFAULT;
}
+ if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
+ return -EFAULT;
+
+ head_type_name = btf_field_type_name(head_field_type);
if (!tnum_is_const(reg->var_off)) {
verbose(env,
- "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n",
- regno);
+ "R%d doesn't have constant offset. %s has to be at the constant offset\n",
+ regno, head_type_name);
return -EINVAL;
}
rec = reg_btf_record(reg);
- list_head_off = reg->off + reg->var_off.value;
- field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD);
+ head_off = reg->off + reg->var_off.value;
+ field = btf_record_find(rec, head_off, head_field_type);
if (!field) {
- verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off);
+ verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
return -EINVAL;
}
/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
if (check_reg_allocation_locked(env, reg)) {
- verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n",
- rec->spin_lock_off);
+ verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
+ rec->spin_lock_off, head_type_name);
return -EINVAL;
}
- if (meta->arg_list_head.field) {
- verbose(env, "verifier internal error: repeating bpf_list_head arg\n");
+ if (*head_field) {
+ verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
return -EFAULT;
}
- meta->arg_list_head.field = field;
+ *head_field = field;
return 0;
}
-static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
+static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, u32 regno,
struct bpf_kfunc_call_arg_meta *meta)
{
+ return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
+ &meta->arg_list_head.field);
+}
+
+static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
+ &meta->arg_rbtree_root.field);
+}
+
+static int
+__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta,
+ enum btf_field_type head_field_type,
+ enum btf_field_type node_field_type,
+ struct btf_field **node_field)
+{
+ const char *node_type_name;
const struct btf_type *et, *t;
struct btf_field *field;
- u32 list_node_off;
+ u32 node_off;
- if (meta->btf != btf_vmlinux ||
- (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] &&
- meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) {
- verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n");
+ if (meta->btf != btf_vmlinux) {
+ verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
return -EFAULT;
}
+ if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
+ return -EFAULT;
+
+ node_type_name = btf_field_type_name(node_field_type);
if (!tnum_is_const(reg->var_off)) {
verbose(env,
- "R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n",
- regno);
+ "R%d doesn't have constant offset. %s has to be at the constant offset\n",
+ regno, node_type_name);
return -EINVAL;
}
- list_node_off = reg->off + reg->var_off.value;
- field = reg_find_field_offset(reg, list_node_off, BPF_LIST_NODE);
- if (!field || field->offset != list_node_off) {
- verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
+ node_off = reg->off + reg->var_off.value;
+ field = reg_find_field_offset(reg, node_off, node_field_type);
+ if (!field || field->offset != node_off) {
+ verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
return -EINVAL;
}
- field = meta->arg_list_head.field;
+ field = *node_field;
et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
t = btf_type_by_id(reg->btf, reg->btf_id);
if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
field->graph_root.value_btf_id, true)) {
- verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d "
+ verbose(env, "operation on %s expects arg#1 %s at offset=%d "
"in struct %s, but arg is at offset=%d in struct %s\n",
+ btf_field_type_name(head_field_type),
+ btf_field_type_name(node_field_type),
field->graph_root.node_offset,
btf_name_by_offset(field->graph_root.btf, et->name_off),
- list_node_off, btf_name_by_offset(reg->btf, t->name_off));
+ node_off, btf_name_by_offset(reg->btf, t->name_off));
return -EINVAL;
}
- if (list_node_off != field->graph_root.node_offset) {
- verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n",
- list_node_off, field->graph_root.node_offset,
+ if (node_off != field->graph_root.node_offset) {
+ verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
+ node_off, btf_field_type_name(node_field_type),
+ field->graph_root.node_offset,
btf_name_by_offset(field->graph_root.btf, et->name_off));
return -EINVAL;
}
@@ -9180,6 +9421,24 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
return 0;
}
+static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
+ BPF_LIST_HEAD, BPF_LIST_NODE,
+ &meta->arg_list_head.field);
+}
+
+static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
+ BPF_RB_ROOT, BPF_RB_NODE,
+ &meta->arg_rbtree_root.field);
+}
+
static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
{
const char *func_name = meta->func_name, *ref_tname;
@@ -9314,8 +9573,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_DYNPTR:
case KF_ARG_PTR_TO_LIST_HEAD:
case KF_ARG_PTR_TO_LIST_NODE:
+ case KF_ARG_PTR_TO_RB_ROOT:
+ case KF_ARG_PTR_TO_RB_NODE:
case KF_ARG_PTR_TO_MEM:
case KF_ARG_PTR_TO_MEM_SIZE:
+ case KF_ARG_PTR_TO_CALLBACK:
/* Trusted by default */
break;
default:
@@ -9392,6 +9654,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_RB_ROOT:
+ if (reg->type != PTR_TO_MAP_VALUE &&
+ reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
+ return -EINVAL;
+ }
+ if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_LIST_NODE:
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
verbose(env, "arg#%d expected pointer to allocated object\n", i);
@@ -9405,6 +9681,31 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_RB_NODE:
+ if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
+ if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
+ verbose(env, "rbtree_remove node input must be non-owning ref\n");
+ return -EINVAL;
+ }
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+ return -EINVAL;
+ }
+ } else {
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to allocated object\n", i);
+ return -EINVAL;
+ }
+ if (!reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_BTF_ID:
/* Only base_type is checked, further checks are done here */
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
@@ -9440,6 +9741,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
/* Skip next '__sz' argument */
i++;
break;
+ case KF_ARG_PTR_TO_CALLBACK:
+ meta->subprogno = reg->subprogno;
+ break;
}
}
@@ -9556,7 +9860,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
- meta.func_id == special_kfunc_list[KF_bpf_list_push_back]) {
+ meta.func_id == special_kfunc_list[KF_bpf_list_push_back] ||
+ meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
err = ref_convert_owning_non_owning(env, release_ref_obj_id);
if (err) {
@@ -9573,6 +9878,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
+ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
+ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+ set_rbtree_add_callback_state);
+ if (err) {
+ verbose(env, "kfunc %s#%d failed callback verification\n",
+ func_name, func_id);
+ return err;
+ }
+ }
+
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -9637,11 +9952,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
struct btf_field *field = meta.arg_list_head.field;
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
- regs[BPF_REG_0].btf = field->graph_root.btf;
- regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
- regs[BPF_REG_0].off = field->graph_root.node_offset;
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ struct btf_field *field = meta.arg_rbtree_root.field;
+
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
@@ -9707,7 +10023,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_kfunc_ret_null(&meta))
regs[BPF_REG_0].id = id;
regs[BPF_REG_0].ref_obj_id = id;
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ ref_set_non_owning(env, &regs[BPF_REG_0]);
}
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
+ invalidate_non_owning_refs(env);
+
if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
regs[BPF_REG_0].id = ++env->id_gen;
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
@@ -14405,7 +14727,7 @@ static int do_check(struct bpf_verifier_env *env)
if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
(insn->src_reg == BPF_PSEUDO_CALL) ||
(insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
- (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) {
+ (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
verbose(env, "function calls are not allowed while holding a lock\n");
return -EINVAL;
}
@@ -14441,7 +14763,8 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (env->cur_state->active_lock.ptr) {
+ if (env->cur_state->active_lock.ptr &&
+ !in_rbtree_lock_required_cb(env)) {
verbose(env, "bpf_spin_unlock is missing\n");
return -EINVAL;
}
@@ -14703,9 +15026,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
- if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
+ if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
+ btf_record_has_field(map->record, BPF_RB_ROOT)) {
if (is_tracing_prog_type(prog_type)) {
- verbose(env, "tracing progs cannot use bpf_list_head yet\n");
+ verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
return -EINVAL;
}
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 17afd2b35ee5..1503f61336b6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6917,6 +6917,17 @@ struct bpf_list_node {
__u64 :64;
} __attribute__((aligned(8)));
+struct bpf_rb_root {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
+struct bpf_rb_node {
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 424f7bbbfe9b..dbd2c729781a 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -65,4 +65,28 @@ extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ks
*/
extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
+/* Description
+ * Remove 'node' from rbtree with root 'root'
+ * Returns
+ * Pointer to the removed node, or NULL if 'root' didn't contain 'node'
+ */
+extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+ struct bpf_rb_node *node) __ksym;
+
+/* Description
+ * Add 'node' to rbtree with root 'root' using comparator 'less'
+ * Returns
+ * Nothing
+ */
+extern void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)) __ksym;
+
+/* Description
+ * Return the first (leftmost) node in the input tree
+ * Returns
+ * Pointer to the node, which is _not_ removed from the tree. If the tree
+ * contains no nodes, returns NULL.
+ */
+extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
+
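+/* Minimal usage sketch, assuming a node type embedding bpf_rb_node, a
+ * bpf_spin_lock guarding the root, and a 'less' comparator as in the rbtree
+ * selftests (glock, groot, and less below are illustrative names, not part
+ * of this header):
+ *
+ *	struct node_data { long key; struct bpf_rb_node node; };
+ *
+ *	struct node_data *n = bpf_obj_new(typeof(*n));
+ *	struct bpf_rb_node *res;
+ *
+ *	if (!n)
+ *		return 1;
+ *	bpf_spin_lock(&glock);
+ *	bpf_rbtree_add(&groot, &n->node, less);
+ *	res = bpf_rbtree_first(&groot);
+ *	if (res)
+ *		res = bpf_rbtree_remove(&groot, res);
+ *	bpf_spin_unlock(&glock);
+ *	if (res)
+ *		bpf_obj_drop(container_of(res, struct node_data, node));
+ */
+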
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 2592b8aa5e41..0ed8132ce1c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -58,12 +58,12 @@ static struct {
TEST(inner_map, pop_front)
TEST(inner_map, pop_back)
#undef TEST
- { "map_compat_kprobe", "tracing progs cannot use bpf_list_head yet" },
- { "map_compat_kretprobe", "tracing progs cannot use bpf_list_head yet" },
- { "map_compat_tp", "tracing progs cannot use bpf_list_head yet" },
- { "map_compat_perf", "tracing progs cannot use bpf_list_head yet" },
- { "map_compat_raw_tp", "tracing progs cannot use bpf_list_head yet" },
- { "map_compat_raw_tp_w", "tracing progs cannot use bpf_list_head yet" },
+ { "map_compat_kprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_kretprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_perf", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
{ "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
{ "obj_new_no_composite", "bpf_obj_new type ID argument must be of a struct" },
{ "obj_new_no_struct", "bpf_obj_new type ID argument must be of a struct" },
@@ -715,6 +715,43 @@ static void test_btf(void)
btf__free(btf);
break;
}
+
+ while (test__start_subtest("btf: list_node and rb_node in same struct")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+
+ id = btf__add_struct(btf, "bpf_rb_node", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct bpf_rb_node"))
+ break;
+ id = btf__add_struct(btf, "bar", 40);
+ if (!ASSERT_EQ(id, 6, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "c", 5, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:a", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:bar:a"))
+ break;
+
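+		/* bar embeds both a bpf_list_node and a bpf_rb_node; a struct
+		 * may currently contain at most one graph-node field, so this
+		 * BTF should be rejected with -EINVAL
+		 */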
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
}
void test_linked_list(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
new file mode 100644
index 000000000000..156fa95c42f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "rbtree.skel.h"
+#include "rbtree_fail.skel.h"
+#include "rbtree_btf_fail__wrong_node_type.skel.h"
+#include "rbtree_btf_fail__add_wrong_type.skel.h"
+
+static void test_rbtree_add_nodes(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes), &opts);
+ ASSERT_OK(ret, "rbtree_add_nodes run");
+ ASSERT_OK(opts.retval, "rbtree_add_nodes retval");
+ ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes less_callback_ran");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_add_and_remove(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove), &opts);
+ ASSERT_OK(ret, "rbtree_add_and_remove");
+ ASSERT_OK(opts.retval, "rbtree_add_and_remove retval");
+ ASSERT_EQ(skel->data->removed_key, 5, "rbtree_add_and_remove first removed key");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_first_and_remove(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_first_and_remove), &opts);
+ ASSERT_OK(ret, "rbtree_first_and_remove");
+ ASSERT_OK(opts.retval, "rbtree_first_and_remove retval");
+ ASSERT_EQ(skel->data->first_data[0], 2, "rbtree_first_and_remove first rbtree_first()");
+ ASSERT_EQ(skel->data->removed_key, 1, "rbtree_first_and_remove first removed key");
+ ASSERT_EQ(skel->data->first_data[1], 4, "rbtree_first_and_remove second rbtree_first()");
+
+ rbtree__destroy(skel);
+}
+
+void test_rbtree_success(void)
+{
+ if (test__start_subtest("rbtree_add_nodes"))
+ test_rbtree_add_nodes();
+ if (test__start_subtest("rbtree_add_and_remove"))
+ test_rbtree_add_and_remove();
+ if (test__start_subtest("rbtree_first_and_remove"))
+ test_rbtree_first_and_remove();
+}
+
+#define BTF_FAIL_TEST(suffix) \
+void test_rbtree_btf_fail__##suffix(void) \
+{ \
+ struct rbtree_btf_fail__##suffix *skel; \
+ \
+ skel = rbtree_btf_fail__##suffix##__open_and_load(); \
+ if (!ASSERT_ERR_PTR(skel, \
+ "rbtree_btf_fail__" #suffix "__open_and_load unexpected success")) \
+ rbtree_btf_fail__##suffix##__destroy(skel); \
+}
+
+#define RUN_BTF_FAIL_TEST(suffix) \
+ if (test__start_subtest("rbtree_btf_fail__" #suffix)) \
+ test_rbtree_btf_fail__##suffix();
+
+BTF_FAIL_TEST(wrong_node_type);
+BTF_FAIL_TEST(add_wrong_type);
+
+void test_rbtree_btf_fail(void)
+{
+ RUN_BTF_FAIL_TEST(wrong_node_type);
+ RUN_BTF_FAIL_TEST(add_wrong_type);
+}
+
+void test_rbtree_fail(void)
+{
+ RUN_TESTS(rbtree_fail);
+}
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
new file mode 100644
index 000000000000..e5db1a4287e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+long less_callback_ran = -1;
+long removed_key = -1;
+long first_data[2] = {-1, -1};
+
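+/* private(name) places a global in a hidden, custom .data.<name> section.
+ * glock and groot below therefore share one map value, which matters because
+ * the verifier requires the bpf_spin_lock protecting a bpf_rb_root to live in
+ * the same allocation. __contains(node_data, node) tags groot as holding
+ * struct node_data nodes linked through their 'node' field.
+ */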
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ less_callback_ran = 1;
+
+ return node_a->key < node_b->key;
+}
+
+static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 5;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m) {
+ bpf_obj_drop(n);
+ return 2;
+ }
+ m->key = 1;
+
+	bpf_spin_lock(lock);
+	bpf_rbtree_add(root, &n->node, less);
+	bpf_rbtree_add(root, &m->node, less);
+	bpf_spin_unlock(lock);
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 3;
+ n->key = 3;
+
+	bpf_spin_lock(lock);
+	bpf_rbtree_add(root, &n->node, less);
+	bpf_spin_unlock(lock);
+ return 0;
+}
+
+SEC("tc")
+long rbtree_add_nodes(void *ctx)
+{
+ return __add_three(&groot, &glock);
+}
+
+SEC("tc")
+long rbtree_add_and_remove(void *ctx)
+{
+ struct bpf_rb_node *res = NULL;
+	struct node_data *n, *m = NULL;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ goto err_out;
+ n->key = 5;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m)
+ goto err_out;
+ m->key = 3;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rbtree_add(&groot, &m->node, less);
+ res = bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+
+ n = container_of(res, struct node_data, node);
+ removed_key = n->key;
+
+ bpf_obj_drop(n);
+
+ return 0;
+err_out:
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 1;
+}
+
+SEC("tc")
+long rbtree_first_and_remove(void *ctx)
+{
+ struct bpf_rb_node *res = NULL;
+ struct node_data *n, *m, *o;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 3;
+ n->data = 4;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m)
+ goto err_out;
+ m->key = 5;
+ m->data = 6;
+
+ o = bpf_obj_new(typeof(*o));
+ if (!o)
+ goto err_out;
+ o->key = 1;
+ o->data = 2;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rbtree_add(&groot, &m->node, less);
+ bpf_rbtree_add(&groot, &o->node, less);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 2;
+ }
+
+ o = container_of(res, struct node_data, node);
+ first_data[0] = o->data;
+
+ res = bpf_rbtree_remove(&groot, &o->node);
+ bpf_spin_unlock(&glock);
+
+ o = container_of(res, struct node_data, node);
+ removed_key = o->key;
+
+ bpf_obj_drop(o);
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 3;
+ }
+
+ o = container_of(res, struct node_data, node);
+ first_data[1] = o->data;
+ bpf_spin_unlock(&glock);
+
+ return 0;
+err_out:
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
new file mode 100644
index 000000000000..60079b202c07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+ int key;
+ int data;
+ struct bpf_rb_node node;
+};
+
+struct node_data2 {
+ int key;
+ struct bpf_rb_node node;
+ int data;
+};
+
+static bool less2(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data2 *node_a;
+ struct node_data2 *node_b;
+
+ node_a = container_of(a, struct node_data2, node);
+ node_b = container_of(b, struct node_data2, node);
+
+ return node_a->key < node_b->key;
+}
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__add_wrong_type(void *ctx)
+{
+ struct node_data2 *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less2);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
new file mode 100644
index 000000000000..340f97da1084
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+/* BTF load should fail: the bpf_rb_root's __contains tag points to this
+ * type's 'node' field, but 'node' is not a bpf_rb_node
+ */
+struct node_data {
+ int key;
+ int data;
+ struct bpf_list_node node;
+};
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+
+ return node_a->key < node_b->key;
+}
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__wrong_node_type(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_first(&groot);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
new file mode 100644
index 000000000000..bf3cba115897
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+private(A) struct bpf_rb_root groot2 __contains(node_data, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+
+ return node_a->key < node_b->key;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_add(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_rbtree_add(&groot, &n->node, less);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_remove(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_rbtree_remove(&groot, &n->node);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_first(void *ctx)
+{
+ bpf_rbtree_first(&groot);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_remove_unadded_node(void *ctx)
+{
+ struct node_data *n, *m;
+ struct bpf_rb_node *res;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m) {
+ bpf_obj_drop(n);
+ return 1;
+ }
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+
+	/* This remove should pass the verifier */
+ res = bpf_rbtree_remove(&groot, &n->node);
+ n = container_of(res, struct node_data, node);
+
+	/* This remove shouldn't, since m isn't in any rbtree */
+ res = bpf_rbtree_remove(&groot, &m->node);
+ m = container_of(res, struct node_data, node);
+ bpf_spin_unlock(&glock);
+
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=2 alloc_insn=11")
+long rbtree_api_remove_no_drop(void *ctx)
+{
+ struct bpf_rb_node *res;
+ struct node_data *n;
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto unlock_err;
+
+ res = bpf_rbtree_remove(&groot, res);
+
+ n = container_of(res, struct node_data, node);
+ bpf_spin_unlock(&glock);
+
+ /* bpf_obj_drop(n) is missing here */
+ return 0;
+
+unlock_err:
+ bpf_spin_unlock(&glock);
+ return 1;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_to_multiple_trees(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+
+	/* This add should fail since n is already in groot's tree */
+ bpf_rbtree_add(&groot2, &n->node, less);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_add_release_unlock_escape(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+	/* After add() in the previous critical section, n became a non-owning
+	 * reference, which was then invalidated by the spin_unlock, so it
+	 * should not be possible to use it here
+	 */
+ bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_release_aliasing(void *ctx)
+{
+ struct node_data *n, *m, *o;
+ struct bpf_rb_node *res;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+
+	/* m and o point to the same node,
+	 * but the verifier doesn't know this
+	 */
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ return 1;
+ o = container_of(res, struct node_data, node);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ return 1;
+ m = container_of(res, struct node_data, node);
+
+ bpf_rbtree_remove(&groot, &m->node);
+	/* This second remove shouldn't be possible. The previous remove
+	 * returns an owning reference to m, which is the same node that
+	 * o's non-owning ref points at.
+	 *
+	 * In order to preserve the properties
+	 *   * an owning ref's node must not be in the rbtree
+	 *   * a non-owning ref's node must be in the rbtree
+	 *
+	 * o's ref must be invalidated by the previous remove. Otherwise
+	 * we'd have a non-owning ref to a node that isn't in the rbtree,
+	 * and the verifier couldn't use the type system to prevent removal
+	 * of a ref whose node is already out of every tree; runtime checks
+	 * would be needed in that case.
+	 */
+ bpf_rbtree_remove(&groot, &o->node);
+
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_first_release_unlock_escape(void *ctx)
+{
+ struct bpf_rb_node *res;
+ struct node_data *n;
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (res)
+ n = container_of(res, struct node_data, node);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+	/* After first() in the previous critical section, n became a
+	 * non-owning reference, which was then invalidated by the
+	 * spin_unlock, so it should not be possible to use it here
+	 */
+ bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+static bool less__bad_fn_call_add(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_add(&groot, &node_a->node, less);
+
+ return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_remove(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_remove(&groot, &node_a->node);
+
+ return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_first_unlock_after(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_first(&groot);
+ bpf_spin_unlock(&glock);
+
+ return node_a->key < node_b->key;
+}
+
+static __always_inline
+long add_with_cb(bool (cb)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, cb);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_bad_cb_bad_fn_call_add(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_add);
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove not allowed in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_remove(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_remove);
+}
+
+SEC("?tc")
+__failure __msg("can't spin_{lock,unlock} in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_first_unlock_after(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_first_unlock_after);
+}
+
+char _license[] SEC("license") = "GPL";