author | Alexei Starovoitov <ast@kernel.org> | 2023-02-10 18:59:57 -0800
committer | Alexei Starovoitov <ast@kernel.org> | 2023-02-10 19:00:13 -0800
commit | ab86cf337a5b64f0456d2d0d01edc939ad5c20bb (patch)
tree | 6c9048dbac70ddf85660493933287c979ac4921d /kernel/bpf/core.c
parent | 7e2a9ebe8126206157056cca5dd30a656f3d8a81 (diff)
parent | bf3965082491601bf9cd6d9a0ce2d88cb219168a (diff)
Merge branch 'bpf, mm: introduce cgroup.memory=nobpf'
Yafang Shao says:
====================
The bpf memory accounting has some known problems in container
environments:
- The container memory usage is not consistent if there are pinned bpf
programs
After the container restarts, the leftover bpf programs won't be
accounted to the new generation, so the memory usage of the container is not
consistent. This issue can be resolved by introducing selectable
memcg, but we don't have an agreement on the solution yet. See also
the discussions at https://lwn.net/Articles/905150/ .
- The leftover non-preallocated bpf map can't be limited
The leftover bpf map will be reparented, and thus it will be limited by
the parent, rather than the container itself. Furthermore, if the
parent is destroyed, it will be limited by its parent's parent, and so
on. This can also be resolved by introducing selectable memcg.
- The memory dynamically allocated in a bpf prog is charged to the root memcg
only
Nowadays a bpf prog can dynamically allocate memory, for example via
bpf_obj_new(), but it only allocates from the global bpf_mem_alloc
pool, so it is charged to the root memcg only. That needs to be
addressed by a new proposal.
So let's give the container user an option to disable bpf memory accounting.
The idea of "cgroup.memory=nobpf" was originally suggested by Tejun [1].
[1]. https://lwn.net/ml/linux-mm/YxjOawzlgE458ezL@slm.duckdns.org/
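For illustration only, a minimal sketch of how such an option can be folded into allocation flags. The helper name bpf_memcg_flags() matches what the diff below uses, but the static key name and the exact definition shown here are assumptions for this sketch, not necessarily the merged implementation:

  #include <linux/gfp.h>
  #include <linux/jump_label.h>

  /* Assumed switch: defaults to true, cleared during boot when
   * "cgroup.memory=nobpf" is present on the kernel command line.
   */
  DECLARE_STATIC_KEY_TRUE(memcg_bpf_enabled_key);

  static inline bool memcg_bpf_enabled(void)
  {
          return static_branch_likely(&memcg_bpf_enabled_key);
  }

  static inline gfp_t bpf_memcg_flags(gfp_t flags)
  {
          /* Request memcg charging only while bpf accounting is enabled. */
          if (memcg_bpf_enabled())
                  return flags | __GFP_ACCOUNT;
          return flags;
  }

With something like this in place, booting with cgroup.memory=nobpf makes every
bpf_memcg_flags(GFP_KERNEL | ...) call site fall back to plain GFP_KERNEL, so
those allocations are no longer charged to the loading task's memcg.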
Changes:
v1->v2:
- squash patches (Roman)
- commit log improvement in patch #2. (Johannes)
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel/bpf/core.c')
-rw-r--r-- | kernel/bpf/core.c | 13
1 file changed, 7 insertions, 6 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 16da51093aff8..3390961c4e108 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -35,6 +35,7 @@
 #include <linux/bpf_verifier.h>
 #include <linux/nodemask.h>
 #include <linux/bpf_mem_alloc.h>
+#include <linux/memcontrol.h>
 
 #include <asm/barrier.h>
 #include <asm/unaligned.h>
@@ -87,7 +88,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
 
 struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
 {
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
 	struct bpf_prog_aux *aux;
 	struct bpf_prog *fp;
 
@@ -96,12 +97,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	if (fp == NULL)
 		return NULL;
 
-	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+	aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
 	if (aux == NULL) {
 		vfree(fp);
 		return NULL;
 	}
-	fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+	fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
 	if (!fp->active) {
 		vfree(fp);
 		kfree(aux);
@@ -126,7 +127,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 {
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
 	struct bpf_prog *prog;
 	int cpu;
 
@@ -159,7 +160,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
 
 	prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
 					  sizeof(*prog->aux->jited_linfo),
-					  GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+					  bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN));
 	if (!prog->aux->jited_linfo)
 		return -ENOMEM;
 
@@ -234,7 +235,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags)
 {
-	gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
 	struct bpf_prog *fp;
 	u32 pages;
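A design note on the conversion above: in the kernel's gfp headers GFP_KERNEL_ACCOUNT is defined as

  #define GFP_KERNEL_ACCOUNT	(GFP_KERNEL | __GFP_ACCOUNT)

so, assuming bpf_memcg_flags() behaves like the sketch earlier in this message, the converted call sites request exactly the same flags as before whenever bpf memory accounting is left enabled (the default); only when the kernel is booted with cgroup.memory=nobpf is the __GFP_ACCOUNT bit dropped and the memcg charge skipped.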