diff options
author | Alexei Starovoitov <ast@kernel.org> | 2023-02-10 18:59:57 -0800 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2023-02-10 19:00:13 -0800 |
commit | ab86cf337a5b64f0456d2d0d01edc939ad5c20bb (patch) | |
tree | 6c9048dbac70ddf85660493933287c979ac4921d /mm/memcontrol.c | |
parent | 7e2a9ebe8126206157056cca5dd30a656f3d8a81 (diff) | |
parent | bf3965082491601bf9cd6d9a0ce2d88cb219168a (diff) |
Merge branch 'bpf, mm: introduce cgroup.memory=nobpf'
Yafang Shao says:
====================
The bpf memory accouting has some known problems in contianer
environment,
- The container memory usage is not consistent if there's pinned bpf
program
After the container restart, the leftover bpf programs won't account
to the new generation, so the memory usage of the container is not
consistent. This issue can be resolved by introducing selectable
memcg, but we don't have an agreement on the solution yet. See also
the discussions at https://lwn.net/Articles/905150/ .
- The leftover non-preallocated bpf map can't be limited
The leftover bpf map will be reparented, and thus it will be limited by
the parent, rather than the container itself. Furthermore, if the
parent is destroyed, it be will limited by its parent's parent, and so
on. It can also be resolved by introducing selectable memcg.
- The memory dynamically allocated in bpf prog is charged into root memcg
only
Nowdays the bpf prog can dynamically allocate memory, for example via
bpf_obj_new(), but it only allocate from the global bpf_mem_alloc
pool, so it will charge into root memcg only. That needs to be
addressed by a new proposal.
So let's give the container user an option to disable bpf memory accouting.
The idea of "cgroup.memory=nobpf" is originally by Tejun[1].
[1]. https://lwn.net/ml/linux-mm/YxjOawzlgE458ezL@slm.duckdns.org/
Changes,
v1->v2:
- squash patches (Roman)
- commit log improvement in patch #2. (Johannes)
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 73afff8062f9b..49f40730e7117 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -88,6 +88,9 @@ static bool cgroup_memory_nosocket __ro_after_init; /* Kernel memory accounting disabled? */ static bool cgroup_memory_nokmem __ro_after_init; +/* BPF memory accounting disabled? */ +static bool cgroup_memory_nobpf __ro_after_init; + #ifdef CONFIG_CGROUP_WRITEBACK static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); #endif @@ -347,6 +350,9 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, */ DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key); EXPORT_SYMBOL(memcg_kmem_enabled_key); + +DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key); +EXPORT_SYMBOL(memcg_bpf_enabled_key); #endif /** @@ -5357,6 +5363,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket) static_branch_inc(&memcg_sockets_enabled_key); +#if defined(CONFIG_MEMCG_KMEM) + if (!cgroup_memory_nobpf) + static_branch_inc(&memcg_bpf_enabled_key); +#endif + return &memcg->css; } @@ -5441,6 +5452,11 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_active) static_branch_dec(&memcg_sockets_enabled_key); +#if defined(CONFIG_MEMCG_KMEM) + if (!cgroup_memory_nobpf) + static_branch_dec(&memcg_bpf_enabled_key); +#endif + vmpressure_cleanup(&memcg->vmpressure); cancel_work_sync(&memcg->high_work); mem_cgroup_remove_from_trees(memcg); @@ -7269,6 +7285,8 @@ static int __init cgroup_memory(char *s) cgroup_memory_nosocket = true; if (!strcmp(token, "nokmem")) cgroup_memory_nokmem = true; + if (!strcmp(token, "nobpf")) + cgroup_memory_nobpf = true; } return 1; } |