summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf-cgroup.h9
-rw-r--r--include/linux/cgroup-defs.h100
-rw-r--r--include/linux/cgroup.h24
-rw-r--r--include/trace/events/cgroup.h12
4 files changed, 102 insertions, 43 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9de7adb68294..60d1511b4f4d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -114,8 +114,7 @@ struct bpf_prog_list {
u32 flags;
};
-int cgroup_bpf_inherit(struct cgroup *cgrp);
-void cgroup_bpf_offline(struct cgroup *cgrp);
+void __init cgroup_bpf_lifetime_notifier_init(void);
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
@@ -431,8 +430,10 @@ const struct bpf_func_proto *
cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
#else
-static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
-static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_lifetime_notifier_init(void)
+{
+ return;
+}
static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype,
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5bc8f55c8cca..e61687d5e496 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -170,6 +170,23 @@ struct cgroup_subsys_state {
struct percpu_ref refcnt;
/*
+ * Depending on the context, this field is initialized
+ * via css_rstat_init() at different places:
+ *
+ * when css is associated with cgroup::self
+ * when css->cgroup is the root cgroup
+ * performed in cgroup_init()
+ * when css->cgroup is not the root cgroup
+ * performed in cgroup_create()
+ * when css is associated with a subsystem
+ * when css->cgroup is the root cgroup
+ * performed in cgroup_init_subsys() in the non-early path
+ * when css->cgroup is not the root cgroup
+ * performed in css_create()
+ */
+ struct css_rstat_cpu __percpu *rstat_cpu;
+
+ /*
* siblings list anchored at the parent's ->children
*
* linkage is protected by cgroup_mutex or RCU
@@ -177,9 +194,6 @@ struct cgroup_subsys_state {
struct list_head sibling;
struct list_head children;
- /* flush target list anchored at cgrp->rstat_css_list */
- struct list_head rstat_css_node;
-
/*
* PI: Subsys-unique ID. 0 is unused and root is always 1. The
* matching css can be looked up using css_from_id().
@@ -219,6 +233,16 @@ struct cgroup_subsys_state {
* Protected by cgroup_mutex.
*/
int nr_descendants;
+
+ /*
+ * A singly-linked list of css structures to be rstat flushed.
+ * This is a scratch field to be used exclusively by
+ * css_rstat_flush().
+ *
+ * Protected by rstat_base_lock when css is cgroup::self.
+ * Protected by css->ss->rstat_ss_lock otherwise.
+ */
+ struct cgroup_subsys_state *rstat_flush_next;
};
/*
@@ -329,10 +353,10 @@ struct cgroup_base_stat {
/*
* rstat - cgroup scalable recursive statistics. Accounting is done
- * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
+ * per-cpu in css_rstat_cpu which is then lazily propagated up the
* hierarchy on reads.
*
- * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
+ * When a stat gets updated, the css_rstat_cpu and its ancestors are
* linked into the updated tree. On the following read, propagation only
* considers and consumes the updated tree. This makes reading O(the
* number of descendants which have been active since last read) instead of
@@ -344,10 +368,29 @@ struct cgroup_base_stat {
* frequency decreases the cost of each read.
*
* This struct hosts both the fields which implement the above -
- * updated_children and updated_next - and the fields which track basic
- * resource statistics on top of it - bsync, bstat and last_bstat.
+ * updated_children and updated_next.
*/
-struct cgroup_rstat_cpu {
+struct css_rstat_cpu {
+ /*
+ * Child cgroups with stat updates on this cpu since the last read
+ * are linked on the parent's ->updated_children through
+ * ->updated_next. updated_children is terminated by its container css.
+ *
+ * In addition to being more compact, singly-linked list pointing to
+ * the css makes it unnecessary for each per-cpu struct to point back
+ * to the associated css.
+ *
+ * Protected by per-cpu css->ss->rstat_ss_cpu_lock.
+ */
+ struct cgroup_subsys_state *updated_children;
+ struct cgroup_subsys_state *updated_next; /* NULL if not on the list */
+};
+
+/*
+ * This struct hosts the fields which track basic resource statistics on
+ * top of it - bsync, bstat and last_bstat.
+ */
+struct cgroup_rstat_base_cpu {
/*
* ->bsync protects ->bstat. These are the only fields which get
* updated in the hot path.
@@ -374,20 +417,6 @@ struct cgroup_rstat_cpu {
* deltas to propagate to the per-cpu subtree_bstat.
*/
struct cgroup_base_stat last_subtree_bstat;
-
- /*
- * Child cgroups with stat updates on this cpu since the last read
- * are linked on the parent's ->updated_children through
- * ->updated_next.
- *
- * In addition to being more compact, singly-linked list pointing
- * to the cgroup makes it unnecessary for each per-cpu struct to
- * point back to the associated cgroup.
- *
- * Protected by per-cpu cgroup_rstat_cpu_lock.
- */
- struct cgroup *updated_children; /* terminated by self cgroup */
- struct cgroup *updated_next; /* NULL iff not on the list */
};
struct cgroup_freezer_state {
@@ -516,23 +545,23 @@ struct cgroup {
struct cgroup *dom_cgrp;
struct cgroup *old_dom_cgrp; /* used while enabling threaded */
- /* per-cpu recursive resource statistics */
- struct cgroup_rstat_cpu __percpu *rstat_cpu;
- struct list_head rstat_css_list;
-
/*
- * Add padding to separate the read mostly rstat_cpu and
- * rstat_css_list into a different cacheline from the following
- * rstat_flush_next and *bstat fields which can have frequent updates.
+ * Depending on the context, this field is initialized via
+ * css_rstat_init() at different places:
+ *
+ * when cgroup is the root cgroup
+ * performed in cgroup_setup_root()
+ * otherwise
+ * performed in cgroup_create()
*/
- CACHELINE_PADDING(_pad_);
+ struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;
/*
- * A singly-linked list of cgroup structures to be rstat flushed.
- * This is a scratch field to be used exclusively by
- * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
+ * Add padding to keep the read mostly rstat per-cpu pointer on a
+ * different cacheline than the following *bstat fields which can have
+ * frequent updates.
*/
- struct cgroup *rstat_flush_next;
+ CACHELINE_PADDING(_pad_);
/* cgroup basic resource statistics */
struct cgroup_base_stat last_bstat;
@@ -790,6 +819,9 @@ struct cgroup_subsys {
* specifies the mask of subsystems that this one depends on.
*/
unsigned int depends_on;
+
+ spinlock_t rstat_ss_lock;
+ raw_spinlock_t __percpu *rstat_ss_cpu_lock;
};
extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 166d6de50dbf..b18fb5fcb38e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -19,6 +19,7 @@
#include <linux/kernfs.h>
#include <linux/jump_label.h>
#include <linux/types.h>
+#include <linux/notifier.h>
#include <linux/ns_common.h>
#include <linux/nsproxy.h>
#include <linux/user_namespace.h>
@@ -40,7 +41,7 @@ struct kernel_clone_args;
#ifdef CONFIG_CGROUPS
-enum {
+enum css_task_iter_flags {
CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */
CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */
CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */
@@ -66,10 +67,16 @@ struct css_task_iter {
struct list_head iters_node; /* css_set->task_iters */
};
+enum cgroup_lifetime_events {
+ CGROUP_LIFETIME_ONLINE,
+ CGROUP_LIFETIME_OFFLINE,
+};
+
extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
extern spinlock_t css_set_lock;
+extern struct blocking_notifier_head cgroup_lifetime_notifier;
#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
@@ -347,6 +354,17 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css)
return css->flags & CSS_DYING;
}
+static inline bool css_is_self(struct cgroup_subsys_state *css)
+{
+ if (css == &css->cgroup->self) {
+ /* cgroup::self should not have subsystem association */
+ WARN_ON(css->ss != NULL);
+ return true;
+ }
+
+ return false;
+}
+
static inline void cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
@@ -688,8 +706,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
/*
* cgroup scalable recursive statistics.
*/
-void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
-void cgroup_rstat_flush(struct cgroup *cgrp);
+void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
+void css_rstat_flush(struct cgroup_subsys_state *css);
/*
* Basic resource stats.
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index af2755bda6eb..7d332387be6c 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -231,7 +231,11 @@ DECLARE_EVENT_CLASS(cgroup_rstat,
__entry->cpu, __entry->contended)
);
-/* Related to global: cgroup_rstat_lock */
+/*
+ * Related to locks:
+ * global rstat_base_lock for base stats
+ * cgroup_subsys::rstat_ss_lock for subsystem stats
+ */
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_lock_contended,
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),
@@ -253,7 +257,11 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_unlock,
TP_ARGS(cgrp, cpu, contended)
);
-/* Related to per CPU: cgroup_rstat_cpu_lock */
+/*
+ * Related to per CPU locks:
+ * global rstat_base_cpu_lock for base stats
+ * cgroup_subsys::rstat_ss_cpu_lock for subsystem stats
+ */
DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_lock_contended,
TP_PROTO(struct cgroup *cgrp, int cpu, bool contended),