summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-04-12 13:27:11 -0700
committerDavid S. Miller <davem@davemloft.net>2021-04-12 13:27:11 -0700
commit645b34a7b544686d82c6a082bc78df33c322cae9 (patch)
tree4f871cebe602618f0bf13b51746d9499c9b7cca9
parenta115d24a636e892ddd1ae58f8e23c78a0390cb68 (diff)
parent2671fa4dc0109d3fb581bc3078fdf17b5d9080f6 (diff)
Merge branch 'netns-sysctl-isolation'
Jonathon Reinhart says: ==================== Ensuring net sysctl isolation This patchset is the result of an audit of /proc/sys/net to prove that it is safe to be mouted read-write in a container when a net namespace is in use. See [1]. The first commit adds code to detect sysctls which are not netns-safe, and can "leak" changes to other net namespaces. My manual audit found, and the above feature confirmed, that there are two nf_conntrack sysctls which are in fact not netns-safe. I considered sending the latter to netfilter-devel, but I think it's better to have both together on net-next: Adding only the former causes undesirable warnings in the kernel log. [1]: https://github.com/opencontainers/runc/issues/2826 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/netfilter/nf_conntrack_standalone.c10
-rw-r--r--net/sysctl_net.c48
2 files changed, 50 insertions, 8 deletions
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3f2cc7b04b20..54d36d3eb905 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1060,16 +1060,10 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
nf_conntrack_standalone_init_dccp_sysctl(net, table);
nf_conntrack_standalone_init_gre_sysctl(net, table);
- /* Don't allow unprivileged users to alter certain sysctls */
- if (net->user_ns != &init_user_ns) {
+ /* Don't allow non-init_net ns to alter global sysctls */
+ if (!net_eq(&init_net, net)) {
table[NF_SYSCTL_CT_MAX].mode = 0444;
table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
- table[NF_SYSCTL_CT_HELPER].mode = 0444;
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
- table[NF_SYSCTL_CT_EVENTS].mode = 0444;
-#endif
- table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
- } else if (!net_eq(&init_net, net)) {
table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index d14dab8b6774..f6cb0d4d114c 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -115,9 +115,57 @@ out1:
goto out;
}
+/* Verify that sysctls for non-init netns are safe by either:
+ * 1) being read-only, or
+ * 2) having a data pointer which points outside of the global kernel/module
+ * data segment, and rather into the heap where a per-net object was
+ * allocated.
+ */
+static void ensure_safe_net_sysctl(struct net *net, const char *path,
+ struct ctl_table *table)
+{
+ struct ctl_table *ent;
+
+ pr_debug("Registering net sysctl (net %p): %s\n", net, path);
+ for (ent = table; ent->procname; ent++) {
+ unsigned long addr;
+ const char *where;
+
+ pr_debug(" procname=%s mode=%o proc_handler=%ps data=%p\n",
+ ent->procname, ent->mode, ent->proc_handler, ent->data);
+
+ /* If it's not writable inside the netns, then it can't hurt. */
+ if ((ent->mode & 0222) == 0) {
+ pr_debug(" Not writable by anyone\n");
+ continue;
+ }
+
+ /* Where does data point? */
+ addr = (unsigned long)ent->data;
+ if (is_module_address(addr))
+ where = "module";
+ else if (core_kernel_data(addr))
+ where = "kernel";
+ else
+ continue;
+
+ /* If it is writable and points to kernel/module global
+ * data, then it's probably a netns leak.
+ */
+ WARN(1, "sysctl %s/%s: data points to %s global data: %ps\n",
+ path, ent->procname, where, ent->data);
+
+ /* Make it "safe" by dropping writable perms */
+ ent->mode &= ~0222;
+ }
+}
+
struct ctl_table_header *register_net_sysctl(struct net *net,
const char *path, struct ctl_table *table)
{
+ if (!net_eq(net, &init_net))
+ ensure_safe_net_sysctl(net, path, table);
+
return __register_sysctl_table(&net->sysctls, path, table);
}
EXPORT_SYMBOL_GPL(register_net_sysctl);