diff options
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 185 |
1 files changed, 104 insertions, 81 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 1b466c54a357..e13d9ab4f564 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -355,12 +355,13 @@ static struct mount *alloc_vfsmnt(const char *name) if (err) goto out_free_cache; - if (name) { + if (name) mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL_ACCOUNT); - if (!mnt->mnt_devname) - goto out_free_id; - } + else + mnt->mnt_devname = "none"; + if (!mnt->mnt_devname) + goto out_free_id; #ifdef CONFIG_SMP mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); @@ -1264,7 +1265,7 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc) if (!fc->root) return ERR_PTR(-EINVAL); - mnt = alloc_vfsmnt(fc->source ?: "none"); + mnt = alloc_vfsmnt(fc->source); if (!mnt) return ERR_PTR(-ENOMEM); @@ -1325,21 +1326,6 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type, } EXPORT_SYMBOL_GPL(vfs_kern_mount); -struct vfsmount * -vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, - const char *name, void *data) -{ - /* Until it is worked out how to pass the user namespace - * through from the parent mount to the submount don't support - * unprivileged mounts with submounts. - */ - if (mountpoint->d_sb->s_user_ns != &init_user_ns) - return ERR_PTR(-EPERM); - - return vfs_kern_mount(type, SB_SUBMOUNT, name, data); -} -EXPORT_SYMBOL_GPL(vfs_submount); - static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { @@ -2424,7 +2410,7 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } -bool has_locked_children(struct mount *mnt, struct dentry *dentry) +static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2438,6 +2424,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) return false; } +bool has_locked_children(struct mount *mnt, struct dentry *dentry) +{ + bool res; + + read_seqlock_excl(&mount_lock); + res = __has_locked_children(mnt, dentry); + read_sequnlock_excl(&mount_lock); + return res; +} + /* * Check that there aren't references to earlier/same mount namespaces in the * specified subtree. Such references can act as pins for mount namespaces @@ -2482,23 +2478,27 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_MNT_UNBINDABLE(old_mnt)) return ERR_PTR(-EINVAL); - if (mnt_has_parent(old_mnt)) { - if (!check_mnt(old_mnt)) - return ERR_PTR(-EINVAL); - } else { - if (!is_mounted(&old_mnt->mnt)) - return ERR_PTR(-EINVAL); - - /* Make sure this isn't something purely kernel internal. */ - if (!is_anon_ns(old_mnt->mnt_ns)) + /* + * Make sure the source mount is acceptable. + * Anything mounted in our mount namespace is allowed. + * Otherwise, it must be the root of an anonymous mount + * namespace, and we need to make sure no namespace + * loops get created. + */ + if (!check_mnt(old_mnt)) { + if (!is_mounted(&old_mnt->mnt) || + !is_anon_ns(old_mnt->mnt_ns) || + mnt_has_parent(old_mnt)) return ERR_PTR(-EINVAL); - /* Make sure we don't create mount namespace loops. */ if (!check_for_nsfs_mounts(old_mnt)) return ERR_PTR(-EINVAL); } - if (has_locked_children(old_mnt, path->dentry)) + if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + if (__has_locked_children(old_mnt, path->dentry)) return ERR_PTR(-EINVAL); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); @@ -2944,6 +2944,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); + if (!check_mnt(mnt)) { + err = -EINVAL; + goto out_unlock; + } if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -3035,7 +3039,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse) if (!may_copy_tree(old_path)) return mnt; - if (!recurse && has_locked_children(old, old_path->dentry)) + if (!recurse && __has_locked_children(old, old_path->dentry)) return mnt; if (recurse) @@ -3428,7 +3432,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) goto out; /* From mount should not have locked children in place of To's root */ - if (has_locked_children(from, to->mnt.mnt_root)) + if (__has_locked_children(from, to->mnt.mnt_root)) goto out; /* Setting sharing groups is only allowed on private mounts */ @@ -3442,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) if (IS_MNT_SLAVE(from)) { struct mount *m = from->mnt_master; - list_add(&to->mnt_slave, &m->mnt_slave_list); + list_add(&to->mnt_slave, &from->mnt_slave); to->mnt_master = m; } @@ -3467,18 +3471,25 @@ out: * Check if path is overmounted, i.e., if there's a mount on top of * @path->mnt with @path->dentry as mountpoint. * - * Context: This function expects namespace_lock() to be held. + * Context: namespace_sem must be held at least shared. + * MUST NOT be called under lock_mount_hash() (there one should just + * call __lookup_mnt() and check if it returns NULL). * Return: If path is overmounted true is returned, false if not. */ static inline bool path_overmounted(const struct path *path) { + unsigned seq = read_seqbegin(&mount_lock); + bool no_child; + rcu_read_lock(); - if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { - rcu_read_unlock(); - return true; - } + no_child = !__lookup_mnt(path->mnt, path->dentry); rcu_read_unlock(); - return false; + if (need_seqretry(&mount_lock, seq)) { + read_seqlock_excl(&mount_lock); + no_child = !__lookup_mnt(path->mnt, path->dentry); + read_sequnlock_excl(&mount_lock); + } + return unlikely(!no_child); } /** @@ -3637,46 +3648,41 @@ static int do_move_mount(struct path *old_path, ns = old->mnt_ns; err = -EINVAL; - if (!may_use_mount(p)) - goto out; - /* The thing moved must be mounted... */ if (!is_mounted(&old->mnt)) goto out; - /* ... and either ours or the root of anon namespace */ - if (!(attached ? check_mnt(old) : is_anon_ns(ns))) - goto out; - - if (is_anon_ns(ns)) { + if (check_mnt(old)) { + /* if the source is in our namespace... */ + /* ... it should be detachable from parent */ + if (!mnt_has_parent(old) || IS_MNT_LOCKED(old)) + goto out; + /* ... and the target should be in our namespace */ + if (!check_mnt(p)) + goto out; + } else { /* - * Ending up with two files referring to the root of the - * same anonymous mount namespace would cause an error - * as this would mean trying to move the same mount - * twice into the mount tree which would be rejected - * later. But be explicit about it right here. + * otherwise the source must be the root of some anon namespace. + * AV: check for mount being root of an anon namespace is worth + * an inlined predicate... */ - if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns)) + if (!is_anon_ns(ns) || mnt_has_parent(old)) goto out; - /* - * If this is an anonymous mount tree ensure that mount - * propagation can detect mounts that were just - * propagated to the target mount tree so we don't - * propagate onto them. + * Bail out early if the target is within the same namespace - + * subsequent checks would've rejected that, but they lose + * some corner cases if we check it early. */ - ns->mntns_flags |= MNTNS_PROPAGATING; - } else if (is_anon_ns(p->mnt_ns)) { + if (ns == p->mnt_ns) + goto out; /* - * Don't allow moving an attached mount tree to an - * anonymous mount tree. + * Target should be either in our namespace or in an acceptable + * anon namespace, sensu check_anonymous_mnt(). */ - goto out; + if (!may_use_mount(p)) + goto out; } - if (old->mnt.mnt_flags & MNT_LOCKED) - goto out; - if (!path_mounted(old_path)) goto out; @@ -3722,8 +3728,6 @@ static int do_move_mount(struct path *old_path, if (attached) put_mountpoint(old_mp); out: - if (is_anon_ns(ns)) - ns->mntns_flags &= ~MNTNS_PROPAGATING; unlock_mount(mp); if (!err) { if (attached) { @@ -3899,10 +3903,6 @@ int finish_automount(struct vfsmount *m, const struct path *path) return PTR_ERR(m); mnt = real_mount(m); - /* The new mount record should have at least 2 refs to prevent it being - * expired before we get a chance to add it - */ - BUG_ON(mnt_get_count(mnt) < 2); if (m->mnt_sb == path->mnt->mnt_sb && m->mnt_root == dentry) { @@ -3935,7 +3935,6 @@ int finish_automount(struct vfsmount *m, const struct path *path) unlock_mount(mp); if (unlikely(err)) goto discard; - mntput(m); return 0; discard_locked: @@ -3949,7 +3948,6 @@ discard: namespace_unlock(); } mntput(m); - mntput(m); return err; } @@ -3986,11 +3984,14 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* extract from the expiration list every vfsmount that matches the * following criteria: + * - already mounted * - only referenced by its parent vfsmount * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { + if (!is_mounted(&mnt->mnt)) + continue; if (!xchg(&mnt->mnt_expiry_mark, 1) || propagate_mount_busy(mnt, 1)) continue; @@ -5491,7 +5492,7 @@ static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq) seq->buf[seq->count] = '\0'; seq->count = start; seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL)); - } else if (r->mnt_devname) { + } else { seq_puts(seq, r->mnt_devname); } return 0; @@ -5804,7 +5805,9 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root) STATMOUNT_SB_SOURCE | \ STATMOUNT_OPT_ARRAY | \ STATMOUNT_OPT_SEC_ARRAY | \ - STATMOUNT_SUPPORTED_MASK) + STATMOUNT_SUPPORTED_MASK | \ + STATMOUNT_MNT_UIDMAP | \ + STATMOUNT_MNT_GIDMAP) static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, struct mnt_namespace *ns) @@ -5839,13 +5842,29 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, return err; s->root = root; - s->idmap = mnt_idmap(s->mnt); - if (s->mask & STATMOUNT_SB_BASIC) - statmount_sb_basic(s); + /* + * Note that mount properties in mnt->mnt_flags, mnt->mnt_idmap + * can change concurrently as we only hold the read-side of the + * namespace semaphore and mount properties may change with only + * the mount lock held. + * + * We could sample the mount lock sequence counter to detect + * those changes and retry. But it's not worth it. Worst that + * happens is that the mnt->mnt_idmap pointer is already changed + * while mnt->mnt_flags isn't or vica versa. So what. + * + * Both mnt->mnt_flags and mnt->mnt_idmap are set and retrieved + * via READ_ONCE()/WRITE_ONCE() and guard against theoretical + * torn read/write. That's all we care about right now. + */ + s->idmap = mnt_idmap(s->mnt); if (s->mask & STATMOUNT_MNT_BASIC) statmount_mnt_basic(s); + if (s->mask & STATMOUNT_SB_BASIC) + statmount_sb_basic(s); + if (s->mask & STATMOUNT_PROPAGATE_FROM) statmount_propagate_from(s); @@ -6157,6 +6176,10 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) return -ENOENT; + /* + * We only need to guard against mount topology changes as + * listmount() doesn't care about any mount properties. + */ scoped_guard(rwsem_read, &namespace_sem) ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids, nr_mnt_ids, (flags & LISTMOUNT_REVERSE)); |