summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 09:33:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-05-26 09:33:44 -0700
commit8dd53535f1e129b7d75c512dc271bff76461ab6b (patch)
treedb1ce1e0ed6ab44f8a6905e5e75191de35dc6406
parent181d8e399f50c0683b12d40432bb6e1ca5c58d37 (diff)
parent1afe9e7da8c0ab3c17d4a469ed4c0607024cf0d4 (diff)
Merge tag 'vfs-6.16-rc1.super' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs freezing updates from Christian Brauner: "This contains various filesystem freezing related work for this cycle: - Allow the power subsystem to support filesystem freeze for suspend and hibernate. Now all the pieces are in place to actually allow the power subsystem to freeze/thaw filesystems during suspend/resume. Filesystems are only frozen and thawed if the power subsystem does actually own the freeze. If the filesystem is already frozen by the time we've frozen all userspace processes we don't care to freeze it again. That's userspace's job once the process resumes. We only actually freeze filesystems if we absolutely have to and we ignore other failures to freeze. We could bubble up errors and fail suspend/resume if the error isn't EBUSY (aka it's already frozen) but I don't think that this is worth it. Filesystem freezing during suspend/resume is best-effort. If the user has 500 ext4 filesystems mounted and 4 fail to freeze for whatever reason then we simply skip them. What we have now is already a big improvement and let's see how we fare with it before making our lives even harder (and uglier) than we have to. - Allow efivars to support freeze and thaw Allow efivarfs to partake to resync variable state during system hibernation and suspend. Add freeze/thaw support. This is a pretty straightforward implementation. We simply add regular freeze/thaw support for both userspace and the kernel. efivars is the first pseudofilesystem that adds support for filesystem freezing and thawing. The simplicity comes from the fact that we simply always resync variable state after efivarfs has been frozen. It doesn't matter whether that's because of suspend, userspace initiated freeze or hibernation. Efivars is simple enough that it doesn't matter that we walk all dentries. There are no directories and there aren't insane amounts of entries and both freeze/thaw are already heavy-handed operations. If userspace initiated a freeze/thaw cycle they would need CAP_SYS_ADMIN in the initial user namespace (as that's where efivarfs is mounted) so it can't be triggered by random userspace. IOW, we really really don't care" * tag 'vfs-6.16-rc1.super' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: f2fs: fix freezing filesystem during resize kernfs: add warning about implementing freeze/thaw efivarfs: support freeze/thaw power: freeze filesystems during suspend/resume libfs: export find_next_child() super: add filesystem freezing helpers for suspend and hibernate gfs2: pass through holder from the VFS for freeze/thaw super: use common iterator (Part 2) super: use a common iterator (Part 1) super: skip dying superblocks early super: simplify user_get_super() super: remove pointless s_root checks fs: allow all writers to be frozen locking/percpu-rwsem: add freezable alternative to down_read
-rw-r--r--fs/efivarfs/internal.h1
-rw-r--r--fs/efivarfs/super.c195
-rw-r--r--fs/f2fs/gc.c6
-rw-r--r--fs/gfs2/super.c24
-rw-r--r--fs/gfs2/sys.c4
-rw-r--r--fs/internal.h1
-rw-r--r--fs/ioctl.c8
-rw-r--r--fs/kernfs/mount.c15
-rw-r--r--fs/libfs.c3
-rw-r--r--fs/super.c316
-rw-r--r--fs/xfs/scrub/fscounters.c4
-rw-r--r--fs/xfs/xfs_notify_failure.c6
-rw-r--r--include/linux/fs.h19
-rw-r--r--include/linux/percpu-rwsem.h20
-rw-r--r--kernel/locking/percpu-rwsem.c13
-rw-r--r--kernel/power/hibernate.c16
-rw-r--r--kernel/power/main.c31
-rw-r--r--kernel/power/power.h4
-rw-r--r--kernel/power/suspend.c7
19 files changed, 417 insertions, 276 deletions
diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
index ac6a1dd0a6a5c..f913b68242895 100644
--- a/fs/efivarfs/internal.h
+++ b/fs/efivarfs/internal.h
@@ -17,7 +17,6 @@ struct efivarfs_fs_info {
struct efivarfs_mount_opts mount_opts;
struct super_block *sb;
struct notifier_block nb;
- struct notifier_block pm_nb;
};
struct efi_variable {
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index b2de4079864c9..c900d98bf4945 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -21,6 +21,7 @@
#include <linux/namei.h>
#include "internal.h"
+#include "../internal.h"
static int efivarfs_ops_notifier(struct notifier_block *nb, unsigned long event,
void *data)
@@ -120,12 +121,18 @@ static int efivarfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+
+static int efivarfs_freeze_fs(struct super_block *sb);
+static int efivarfs_unfreeze_fs(struct super_block *sb);
+
static const struct super_operations efivarfs_ops = {
.statfs = efivarfs_statfs,
.drop_inode = generic_delete_inode,
.alloc_inode = efivarfs_alloc_inode,
.free_inode = efivarfs_free_inode,
.show_options = efivarfs_show_options,
+ .freeze_fs = efivarfs_freeze_fs,
+ .unfreeze_fs = efivarfs_unfreeze_fs,
};
/*
@@ -365,8 +372,6 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc)
if (err)
return err;
- register_pm_notifier(&sfi->pm_nb);
-
return efivar_init(efivarfs_callback, sb, true);
}
@@ -391,48 +396,6 @@ static const struct fs_context_operations efivarfs_context_ops = {
.reconfigure = efivarfs_reconfigure,
};
-struct efivarfs_ctx {
- struct dir_context ctx;
- struct super_block *sb;
- struct dentry *dentry;
-};
-
-static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
- loff_t offset, u64 ino, unsigned mode)
-{
- unsigned long size;
- struct efivarfs_ctx *ectx = container_of(ctx, struct efivarfs_ctx, ctx);
- struct dentry *dentry = try_lookup_noperm(&QSTR_LEN(name, len),
- ectx->sb->s_root);
- struct inode *inode;
- struct efivar_entry *entry;
- int err;
-
- if (IS_ERR_OR_NULL(dentry))
- return true;
-
- inode = d_inode(dentry);
- entry = efivar_entry(inode);
-
- err = efivar_entry_size(entry, &size);
- size += sizeof(__u32); /* attributes */
- if (err)
- size = 0;
-
- inode_lock_nested(inode, I_MUTEX_CHILD);
- i_size_write(inode, size);
- inode_unlock(inode);
-
- if (!size) {
- ectx->dentry = dentry;
- return false;
- }
-
- dput(dentry);
-
- return true;
-}
-
static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
unsigned long name_size, void *data)
{
@@ -469,111 +432,59 @@ static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
return err;
}
-static void efivarfs_deactivate_super_work(struct work_struct *work)
-{
- struct super_block *s = container_of(work, struct super_block,
- destroy_work);
- /*
- * note: here s->destroy_work is free for reuse (which
- * will happen in deactivate_super)
- */
- deactivate_super(s);
-}
-
static struct file_system_type efivarfs_type;
-static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
- void *ptr)
+static int efivarfs_freeze_fs(struct super_block *sb)
{
- struct efivarfs_fs_info *sfi = container_of(nb, struct efivarfs_fs_info,
- pm_nb);
- struct path path;
- struct efivarfs_ctx ectx = {
- .ctx = {
- .actor = efivarfs_actor,
- },
- .sb = sfi->sb,
- };
- struct file *file;
- struct super_block *s = sfi->sb;
- static bool rescan_done = true;
-
- if (action == PM_HIBERNATION_PREPARE) {
- rescan_done = false;
- return NOTIFY_OK;
- } else if (action != PM_POST_HIBERNATION) {
- return NOTIFY_DONE;
- }
-
- if (rescan_done)
- return NOTIFY_DONE;
-
- /* ensure single superblock is alive and pin it */
- if (!atomic_inc_not_zero(&s->s_active))
- return NOTIFY_DONE;
-
- pr_info("efivarfs: resyncing variable state\n");
-
- path.dentry = sfi->sb->s_root;
-
- /*
- * do not add SB_KERNMOUNT which a single superblock could
- * expose to userspace and which also causes MNT_INTERNAL, see
- * below
- */
- path.mnt = vfs_kern_mount(&efivarfs_type, 0,
- efivarfs_type.name, NULL);
- if (IS_ERR(path.mnt)) {
- pr_err("efivarfs: internal mount failed\n");
- /*
- * We may be the last pinner of the superblock but
- * calling efivarfs_kill_sb from within the notifier
- * here would deadlock trying to unregister it
- */
- INIT_WORK(&s->destroy_work, efivarfs_deactivate_super_work);
- schedule_work(&s->destroy_work);
- return PTR_ERR(path.mnt);
- }
-
- /* path.mnt now has pin on superblock, so this must be above one */
- atomic_dec(&s->s_active);
-
- file = kernel_file_open(&path, O_RDONLY | O_DIRECTORY | O_NOATIME,
- current_cred());
- /*
- * safe even if last put because no MNT_INTERNAL means this
- * will do delayed deactivate_super and not deadlock
- */
- mntput(path.mnt);
- if (IS_ERR(file))
- return NOTIFY_DONE;
+ /* Nothing for us to do. */
+ return 0;
+}
- rescan_done = true;
+static int efivarfs_unfreeze_fs(struct super_block *sb)
+{
+ struct dentry *child = NULL;
/*
- * First loop over the directory and verify each entry exists,
- * removing it if it doesn't
+ * Unconditionally resync the variable state on a thaw request.
+ * Given the size of efivarfs it really doesn't matter to simply
+ * iterate through all of the entries and resync. Freeze/thaw
+ * requests are rare enough for that to not matter and the
+ * number of entries is pretty low too. So we really don't care.
*/
- file->f_pos = 2; /* skip . and .. */
- do {
- ectx.dentry = NULL;
- iterate_dir(file, &ectx.ctx);
- if (ectx.dentry) {
- pr_info("efivarfs: removing variable %pd\n",
- ectx.dentry);
- simple_recursive_removal(ectx.dentry, NULL);
- dput(ectx.dentry);
+ pr_info("efivarfs: resyncing variable state\n");
+ for (;;) {
+ int err;
+ unsigned long size = 0;
+ struct inode *inode;
+ struct efivar_entry *entry;
+
+ child = find_next_child(sb->s_root, child);
+ if (!child)
+ break;
+
+ inode = d_inode(child);
+ entry = efivar_entry(inode);
+
+ err = efivar_entry_size(entry, &size);
+ if (err)
+ size = 0;
+ else
+ size += sizeof(__u32);
+
+ inode_lock(inode);
+ i_size_write(inode, size);
+ inode_unlock(inode);
+
+ /* The variable doesn't exist anymore, delete it. */
+ if (!size) {
+ pr_info("efivarfs: removing variable %pd\n", child);
+ simple_recursive_removal(child, NULL);
}
- } while (ectx.dentry);
- fput(file);
-
- /*
- * then loop over variables, creating them if there's no matching
- * dentry
- */
- efivar_init(efivarfs_check_missing, sfi->sb, false);
+ }
- return NOTIFY_OK;
+ efivar_init(efivarfs_check_missing, sb, false);
+ pr_info("efivarfs: finished resyncing variable state\n");
+ return 0;
}
static int efivarfs_init_fs_context(struct fs_context *fc)
@@ -593,9 +504,6 @@ static int efivarfs_init_fs_context(struct fs_context *fc)
fc->s_fs_info = sfi;
fc->ops = &efivarfs_context_ops;
- sfi->pm_nb.notifier_call = efivarfs_pm_notify;
- sfi->pm_nb.priority = 0;
-
return 0;
}
@@ -605,7 +513,6 @@ static void efivarfs_kill_sb(struct super_block *sb)
blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb);
kill_litter_super(sb);
- unregister_pm_notifier(&sfi->pm_nb);
kfree(sfi);
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2b8f9239bede7..dd0ba0532e011 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -2271,12 +2271,12 @@ out_drop_write:
if (err)
return err;
- err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+ err = freeze_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
- err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+ err = thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL);
if (err)
return err;
return -EROFS;
@@ -2333,6 +2333,6 @@ recover_out:
out_err:
f2fs_up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->gc_lock);
- thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+ thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL);
return err;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 44e5658b896c8..436cf168a9f58 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -674,7 +674,7 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
return sdp->sd_log_error;
}
-static int gfs2_do_thaw(struct gfs2_sbd *sdp)
+static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who, const void *freeze_owner)
{
struct super_block *sb = sdp->sd_vfs;
int error;
@@ -682,7 +682,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp)
error = gfs2_freeze_lock_shared(sdp);
if (error)
goto fail;
- error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = thaw_super(sb, who, freeze_owner);
if (!error)
return 0;
@@ -703,14 +703,14 @@ void gfs2_freeze_func(struct work_struct *work)
if (test_bit(SDF_FROZEN, &sdp->sd_flags))
goto freeze_failed;
- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
if (error)
goto freeze_failed;
gfs2_freeze_unlock(sdp);
set_bit(SDF_FROZEN, &sdp->sd_flags);
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE, NULL);
if (error)
goto out;
@@ -728,10 +728,13 @@ out:
/**
* gfs2_freeze_super - prevent further writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
+ * @freeze_owner: owner of the freeze
*
*/
-static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
+static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
@@ -744,7 +747,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
}
for (;;) {
- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = freeze_super(sb, who, freeze_owner);
if (error) {
fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
error);
@@ -758,7 +761,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
break;
}
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, who, freeze_owner);
if (error)
goto out;
@@ -796,10 +799,13 @@ static int gfs2_freeze_fs(struct super_block *sb)
/**
* gfs2_thaw_super - reallow writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
+ * @freeze_owner: owner of the freeze
*
*/
-static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
+static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
@@ -814,7 +820,7 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
atomic_inc(&sb->s_active);
gfs2_freeze_unlock(sdp);
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, who, freeze_owner);
if (!error) {
clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index ecc699f8d9fca..748125653d6c2 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -174,10 +174,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
switch (n) {
case 0:
- error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
+ error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL);
break;
case 1:
- error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
+ error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE, NULL);
break;
default:
return -EINVAL;
diff --git a/fs/internal.h b/fs/internal.h
index 0526e88ede39a..393f6c5c24f6b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -344,6 +344,7 @@ static inline bool path_mounted(const struct path *path)
void file_f_owner_release(struct file *file);
bool file_seek_cur_needs_f_lock(struct file *file);
int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
+struct dentry *find_next_child(struct dentry *parent, struct dentry *prev);
int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
unsigned int query_flags);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 03d9a11f22475..69107a245b4c2 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -396,8 +396,8 @@ static int ioctl_fsfreeze(struct file *filp)
/* Freeze */
if (sb->s_op->freeze_super)
- return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
- return freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
+ return freeze_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
}
static int ioctl_fsthaw(struct file *filp)
@@ -409,8 +409,8 @@ static int ioctl_fsthaw(struct file *filp)
/* Thaw */
if (sb->s_op->thaw_super)
- return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
- return thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
+ return thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
}
static int ioctl_file_dedupe_range(struct file *file,
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index a82fbce25c287..c1719b5778a11 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -62,6 +62,21 @@ const struct super_operations kernfs_sops = {
.show_options = kernfs_sop_show_options,
.show_path = kernfs_sop_show_path,
+
+ /*
+ * sysfs is built on top of kernfs and sysfs provides the power
+ * management infrastructure to support suspend/hibernate by
+ * writing to various files in /sys/power/. As filesystems may
+ * be automatically frozen during suspend/hibernate implementing
+ * freeze/thaw support for kernfs generically will cause
+ * deadlocks as the suspending/hibernation initiating task will
+ * hold a VFS lock that it will then wait upon to be released.
+ * If freeze/thaw for kernfs is needed talk to the VFS.
+ */
+ .freeze_fs = NULL,
+ .unfreeze_fs = NULL,
+ .freeze_super = NULL,
+ .thaw_super = NULL,
};
static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
diff --git a/fs/libfs.c b/fs/libfs.c
index e28da9574a652..9ea0ecc325a81 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -583,7 +583,7 @@ const struct file_operations simple_offset_dir_operations = {
.fsync = noop_fsync,
};
-static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
+struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
struct dentry *child = NULL, *d;
@@ -603,6 +603,7 @@ static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev
dput(prev);
return child;
}
+EXPORT_SYMBOL(find_next_child);
void simple_recursive_removal(struct dentry *dentry,
void (*callback)(struct dentry *))
diff --git a/fs/super.c b/fs/super.c
index 6bbdb7e59a8d2..bcc4e87123c8e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -39,7 +39,8 @@
#include <uapi/linux/mount.h>
#include "internal.h"
-static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner);
static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);
@@ -887,52 +888,48 @@ void drop_super_exclusive(struct super_block *sb)
}
EXPORT_SYMBOL(drop_super_exclusive);
-static void __iterate_supers(void (*f)(struct super_block *))
-{
- struct super_block *sb, *p = NULL;
-
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (super_flags(sb, SB_DYING))
- continue;
- sb->s_count++;
- spin_unlock(&sb_lock);
+enum super_iter_flags_t {
+ SUPER_ITER_EXCL = (1U << 0),
+ SUPER_ITER_UNLOCKED = (1U << 1),
+ SUPER_ITER_REVERSE = (1U << 2),
+};
- f(sb);
+static inline struct super_block *first_super(enum super_iter_flags_t flags)
+{
+ if (flags & SUPER_ITER_REVERSE)
+ return list_last_entry(&super_blocks, struct super_block, s_list);
+ return list_first_entry(&super_blocks, struct super_block, s_list);
+}
- spin_lock(&sb_lock);
- if (p)
- __put_super(p);
- p = sb;
- }
- if (p)
- __put_super(p);
- spin_unlock(&sb_lock);
+static inline struct super_block *next_super(struct super_block *sb,
+ enum super_iter_flags_t flags)
+{
+ if (flags & SUPER_ITER_REVERSE)
+ return list_prev_entry(sb, s_list);
+ return list_next_entry(sb, s_list);
}
-/**
- * iterate_supers - call function for all active superblocks
- * @f: function to call
- * @arg: argument to pass to it
- *
- * Scans the superblock list and calls given function, passing it
- * locked superblock and given argument.
- */
-void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
+
+static void __iterate_supers(void (*f)(struct super_block *, void *), void *arg,
+ enum super_iter_flags_t flags)
{
struct super_block *sb, *p = NULL;
+ bool excl = flags & SUPER_ITER_EXCL;
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- bool locked;
+ guard(spinlock)(&sb_lock);
+ for (sb = first_super(flags);
+ !list_entry_is_head(sb, &super_blocks, s_list);
+ sb = next_super(sb, flags)) {
+ if (super_flags(sb, SB_DYING))
+ continue;
sb->s_count++;
spin_unlock(&sb_lock);
- locked = super_lock_shared(sb);
- if (locked) {
- if (sb->s_root)
- f(sb, arg);
- super_unlock_shared(sb);
+ if (flags & SUPER_ITER_UNLOCKED) {
+ f(sb, arg);
+ } else if (super_lock(sb, excl)) {
+ f(sb, arg);
+ super_unlock(sb, excl);
}
spin_lock(&sb_lock);
@@ -942,7 +939,11 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
}
if (p)
__put_super(p);
- spin_unlock(&sb_lock);
+}
+
+void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
+{
+ __iterate_supers(f, arg, 0);
}
/**
@@ -963,15 +964,15 @@ void iterate_supers_type(struct file_system_type *type,
hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
bool locked;
+ if (super_flags(sb, SB_DYING))
+ continue;
+
sb->s_count++;
spin_unlock(&sb_lock);
locked = super_lock_shared(sb);
- if (locked) {
- if (sb->s_root)
- f(sb, arg);
- super_unlock_shared(sb);
- }
+ if (locked)
+ f(sb, arg);
spin_lock(&sb_lock);
if (p)
@@ -991,23 +992,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
- if (sb->s_dev == dev) {
- bool locked;
-
- sb->s_count++;
- spin_unlock(&sb_lock);
- /* still alive? */
- locked = super_lock(sb, excl);
- if (locked) {
- if (sb->s_root)
- return sb;
- super_unlock(sb, excl);
- }
- /* nope, got unmounted */
- spin_lock(&sb_lock);
- __put_super(sb);
- break;
- }
+ bool locked;
+
+ if (sb->s_dev != dev)
+ continue;
+
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
+ locked = super_lock(sb, excl);
+ if (locked)
+ return sb;
+
+ spin_lock(&sb_lock);
+ __put_super(sb);
+ break;
}
spin_unlock(&sb_lock);
return NULL;
@@ -1111,11 +1110,9 @@ cancel_readonly:
return retval;
}
-static void do_emergency_remount_callback(struct super_block *sb)
+static void do_emergency_remount_callback(struct super_block *sb, void *unused)
{
- bool locked = super_lock_excl(sb);
-
- if (locked && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) {
+ if (sb->s_bdev && !sb_rdonly(sb)) {
struct fs_context *fc;
fc = fs_context_for_reconfigure(sb->s_root,
@@ -1126,13 +1123,12 @@ static void do_emergency_remount_callback(struct super_block *sb)
put_fs_context(fc);
}
}
- if (locked)
- super_unlock_excl(sb);
}
static void do_emergency_remount(struct work_struct *work)
{
- __iterate_supers(do_emergency_remount_callback);
+ __iterate_supers(do_emergency_remount_callback, NULL,
+ SUPER_ITER_EXCL | SUPER_ITER_REVERSE);
kfree(work);
printk("Emergency Remount complete\n");
}
@@ -1148,24 +1144,18 @@ void emergency_remount(void)
}
}
-static void do_thaw_all_callback(struct super_block *sb)
+static void do_thaw_all_callback(struct super_block *sb, void *unused)
{
- bool locked = super_lock_excl(sb);
-
- if (locked && sb->s_root) {
- if (IS_ENABLED(CONFIG_BLOCK))
- while (sb->s_bdev && !bdev_thaw(sb->s_bdev))
- pr_warn("Emergency Thaw on %pg\n", sb->s_bdev);
- thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE);
- return;
- }
- if (locked)
- super_unlock_excl(sb);
+ if (IS_ENABLED(CONFIG_BLOCK))
+ while (sb->s_bdev && !bdev_thaw(sb->s_bdev))
+ pr_warn("Emergency Thaw on %pg\n", sb->s_bdev);
+ thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE, NULL);
+ return;
}
static void do_thaw_all(struct work_struct *work)
{
- __iterate_supers(do_thaw_all_callback);
+ __iterate_supers(do_thaw_all_callback, NULL, SUPER_ITER_EXCL);
kfree(work);
printk(KERN_WARNING "Emergency Thaw complete\n");
}
@@ -1186,6 +1176,66 @@ void emergency_thaw_all(void)
}
}
+static inline bool get_active_super(struct super_block *sb)
+{
+ bool active = false;
+
+ if (super_lock_excl(sb)) {
+ active = atomic_inc_not_zero(&sb->s_active);
+ super_unlock_excl(sb);
+ }
+ return active;
+}
+
+static const char *filesystems_freeze_ptr = "filesystems_freeze";
+
+static void filesystems_freeze_callback(struct super_block *sb, void *unused)
+{
+ if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super)
+ return;
+
+ if (!get_active_super(sb))
+ return;
+
+ if (sb->s_op->freeze_super)
+ sb->s_op->freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+ else
+ freeze_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+
+ deactivate_super(sb);
+}
+
+void filesystems_freeze(void)
+{
+ __iterate_supers(filesystems_freeze_callback, NULL,
+ SUPER_ITER_UNLOCKED | SUPER_ITER_REVERSE);
+}
+
+static void filesystems_thaw_callback(struct super_block *sb, void *unused)
+{
+ if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super)
+ return;
+
+ if (!get_active_super(sb))
+ return;
+
+ if (sb->s_op->thaw_super)
+ sb->s_op->thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+ else
+ thaw_super(sb, FREEZE_EXCL | FREEZE_HOLDER_KERNEL,
+ filesystems_freeze_ptr);
+
+ deactivate_super(sb);
+}
+
+void filesystems_thaw(void)
+{
+ __iterate_supers(filesystems_thaw_callback, NULL, SUPER_ITER_UNLOCKED);
+}
+
static DEFINE_IDA(unnamed_dev_ida);
/**
@@ -1479,10 +1529,10 @@ static int fs_bdev_freeze(struct block_device *bdev)
if (sb->s_op->freeze_super)
error = sb->s_op->freeze_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
else
error = freeze_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
if (!error)
error = sync_blockdev(bdev);
deactivate_super(sb);
@@ -1528,10 +1578,10 @@ static int fs_bdev_thaw(struct block_device *bdev)
if (sb->s_op->thaw_super)
error = sb->s_op->thaw_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
else
error = thaw_super(sb,
- FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE);
+ FREEZE_MAY_NEST | FREEZE_HOLDER_USERSPACE, NULL);
deactivate_super(sb);
return error;
}
@@ -1903,7 +1953,7 @@ static int wait_for_partially_frozen(struct super_block *sb)
}
#define FREEZE_HOLDERS (FREEZE_HOLDER_KERNEL | FREEZE_HOLDER_USERSPACE)
-#define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST)
+#define FREEZE_FLAGS (FREEZE_HOLDERS | FREEZE_MAY_NEST | FREEZE_EXCL)
static inline int freeze_inc(struct super_block *sb, enum freeze_holder who)
{
@@ -1929,11 +1979,34 @@ static inline int freeze_dec(struct super_block *sb, enum freeze_holder who)
return sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount;
}
-static inline bool may_freeze(struct super_block *sb, enum freeze_holder who)
+static inline bool may_freeze(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
+ lockdep_assert_held(&sb->s_umount);
+
WARN_ON_ONCE((who & ~FREEZE_FLAGS));
WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1);
+ if (who & FREEZE_EXCL) {
+ if (WARN_ON_ONCE(!(who & FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(who & ~(FREEZE_EXCL | FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(!freeze_owner))
+ return false;
+ /* This freeze already has a specific owner. */
+ if (sb->s_writers.freeze_owner)
+ return false;
+ /*
+ * This is already frozen multiple times so we're just
+ * going to take a reference count and mark the freeze as
+ * being owned by the caller.
+ */
+ if (sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount)
+ sb->s_writers.freeze_owner = freeze_owner;
+ return true;
+ }
+
if (who & FREEZE_HOLDER_KERNEL)
return (who & FREEZE_MAY_NEST) ||
sb->s_writers.freeze_kcount == 0;
@@ -1943,10 +2016,61 @@ static inline bool may_freeze(struct super_block *sb, enum freeze_holder who)
return false;
}
+static inline bool may_unfreeze(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
+{
+ lockdep_assert_held(&sb->s_umount);
+
+ WARN_ON_ONCE((who & ~FREEZE_FLAGS));
+ WARN_ON_ONCE(hweight32(who & FREEZE_HOLDERS) > 1);
+
+ if (who & FREEZE_EXCL) {
+ if (WARN_ON_ONCE(!(who & FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(who & ~(FREEZE_EXCL | FREEZE_HOLDER_KERNEL)))
+ return false;
+ if (WARN_ON_ONCE(!freeze_owner))
+ return false;
+ if (WARN_ON_ONCE(sb->s_writers.freeze_kcount == 0))
+ return false;
+ /* This isn't exclusively frozen. */
+ if (!sb->s_writers.freeze_owner)
+ return false;
+ /* This isn't exclusively frozen by us. */
+ if (sb->s_writers.freeze_owner != freeze_owner)
+ return false;
+ /*
+ * This is still frozen multiple times so we're just
+ * going to drop our reference count and undo our
+ * exclusive freeze.
+ */
+ if ((sb->s_writers.freeze_kcount + sb->s_writers.freeze_ucount) > 1)
+ sb->s_writers.freeze_owner = NULL;
+ return true;
+ }
+
+ if (who & FREEZE_HOLDER_KERNEL) {
+ /*
+ * Someone's trying to steal the reference belonging to
+ * @sb->s_writers.freeze_owner.
+ */
+ if (sb->s_writers.freeze_kcount == 1 &&
+ sb->s_writers.freeze_owner)
+ return false;
+ return sb->s_writers.freeze_kcount > 0;
+ }
+
+ if (who & FREEZE_HOLDER_USERSPACE)
+ return sb->s_writers.freeze_ucount > 0;
+
+ return false;
+}
+
/**
* freeze_super - lock the filesystem and force it into a consistent state
* @sb: the super to lock
* @who: context that wants to freeze
+ * @freeze_owner: owner of the freeze
*
* Syncs the super to make sure the filesystem is consistent and calls the fs's
* freeze_fs. Subsequent calls to this without first thawing the fs may return
@@ -1998,7 +2122,7 @@ static inline bool may_freeze(struct super_block *sb, enum freeze_holder who)
* Return: If the freeze was successful zero is returned. If the freeze
* failed a negative error code is returned.
*/
-int freeze_super(struct super_block *sb, enum freeze_holder who)
+int freeze_super(struct super_block *sb, enum freeze_holder who, const void *freeze_owner)
{
int ret;
@@ -2010,7 +2134,7 @@ int freeze_super(struct super_block *sb, enum freeze_holder who)
retry:
if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
- if (may_freeze(sb, who))
+ if (may_freeze(sb, who, freeze_owner))
ret = !!WARN_ON_ONCE(freeze_inc(sb, who) == 1);
else
ret = -EBUSY;
@@ -2032,6 +2156,7 @@ retry:
if (sb_rdonly(sb)) {
/* Nothing to do really... */
WARN_ON_ONCE(freeze_inc(sb, who) > 1);
+ sb->s_writers.freeze_owner = freeze_owner;
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
wake_up_var(&sb->s_writers.frozen);
super_unlock_excl(sb);
@@ -2079,6 +2204,7 @@ retry:
* when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
*/
WARN_ON_ONCE(freeze_inc(sb, who) > 1);
+ sb->s_writers.freeze_owner = freeze_owner;
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
wake_up_var(&sb->s_writers.frozen);
lockdep_sb_freeze_release(sb);
@@ -2093,13 +2219,17 @@ EXPORT_SYMBOL(freeze_super);
* removes that state without releasing the other state or unlocking the
* filesystem.
*/
-static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
int error = -EINVAL;
if (sb->s_writers.frozen != SB_FREEZE_COMPLETE)
goto out_unlock;
+ if (!may_unfreeze(sb, who, freeze_owner))
+ goto out_unlock;
+
/*
* All freezers share a single active reference.
* So just unlock in case there are any left.
@@ -2109,6 +2239,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
if (sb_rdonly(sb)) {
sb->s_writers.frozen = SB_UNFROZEN;
+ sb->s_writers.freeze_owner = NULL;
wake_up_var(&sb->s_writers.frozen);
goto out_deactivate;
}
@@ -2126,6 +2257,7 @@ static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
}
sb->s_writers.frozen = SB_UNFROZEN;
+ sb->s_writers.freeze_owner = NULL;
wake_up_var(&sb->s_writers.frozen);
sb_freeze_unlock(sb, SB_FREEZE_FS);
out_deactivate:
@@ -2141,6 +2273,7 @@ out_unlock:
* thaw_super -- unlock filesystem
* @sb: the super to thaw
* @who: context that wants to freeze
+ * @freeze_owner: owner of the freeze
*
* Unlocks the filesystem and marks it writeable again after freeze_super()
* if there are no remaining freezes on the filesystem.
@@ -2154,13 +2287,14 @@ out_unlock:
* have been frozen through the block layer via multiple block devices.
* The filesystem remains frozen until all block devices are unfrozen.
*/
-int thaw_super(struct super_block *sb, enum freeze_holder who)
+int thaw_super(struct super_block *sb, enum freeze_holder who,
+ const void *freeze_owner)
{
if (!super_lock_excl(sb)) {
WARN_ON_ONCE("Dying superblock while thawing!");
return -EINVAL;
}
- return thaw_super_locked(sb, who);
+ return thaw_super_locked(sb, who, freeze_owner);
}
EXPORT_SYMBOL(thaw_super);
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index e629663e460ae..9b598c5790ad5 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -123,7 +123,7 @@ xchk_fsfreeze(
{
int error;
- error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL);
trace_xchk_fsfreeze(sc, error);
return error;
}
@@ -135,7 +135,7 @@ xchk_fsthaw(
int error;
/* This should always succeed, we have a kernel freeze */
- error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL, NULL);
trace_xchk_fsthaw(sc, error);
return error;
}
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index ed8d8ed42f0a2..3545dc1d953c4 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -127,7 +127,7 @@ xfs_dax_notify_failure_freeze(
struct super_block *sb = mp->m_super;
int error;
- error = freeze_super(sb, FREEZE_HOLDER_KERNEL);
+ error = freeze_super(sb, FREEZE_HOLDER_KERNEL, NULL);
if (error)
xfs_emerg(mp, "already frozen by kernel, err=%d", error);
@@ -143,7 +143,7 @@ xfs_dax_notify_failure_thaw(
int error;
if (kernel_frozen) {
- error = thaw_super(sb, FREEZE_HOLDER_KERNEL);
+ error = thaw_super(sb, FREEZE_HOLDER_KERNEL, NULL);
if (error)
xfs_emerg(mp, "still frozen after notify failure, err=%d",
error);
@@ -153,7 +153,7 @@ xfs_dax_notify_failure_thaw(
* Also thaw userspace call anyway because the device is about to be
* removed immediately.
*/
- thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ thaw_super(sb, FREEZE_HOLDER_USERSPACE, NULL);
}
static int
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9120fd0759cdd..81f8f0aec5635 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1316,6 +1316,7 @@ struct sb_writers {
unsigned short frozen; /* Is sb frozen? */
int freeze_kcount; /* How many kernel freeze requests? */
int freeze_ucount; /* How many userspace freeze requests? */
+ const void *freeze_owner; /* Owner of the freeze */
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
@@ -1789,7 +1790,7 @@ static inline void __sb_end_write(struct super_block *sb, int level)
static inline void __sb_start_write(struct super_block *sb, int level)
{
- percpu_down_read(sb->s_writers.rw_sem + level - 1);
+ percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true);
}
static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
@@ -2288,6 +2289,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
* @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
* @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
* @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
+ * @FREEZE_EXCL: a freeze that can only be undone by the owner
*
* Indicate who the owner of the freeze or thaw request is and whether
* the freeze needs to be exclusive or can nest.
@@ -2301,6 +2303,7 @@ enum freeze_holder {
FREEZE_HOLDER_KERNEL = (1U << 0),
FREEZE_HOLDER_USERSPACE = (1U << 1),
FREEZE_MAY_NEST = (1U << 2),
+ FREEZE_EXCL = (1U << 3),
};
struct super_operations {
@@ -2314,9 +2317,9 @@ struct super_operations {
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *, enum freeze_holder who);
+ int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner);
int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *, enum freeze_holder who);
+ int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
@@ -2726,8 +2729,10 @@ extern int unregister_filesystem(struct file_system_type *);
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
-int freeze_super(struct super_block *super, enum freeze_holder who);
-int thaw_super(struct super_block *super, enum freeze_holder who);
+int freeze_super(struct super_block *super, enum freeze_holder who,
+ const void *freeze_owner);
+int thaw_super(struct super_block *super, enum freeze_holder who,
+ const void *freeze_owner);
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
@@ -3536,9 +3541,11 @@ extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
-extern void iterate_supers(void (*)(struct super_block *, void *), void *);
+extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg);
extern void iterate_supers_type(struct file_system_type *,
void (*)(struct super_block *, void *), void *);
+void filesystems_freeze(void);
+void filesystems_thaw(void);
extern int dcache_dir_open(struct inode *, struct file *);
extern int dcache_dir_close(struct inode *, struct file *);
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index af7d75ede6197..288f5235649a5 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -43,9 +43,10 @@ is_static struct percpu_rw_semaphore name = { \
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, static)
-extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool);
-static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem,
+ bool freezable)
{
might_sleep();
@@ -63,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss)))
this_cpu_inc(*sem->read_count);
else
- __percpu_down_read(sem, false); /* Unconditional memory barrier */
+ __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */
/*
* The preempt_enable() prevents the compiler from
* bleeding the critical section out.
@@ -71,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
preempt_enable();
}
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+ percpu_down_read_internal(sem, false);
+}
+
+static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem,
+ bool freeze)
+{
+ percpu_down_read_internal(sem, freeze);
+}
+
static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
bool ret = true;
@@ -82,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss)))
this_cpu_inc(*sem->read_count);
else
- ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+ ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */
preempt_enable();
/*
* The barrier() from preempt_enable() prevents the compiler from
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index d6964fc29f513..ef234469baaca 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -138,7 +138,8 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
return !reader; /* wake (readers until) 1 writer */
}
-static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader,
+ bool freeze)
{
DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
bool wait;
@@ -156,7 +157,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
spin_unlock_irq(&sem->waiters.lock);
while (wait) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE |
+ (freeze ? TASK_FREEZABLE : 0));
if (!smp_load_acquire(&wq_entry.private))
break;
schedule();
@@ -164,7 +166,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
__set_current_state(TASK_RUNNING);
}
-bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try,
+ bool freeze)
{
if (__percpu_down_read_trylock(sem))
return true;
@@ -174,7 +177,7 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
preempt_enable();
- percpu_rwsem_wait(sem, /* .reader = */ true);
+ percpu_rwsem_wait(sem, /* .reader = */ true, freeze);
preempt_disable();
trace_contention_end(sem, 0);
@@ -237,7 +240,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
*/
if (!__percpu_down_write_trylock(sem)) {
trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
- percpu_rwsem_wait(sem, /* .reader = */ false);
+ percpu_rwsem_wait(sem, /* .reader = */ false, false);
contended = true;
}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 23c0f4e6cb2ff..338c9917d4ee2 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -778,6 +778,8 @@ int hibernate(void)
goto Restore;
ksys_sync_helper();
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
error = freeze_processes();
if (error)
@@ -846,6 +848,7 @@ int hibernate(void)
/* Don't bother checking whether freezer_test_done is true */
freezer_test_done = false;
Exit:
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_HIBERNATION);
Restore:
pm_restore_console();
@@ -882,6 +885,9 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
if (error)
goto restore;
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
+
error = freeze_processes();
if (error)
goto exit;
@@ -941,6 +947,7 @@ thaw:
thaw_processes();
exit:
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_HIBERNATION);
restore:
@@ -1029,19 +1036,26 @@ static int software_resume(void)
if (error)
goto Restore;
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
+
pm_pr_dbg("Preparing processes for hibernation restore.\n");
error = freeze_processes();
- if (error)
+ if (error) {
+ filesystems_thaw();
goto Close_Finish;
+ }
error = freeze_kernel_threads();
if (error) {
thaw_processes();
+ filesystems_thaw();
goto Close_Finish;
}
error = load_image_and_restore();
thaw_processes();
+ filesystems_thaw();
Finish:
pm_notifier_call_chain(PM_POST_RESTORE);
Restore:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6254814d48171..0b0e76324c435 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -962,6 +962,34 @@ power_attr(pm_freeze_timeout);
#endif /* CONFIG_FREEZER*/
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+bool filesystem_freeze_enabled = false;
+
+static ssize_t freeze_filesystems_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", filesystem_freeze_enabled);
+}
+
+static ssize_t freeze_filesystems_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long val;
+
+ if (kstrtoul(buf, 10, &val))
+ return -EINVAL;
+
+ if (val > 1)
+ return -EINVAL;
+
+ filesystem_freeze_enabled = !!val;
+ return n;
+}
+
+power_attr(freeze_filesystems);
+#endif /* CONFIG_SUSPEND || CONFIG_HIBERNATION */
+
static struct attribute * g[] = {
&state_attr.attr,
#ifdef CONFIG_PM_TRACE
@@ -992,6 +1020,9 @@ static struct attribute * g[] = {
#ifdef CONFIG_FREEZER
&pm_freeze_timeout_attr.attr,
#endif
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+ &freeze_filesystems_attr.attr,
+#endif
NULL,
};
diff --git a/kernel/power/power.h b/kernel/power/power.h
index c352dea2f67b5..2eb81662b8fa5 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -18,6 +18,10 @@ struct swsusp_info {
unsigned long size;
} __aligned(PAGE_SIZE);
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+extern bool filesystem_freeze_enabled;
+#endif
+
#ifdef CONFIG_HIBERNATION
/* kernel/power/snapshot.c */
extern void __init hibernate_reserved_size_init(void);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8eaec4ab121d4..76b141b9aac01 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -30,6 +30,7 @@
#include <trace/events/power.h>
#include <linux/compiler.h>
#include <linux/moduleparam.h>
+#include <linux/fs.h>
#include "power.h"
@@ -374,6 +375,8 @@ static int suspend_prepare(suspend_state_t state)
if (error)
goto Restore;
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
trace_suspend_resume(TPS("freeze_processes"), 0, true);
error = suspend_freeze_processes();
trace_suspend_resume(TPS("freeze_processes"), 0, false);
@@ -550,6 +553,7 @@ int suspend_devices_and_enter(suspend_state_t state)
static void suspend_finish(void)
{
suspend_thaw_processes();
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_SUSPEND);
pm_restore_console();
}
@@ -588,6 +592,8 @@ static int enter_state(suspend_state_t state)
ksys_sync_helper();
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
}
+ if (filesystem_freeze_enabled)
+ filesystems_freeze();
pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
pm_suspend_clear_flags();
@@ -609,6 +615,7 @@ static int enter_state(suspend_state_t state)
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
Unlock:
+ filesystems_thaw();
mutex_unlock(&system_transition_mutex);
return error;
}