diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2025-09-29 10:10:05 +0200 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2025-09-29 10:10:05 +0200 |
commit | 17eb8812917b10ce4146b5595f91d8e45bfe68cc (patch) | |
tree | f905aa6c71212f4b460e23ceaec98346b999066a /fs/btrfs/tree-log.c | |
parent | 57610d69f909ea2ea072775c6ad64ba9fad590bb (diff) | |
parent | 23199d2aa6dcaf6dd2da772f93d2c94317d71459 (diff) |
Merge tag 'linux-cpupower-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux
Merge cpupower utility updates for 6.18-rc1 from Shuah Khan:
"Fixes incorrect return vale in cpupower_write_sysfs() error path
and passing incorrect size to cpuidle_state_write_file() while
writing status to disable file in cpuidle_state_disable()."
* tag 'linux-cpupower-6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux: (1125 commits)
tools/cpupower: Fix incorrect size in cpuidle_state_disable()
tools/cpupower: fix error return value in cpupower_write_sysfs()
Linux 6.17-rc6
MAINTAINERS: Input: Drop melfas-mip4 section
USB: core: remove the move buf action
MAINTAINERS: Update the DMA Rust entry
erofs: fix long xattr name prefix placement
Revert "net: usb: asix: ax88772: drop phylink use in PM to avoid MDIO runtime PM wakeups"
hsr: hold rcu and dev lock for hsr_get_port_ndev
hsr: use hsr_for_each_port_rtnl in hsr_port_get_hsr
hsr: use rtnl lock when iterating over ports
wifi: nl80211: completely disable per-link stats for now
net: usb: asix: ax88772: drop phylink use in PM to avoid MDIO runtime PM wakeups
net: ethtool: fix wrong type used in struct kernel_ethtool_ts_info
selftests/bpf: Skip timer cases when bpf_timer is not supported
bpf: Reject bpf_timer for PREEMPT_RT
libceph: fix invalid accesses to ceph_connection_v1_info
PM: hibernate: Restrict GFP mask in hibernation_snapshot()
MAINTAINERS: add Phil as netfilter reviewer
netfilter: nf_tables: restart set lookup on base_seq change
...
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 78 |
1 files changed, 53 insertions, 25 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 69e11557fd13..7d5d90845ca9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3340,6 +3340,31 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } +static bool mark_inode_as_not_logged(const struct btrfs_trans_handle *trans, + struct btrfs_inode *inode) +{ + bool ret = false; + + /* + * Do this only if ->logged_trans is still 0 to prevent races with + * concurrent logging as we may see the inode not logged when + * inode_logged() is called but it gets logged after inode_logged() did + * not find it in the log tree and we end up setting ->logged_trans to a + * value less than trans->transid after the concurrent logging task has + * set it to trans->transid. As a consequence, subsequent rename, unlink + * and link operations may end up not logging new names and removing old + * names from the log. + */ + spin_lock(&inode->lock); + if (inode->logged_trans == 0) + inode->logged_trans = trans->transid - 1; + else if (inode->logged_trans == trans->transid) + ret = true; + spin_unlock(&inode->lock); + + return ret; +} + /* * Check if an inode was logged in the current transaction. This correctly deals * with the case where the inode was logged but has a logged_trans of 0, which @@ -3357,15 +3382,32 @@ static int inode_logged(const struct btrfs_trans_handle *trans, struct btrfs_key key; int ret; - if (inode->logged_trans == trans->transid) + /* + * Quick lockless call, since once ->logged_trans is set to the current + * transaction, we never set it to a lower value anywhere else. + */ + if (data_race(inode->logged_trans) == trans->transid) return 1; /* - * If logged_trans is not 0, then we know the inode logged was not logged - * in this transaction, so we can return false right away. + * If logged_trans is not 0 and not trans->transid, then we know the + * inode was not logged in this transaction, so we can return false + * right away. We take the lock to avoid a race caused by load/store + * tearing with a concurrent btrfs_log_inode() call or a concurrent task + * in this function further below - an update to trans->transid can be + * teared into two 32 bits updates for example, in which case we could + * see a positive value that is not trans->transid and assume the inode + * was not logged when it was. */ - if (inode->logged_trans > 0) + spin_lock(&inode->lock); + if (inode->logged_trans == trans->transid) { + spin_unlock(&inode->lock); + return 1; + } else if (inode->logged_trans > 0) { + spin_unlock(&inode->lock); return 0; + } + spin_unlock(&inode->lock); /* * If no log tree was created for this root in this transaction, then @@ -3374,10 +3416,8 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * transaction's ID, to avoid the search below in a future call in case * a log tree gets created after this. */ - if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) { - inode->logged_trans = trans->transid - 1; - return 0; - } + if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) + return mark_inode_as_not_logged(trans, inode); /* * We have a log tree and the inode's logged_trans is 0. We can't tell @@ -3431,8 +3471,7 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * Set logged_trans to a value greater than 0 and less then the * current transaction to avoid doing the search in future calls. */ - inode->logged_trans = trans->transid - 1; - return 0; + return mark_inode_as_not_logged(trans, inode); } /* @@ -3440,20 +3479,9 @@ static int inode_logged(const struct btrfs_trans_handle *trans, * the current transacion's ID, to avoid future tree searches as long as * the inode is not evicted again. */ + spin_lock(&inode->lock); inode->logged_trans = trans->transid; - - /* - * If it's a directory, then we must set last_dir_index_offset to the - * maximum possible value, so that the next attempt to log the inode does - * not skip checking if dir index keys found in modified subvolume tree - * leaves have been logged before, otherwise it would result in attempts - * to insert duplicate dir index keys in the log tree. This must be done - * because last_dir_index_offset is an in-memory only field, not persisted - * in the inode item or any other on-disk structure, so its value is lost - * once the inode is evicted. - */ - if (S_ISDIR(inode->vfs_inode.i_mode)) - inode->last_dir_index_offset = (u64)-1; + spin_unlock(&inode->lock); return 1; } @@ -4045,7 +4073,7 @@ done: /* * If the inode was logged before and it was evicted, then its - * last_dir_index_offset is (u64)-1, so we don't the value of the last index + * last_dir_index_offset is 0, so we don't know the value of the last index * key offset. If that's the case, search for it and update the inode. This * is to avoid lookups in the log tree every time we try to insert a dir index * key from a leaf changed in the current transaction, and to allow us to always @@ -4061,7 +4089,7 @@ static int update_last_dir_index_offset(struct btrfs_inode *inode, lockdep_assert_held(&inode->log_mutex); - if (inode->last_dir_index_offset != (u64)-1) + if (inode->last_dir_index_offset != 0) return 0; if (!ctx->logged_before) { |