Diffstat (limited to 'fs/xfs')
| -rw-r--r-- | fs/xfs/Kconfig                  |   2 |
| -rw-r--r-- | fs/xfs/libxfs/xfs_log_recover.h |  22 |
| -rw-r--r-- | fs/xfs/libxfs/xfs_sb.c          |   3 |
| -rw-r--r-- | fs/xfs/scrub/scrub.c            |   4 |
| -rw-r--r-- | fs/xfs/scrub/stats.c            |   5 |
| -rw-r--r-- | fs/xfs/xfs_attr_inactive.c      |   1 |
| -rw-r--r-- | fs/xfs/xfs_attr_item.c          |   7 |
| -rw-r--r-- | fs/xfs/xfs_bmap_item.c          |   4 |
| -rw-r--r-- | fs/xfs/xfs_export.c             |   6 |
| -rw-r--r-- | fs/xfs/xfs_extfree_item.c       |   4 |
| -rw-r--r-- | fs/xfs/xfs_fsmap.c              |  25 |
| -rw-r--r-- | fs/xfs/xfs_icache.c             |  80 |
| -rw-r--r-- | fs/xfs/xfs_icache.h             |   1 |
| -rw-r--r-- | fs/xfs/xfs_inode.c              | 209 |
| -rw-r--r-- | fs/xfs/xfs_inode.h              |  34 |
| -rw-r--r-- | fs/xfs/xfs_itable.c             |   9 |
| -rw-r--r-- | fs/xfs/xfs_log.c                |  17 |
| -rw-r--r-- | fs/xfs/xfs_log_cil.c            |  52 |
| -rw-r--r-- | fs/xfs/xfs_log_priv.h           |  14 |
| -rw-r--r-- | fs/xfs/xfs_log_recover.c        |   4 |
| -rw-r--r-- | fs/xfs/xfs_mount.h              |  17 |
| -rw-r--r-- | fs/xfs/xfs_qm.c                 |   7 |
| -rw-r--r-- | fs/xfs/xfs_refcount_item.c      |   6 |
| -rw-r--r-- | fs/xfs/xfs_rmap_item.c          |   6 |
| -rw-r--r-- | fs/xfs/xfs_super.c              |  86 |
| -rw-r--r-- | fs/xfs/xfs_trace.h              |  45 |
| -rw-r--r-- | fs/xfs/xfs_xattr.c              |  11 |
27 files changed, 441 insertions, 240 deletions
| diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index c9d653168ad0..ed0bc8cbc703 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS  	bool "XFS online metadata check usage data collection"  	default y  	depends on XFS_ONLINE_SCRUB -	select FS_DEBUG +	select XFS_DEBUG  	help  	  If you say Y here, the kernel will gather usage data about  	  the online metadata check subsystem.  This includes the number diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 2420865f3007..a5100a11faf9 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -131,4 +131,26 @@ void xlog_check_buf_cancel_table(struct xlog *log);  #define xlog_check_buf_cancel_table(log) do { } while (0)  #endif +/* + * Transform a regular reservation into one suitable for recovery of a log + * intent item. + * + * Intent recovery only runs a single step of the transaction chain and defers + * the rest to a separate transaction.  Therefore, we reduce logcount to 1 here + * to avoid livelocks if the log grant space is nearly exhausted due to the + * recovered intent pinning the tail.  Keep the same logflags to avoid tripping + * asserts elsewhere.  Struct copies abound below. + */ +static inline struct xfs_trans_res +xlog_recover_resv(const struct xfs_trans_res *r) +{ +	struct xfs_trans_res ret = { +		.tr_logres	= r->tr_logres, +		.tr_logcount	= 1, +		.tr_logflags	= r->tr_logflags, +	}; + +	return ret; +} +  #endif	/* __XFS_LOG_RECOVER_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5e174685a77c..6264daaab37b 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -266,7 +266,8 @@ xfs_validate_sb_write(  		return -EFSCORRUPTED;  	} -	if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { +	if (!xfs_is_readonly(mp) && +	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {  		xfs_alert(mp,  "Corruption detected in superblock read-only compatible features (0x%x)!",  			(sbp->sb_features_ro_compat & diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 7d3aa14d81b5..4849efcaa33a 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -588,6 +588,8 @@ out_nofix:  out_teardown:  	error = xchk_teardown(sc, error);  out_sc: +	if (error != -ENOENT) +		xchk_stats_merge(mp, sm, &run);  	kfree(sc);  out:  	trace_xchk_done(XFS_I(file_inode(file)), sm, error); @@ -595,8 +597,6 @@ out:  		sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;  		error = 0;  	} -	if (error != -ENOENT) -		xchk_stats_merge(mp, sm, &run);  	return error;  need_drain:  	error = xchk_teardown(sc, 0); diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index aeb92624176b..cd91db4a5548 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -185,7 +185,10 @@ xchk_stats_merge_one(  {  	struct xchk_scrub_stats		*css; -	ASSERT(sm->sm_type < XFS_SCRUB_TYPE_NR); +	if (sm->sm_type >= XFS_SCRUB_TYPE_NR) { +		ASSERT(sm->sm_type < XFS_SCRUB_TYPE_NR); +		return; +	}  	css = &cs->cs_stats[sm->sm_type];  	spin_lock(&css->css_lock); diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 5db87b34fb6e..89c7a9f4f930 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -333,7 +333,6 @@ xfs_attr_inactive(  	int			error = 0;  	mp = dp->i_mount; -	ASSERT(! 
XFS_NOT_DQATTACHED(mp, dp));  	xfs_ilock(dp, lock_mode);  	if (!xfs_inode_has_attr_fork(dp)) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 2788a6f2edcd..36fe2abb16e6 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -547,7 +547,7 @@ xfs_attri_item_recover(  	struct xfs_inode		*ip;  	struct xfs_da_args		*args;  	struct xfs_trans		*tp; -	struct xfs_trans_res		tres; +	struct xfs_trans_res		resv;  	struct xfs_attri_log_format	*attrp;  	struct xfs_attri_log_nameval	*nv = attrip->attri_nameval;  	int				error; @@ -618,8 +618,9 @@ xfs_attri_item_recover(  		goto out;  	} -	xfs_init_attr_trans(args, &tres, &total); -	error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp); +	xfs_init_attr_trans(args, &resv, &total); +	resv = xlog_recover_resv(&resv); +	error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp);  	if (error)  		goto out; diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 7551c3ec4ea5..e736a0844c89 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -490,6 +490,7 @@ xfs_bui_item_recover(  	struct list_head		*capture_list)  {  	struct xfs_bmap_intent		fake = { }; +	struct xfs_trans_res		resv;  	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);  	struct xfs_trans		*tp;  	struct xfs_inode		*ip = NULL; @@ -515,7 +516,8 @@ xfs_bui_item_recover(  		return error;  	/* Allocate transaction and do the work. */ -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, +	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); +	error = xfs_trans_alloc(mp, &resv,  			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);  	if (error)  		goto err_rele; diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 1064c2342876..f71ea786a6d2 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -146,6 +146,12 @@ xfs_nfs_get_inode(  		return ERR_PTR(error);  	} +	error = xfs_inode_reload_unlinked(ip); +	if (error) { +		xfs_irele(ip); +		return ERR_PTR(error); +	} +  	if (VFS_I(ip)->i_generation != generation) {  		xfs_irele(ip);  		return ERR_PTR(-ESTALE); diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f1a5ecf099aa..3fa8789820ad 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -660,6 +660,7 @@ xfs_efi_item_recover(  	struct xfs_log_item		*lip,  	struct list_head		*capture_list)  { +	struct xfs_trans_res		resv;  	struct xfs_efi_log_item		*efip = EFI_ITEM(lip);  	struct xfs_mount		*mp = lip->li_log->l_mp;  	struct xfs_efd_log_item		*efdp; @@ -683,7 +684,8 @@ xfs_efi_item_recover(  		}  	} -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); +	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); +	error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp);  	if (error)  		return error;  	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 10403ba9b58f..736e5545f584 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -565,6 +565,19 @@ err:  }  #endif /* CONFIG_XFS_RT */ +static inline bool +rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) +{ +	if (!xfs_has_reflink(mp)) +		return true; +	if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner)) +		return true; +	if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | +			   XFS_RMAP_UNWRITTEN)) +		return true; +	return false; +} +  /* Execute a getfsmap query against the regular data device. 
*/  STATIC int  __xfs_getfsmap_datadev( @@ -598,7 +611,6 @@ __xfs_getfsmap_datadev(  	 * low to the fsmap low key and max out the high key to the end  	 * of the AG.  	 */ -	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);  	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);  	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);  	if (error) @@ -608,12 +620,9 @@ __xfs_getfsmap_datadev(  	/* Adjust the low key if we are continuing from where we left off. */  	if (info->low.rm_blockcount == 0) { -		/* empty */ -	} else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) || -		   (info->low.rm_flags & (XFS_RMAP_ATTR_FORK | -					  XFS_RMAP_BMBT_BLOCK | -					  XFS_RMAP_UNWRITTEN))) { -		info->low.rm_startblock += info->low.rm_blockcount; +		/* No previous record from which to continue */ +	} else if (rmap_not_shareable(mp, &info->low)) { +		/* Last record seen was an unshareable extent */  		info->low.rm_owner = 0;  		info->low.rm_offset = 0; @@ -621,8 +630,10 @@ __xfs_getfsmap_datadev(  		if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)  			return 0;  	} else { +		/* Last record seen was a shareable file data extent */  		info->low.rm_offset += info->low.rm_blockcount;  	} +	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);  	info->high.rm_startblock = -1U;  	info->high.rm_owner = ULLONG_MAX; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index e541f5c0bc25..3c210ac83713 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -113,7 +113,7 @@ xfs_inode_alloc(  	INIT_LIST_HEAD(&ip->i_ioend_list);  	spin_lock_init(&ip->i_ioend_lock);  	ip->i_next_unlinked = NULLAGINO; -	ip->i_prev_unlinked = NULLAGINO; +	ip->i_prev_unlinked = 0;  	return ip;  } @@ -443,7 +443,7 @@ xfs_inodegc_queue_all(  	int			cpu;  	bool			ret = false; -	for_each_online_cpu(cpu) { +	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {  		gc = per_cpu_ptr(mp->m_inodegc, cpu);  		if (!llist_empty(&gc->list)) {  			mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); @@ -463,7 +463,7 @@ xfs_inodegc_wait_all(  	int			error = 0;  	flush_workqueue(mp->m_inodegc_wq); -	for_each_online_cpu(cpu) { +	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {  		struct xfs_inodegc	*gc;  		gc = per_cpu_ptr(mp->m_inodegc, cpu); @@ -1845,9 +1845,17 @@ xfs_inodegc_worker(  						struct xfs_inodegc, work);  	struct llist_node	*node = llist_del_all(&gc->list);  	struct xfs_inode	*ip, *n; +	struct xfs_mount	*mp = gc->mp;  	unsigned int		nofs_flag; -	ASSERT(gc->cpu == smp_processor_id()); +	/* +	 * Clear the cpu mask bit and ensure that we have seen the latest +	 * update of the gc structure associated with this CPU. This matches +	 * with the release semantics used when setting the cpumask bit in +	 * xfs_inodegc_queue. 
+	 */ +	cpumask_clear_cpu(gc->cpu, &mp->m_inodegc_cpumask); +	smp_mb__after_atomic();  	WRITE_ONCE(gc->items, 0); @@ -1862,7 +1870,7 @@ xfs_inodegc_worker(  	nofs_flag = memalloc_nofs_save();  	ip = llist_entry(node, struct xfs_inode, i_gclist); -	trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); +	trace_xfs_inodegc_worker(mp, READ_ONCE(gc->shrinker_hits));  	WRITE_ONCE(gc->shrinker_hits, 0);  	llist_for_each_entry_safe(ip, n, node, i_gclist) { @@ -2057,6 +2065,7 @@ xfs_inodegc_queue(  	struct xfs_inodegc	*gc;  	int			items;  	unsigned int		shrinker_hits; +	unsigned int		cpu_nr;  	unsigned long		queue_delay = 1;  	trace_xfs_inode_set_need_inactive(ip); @@ -2064,18 +2073,28 @@ xfs_inodegc_queue(  	ip->i_flags |= XFS_NEED_INACTIVE;  	spin_unlock(&ip->i_flags_lock); -	gc = get_cpu_ptr(mp->m_inodegc); +	cpu_nr = get_cpu(); +	gc = this_cpu_ptr(mp->m_inodegc);  	llist_add(&ip->i_gclist, &gc->list);  	items = READ_ONCE(gc->items);  	WRITE_ONCE(gc->items, items + 1);  	shrinker_hits = READ_ONCE(gc->shrinker_hits);  	/* +	 * Ensure the list add is always seen by anyone who finds the cpumask +	 * bit set. This effectively gives the cpumask bit set operation +	 * release ordering semantics. +	 */ +	smp_mb__before_atomic(); +	if (!cpumask_test_cpu(cpu_nr, &mp->m_inodegc_cpumask)) +		cpumask_test_and_set_cpu(cpu_nr, &mp->m_inodegc_cpumask); + +	/*  	 * We queue the work while holding the current CPU so that the work  	 * is scheduled to run on this CPU.  	 */  	if (!xfs_is_inodegc_enabled(mp)) { -		put_cpu_ptr(gc); +		put_cpu();  		return;  	} @@ -2085,7 +2104,7 @@ xfs_inodegc_queue(  	trace_xfs_inodegc_queue(mp, __return_address);  	mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,  			queue_delay); -	put_cpu_ptr(gc); +	put_cpu();  	if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {  		trace_xfs_inodegc_throttle(mp, __return_address); @@ -2094,47 +2113,6 @@ xfs_inodegc_queue(  }  /* - * Fold the dead CPU inodegc queue into the current CPUs queue. - */ -void -xfs_inodegc_cpu_dead( -	struct xfs_mount	*mp, -	unsigned int		dead_cpu) -{ -	struct xfs_inodegc	*dead_gc, *gc; -	struct llist_node	*first, *last; -	unsigned int		count = 0; - -	dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu); -	cancel_delayed_work_sync(&dead_gc->work); - -	if (llist_empty(&dead_gc->list)) -		return; - -	first = dead_gc->list.first; -	last = first; -	while (last->next) { -		last = last->next; -		count++; -	} -	dead_gc->list.first = NULL; -	dead_gc->items = 0; - -	/* Add pending work to current CPU */ -	gc = get_cpu_ptr(mp->m_inodegc); -	llist_add_batch(first, last, &gc->list); -	count += READ_ONCE(gc->items); -	WRITE_ONCE(gc->items, count); - -	if (xfs_is_inodegc_enabled(mp)) { -		trace_xfs_inodegc_queue(mp, __return_address); -		mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work, -				0); -	} -	put_cpu_ptr(gc); -} - -/*   * We set the inode flag atomically with the radix tree tag.  Once we get tag   * lookups on the radix tree, this inode flag can go away.   
* @@ -2195,7 +2173,7 @@ xfs_inodegc_shrinker_count(  	if (!xfs_is_inodegc_enabled(mp))  		return 0; -	for_each_online_cpu(cpu) { +	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {  		gc = per_cpu_ptr(mp->m_inodegc, cpu);  		if (!llist_empty(&gc->list))  			return XFS_INODEGC_SHRINKER_COUNT; @@ -2220,7 +2198,7 @@ xfs_inodegc_shrinker_scan(  	trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address); -	for_each_online_cpu(cpu) { +	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {  		gc = per_cpu_ptr(mp->m_inodegc, cpu);  		if (!llist_empty(&gc->list)) {  			unsigned int	h = READ_ONCE(gc->shrinker_hits); diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 2fa6f2e09d07..905944dafbe5 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -79,7 +79,6 @@ void xfs_inodegc_push(struct xfs_mount *mp);  int xfs_inodegc_flush(struct xfs_mount *mp);  void xfs_inodegc_stop(struct xfs_mount *mp);  void xfs_inodegc_start(struct xfs_mount *mp); -void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);  int xfs_inodegc_register_shrinker(struct xfs_mount *mp);  #endif diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 360fe83a334f..f94f7b374041 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1642,8 +1642,11 @@ xfs_inode_needs_inactive(  	if (VFS_I(ip)->i_mode == 0)  		return false; -	/* If this is a read-only mount, don't do this (would generate I/O) */ -	if (xfs_is_readonly(mp)) +	/* +	 * If this is a read-only mount, don't do this (would generate I/O) +	 * unless we're in log recovery and cleaning the iunlinked list. +	 */ +	if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log))  		return false;  	/* If the log isn't running, push inodes straight to reclaim. */ @@ -1703,8 +1706,11 @@ xfs_inactive(  	mp = ip->i_mount;  	ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); -	/* If this is a read-only mount, don't do this (would generate I/O) */ -	if (xfs_is_readonly(mp)) +	/* +	 * If this is a read-only mount, don't do this (would generate I/O) +	 * unless we're in log recovery and cleaning the iunlinked list. +	 */ +	if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log))  		goto out;  	/* Metadata inodes require explicit resource cleanup. */ @@ -1736,9 +1742,13 @@ xfs_inactive(  	     ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))  		truncate = 1; -	error = xfs_qm_dqattach(ip); -	if (error) -		goto out; +	if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) { +		xfs_qm_dqdetach(ip); +	} else { +		error = xfs_qm_dqattach(ip); +		if (error) +			goto out; +	}  	if (S_ISLNK(VFS_I(ip)->i_mode))  		error = xfs_inactive_symlink(ip); @@ -1822,12 +1832,17 @@ xfs_iunlink_lookup(  	rcu_read_lock();  	ip = radix_tree_lookup(&pag->pag_ici_root, agino); +	if (!ip) { +		/* Caller can handle inode not being in memory. */ +		rcu_read_unlock(); +		return NULL; +	}  	/* -	 * Inode not in memory or in RCU freeing limbo should not happen. -	 * Warn about this and let the caller handle the failure. +	 * Inode in RCU freeing limbo should not happen.  Warn about this and +	 * let the caller handle the failure.  	 */ -	if (WARN_ON_ONCE(!ip || !ip->i_ino)) { +	if (WARN_ON_ONCE(!ip->i_ino)) {  		rcu_read_unlock();  		return NULL;  	} @@ -1836,7 +1851,10 @@ xfs_iunlink_lookup(  	return ip;  } -/* Update the prev pointer of the next agino. */ +/* + * Update the prev pointer of the next agino.  Returns -ENOLINK if the inode + * is not in cache. 
+ */  static int  xfs_iunlink_update_backref(  	struct xfs_perag	*pag, @@ -1851,7 +1869,8 @@ xfs_iunlink_update_backref(  	ip = xfs_iunlink_lookup(pag, next_agino);  	if (!ip) -		return -EFSCORRUPTED; +		return -ENOLINK; +  	ip->i_prev_unlinked = prev_agino;  	return 0;  } @@ -1895,6 +1914,64 @@ xfs_iunlink_update_bucket(  	return 0;  } +/* + * Load the inode @next_agino into the cache and set its prev_unlinked pointer + * to @prev_agino.  Caller must hold the AGI to synchronize with other changes + * to the unlinked list. + */ +STATIC int +xfs_iunlink_reload_next( +	struct xfs_trans	*tp, +	struct xfs_buf		*agibp, +	xfs_agino_t		prev_agino, +	xfs_agino_t		next_agino) +{ +	struct xfs_perag	*pag = agibp->b_pag; +	struct xfs_mount	*mp = pag->pag_mount; +	struct xfs_inode	*next_ip = NULL; +	xfs_ino_t		ino; +	int			error; + +	ASSERT(next_agino != NULLAGINO); + +#ifdef DEBUG +	rcu_read_lock(); +	next_ip = radix_tree_lookup(&pag->pag_ici_root, next_agino); +	ASSERT(next_ip == NULL); +	rcu_read_unlock(); +#endif + +	xfs_info_ratelimited(mp, + "Found unrecovered unlinked inode 0x%x in AG 0x%x.  Initiating recovery.", +			next_agino, pag->pag_agno); + +	/* +	 * Use an untrusted lookup just to be cautious in case the AGI has been +	 * corrupted and now points at a free inode.  That shouldn't happen, +	 * but we'd rather shut down now since we're already running in a weird +	 * situation. +	 */ +	ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino); +	error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &next_ip); +	if (error) +		return error; + +	/* If this is not an unlinked inode, something is very wrong. */ +	if (VFS_I(next_ip)->i_nlink != 0) { +		error = -EFSCORRUPTED; +		goto rele; +	} + +	next_ip->i_prev_unlinked = prev_agino; +	trace_xfs_iunlink_reload_next(next_ip); +rele: +	ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE)); +	if (xfs_is_quotacheck_running(mp) && next_ip) +		xfs_iflags_set(next_ip, XFS_IQUOTAUNCHECKED); +	xfs_irele(next_ip); +	return error; +} +  static int  xfs_iunlink_insert_inode(  	struct xfs_trans	*tp, @@ -1926,6 +2003,8 @@ xfs_iunlink_insert_inode(  	 * inode.  	 */  	error = xfs_iunlink_update_backref(pag, agino, next_agino); +	if (error == -ENOLINK) +		error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);  	if (error)  		return error; @@ -1941,6 +2020,7 @@ xfs_iunlink_insert_inode(  	}  	/* Point the head of the list to point to this inode. */ +	ip->i_prev_unlinked = NULLAGINO;  	return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);  } @@ -2020,6 +2100,9 @@ xfs_iunlink_remove_inode(  	 */  	error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,  			ip->i_next_unlinked); +	if (error == -ENOLINK) +		error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked, +				ip->i_next_unlinked);  	if (error)  		return error; @@ -2040,7 +2123,7 @@ xfs_iunlink_remove_inode(  	}  	ip->i_next_unlinked = NULLAGINO; -	ip->i_prev_unlinked = NULLAGINO; +	ip->i_prev_unlinked = 0;  	return error;  } @@ -3529,3 +3612,103 @@ xfs_iunlock2_io_mmap(  	if (ip1 != ip2)  		inode_unlock(VFS_I(ip1));  } + +/* + * Reload the incore inode list for this inode.  Caller should ensure that + * the link count cannot change, either by taking ILOCK_SHARED or otherwise + * preventing other threads from executing. 
+ */ +int +xfs_inode_reload_unlinked_bucket( +	struct xfs_trans	*tp, +	struct xfs_inode	*ip) +{ +	struct xfs_mount	*mp = tp->t_mountp; +	struct xfs_buf		*agibp; +	struct xfs_agi		*agi; +	struct xfs_perag	*pag; +	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino); +	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino); +	xfs_agino_t		prev_agino, next_agino; +	unsigned int		bucket; +	bool			foundit = false; +	int			error; + +	/* Grab the first inode in the list */ +	pag = xfs_perag_get(mp, agno); +	error = xfs_ialloc_read_agi(pag, tp, &agibp); +	xfs_perag_put(pag); +	if (error) +		return error; + +	bucket = agino % XFS_AGI_UNLINKED_BUCKETS; +	agi = agibp->b_addr; + +	trace_xfs_inode_reload_unlinked_bucket(ip); + +	xfs_info_ratelimited(mp, + "Found unrecovered unlinked inode 0x%x in AG 0x%x.  Initiating list recovery.", +			agino, agno); + +	prev_agino = NULLAGINO; +	next_agino = be32_to_cpu(agi->agi_unlinked[bucket]); +	while (next_agino != NULLAGINO) { +		struct xfs_inode	*next_ip = NULL; + +		if (next_agino == agino) { +			/* Found this inode, set its backlink. */ +			next_ip = ip; +			next_ip->i_prev_unlinked = prev_agino; +			foundit = true; +		} +		if (!next_ip) { +			/* Inode already in memory. */ +			next_ip = xfs_iunlink_lookup(pag, next_agino); +		} +		if (!next_ip) { +			/* Inode not in memory, reload. */ +			error = xfs_iunlink_reload_next(tp, agibp, prev_agino, +					next_agino); +			if (error) +				break; + +			next_ip = xfs_iunlink_lookup(pag, next_agino); +		} +		if (!next_ip) { +			/* No incore inode at all?  We reloaded it... */ +			ASSERT(next_ip != NULL); +			error = -EFSCORRUPTED; +			break; +		} + +		prev_agino = next_agino; +		next_agino = next_ip->i_next_unlinked; +	} + +	xfs_trans_brelse(tp, agibp); +	/* Should have found this inode somewhere in the iunlinked bucket. */ +	if (!error && !foundit) +		error = -EFSCORRUPTED; +	return error; +} + +/* Decide if this inode is missing its unlinked list and reload it. */ +int +xfs_inode_reload_unlinked( +	struct xfs_inode	*ip) +{ +	struct xfs_trans	*tp; +	int			error; + +	error = xfs_trans_alloc_empty(ip->i_mount, &tp); +	if (error) +		return error; + +	xfs_ilock(ip, XFS_ILOCK_SHARED); +	if (xfs_inode_unlinked_incomplete(ip)) +		error = xfs_inode_reload_unlinked_bucket(tp, ip); +	xfs_iunlock(ip, XFS_ILOCK_SHARED); +	xfs_trans_cancel(tp); + +	return error; +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7547caf2f2ab..0c5bdb91152e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -68,8 +68,21 @@ typedef struct xfs_inode {  	uint64_t		i_diflags2;	/* XFS_DIFLAG2_... */  	struct timespec64	i_crtime;	/* time created */ -	/* unlinked list pointers */ +	/* +	 * Unlinked list pointers.  These point to the next and previous inodes +	 * in the AGI unlinked bucket list, respectively.  These fields can +	 * only be updated with the AGI locked. +	 * +	 * i_next_unlinked caches di_next_unlinked. +	 */  	xfs_agino_t		i_next_unlinked; + +	/* +	 * If the inode is not on an unlinked list, this field is zero.  If the +	 * inode is the first element in an unlinked list, this field is +	 * NULLAGINO.  Otherwise, i_prev_unlinked points to the previous inode +	 * in the unlinked list. 
+	 */  	xfs_agino_t		i_prev_unlinked;  	/* VFS inode */ @@ -81,6 +94,11 @@ typedef struct xfs_inode {  	struct list_head	i_ioend_list;  } xfs_inode_t; +static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip) +{ +	return ip->i_prev_unlinked != 0; +} +  static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip)  {  	return ip->i_forkoff > 0; @@ -326,6 +344,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)   */  #define XFS_INACTIVATING	(1 << 13) +/* Quotacheck is running but inode has not been added to quota counts. */ +#define XFS_IQUOTAUNCHECKED	(1 << 14) +  /* All inode state flags related to inode reclaim. */  #define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \  				 XFS_IRECLAIM | \ @@ -340,7 +361,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)  #define XFS_IRECLAIM_RESET_FLAGS	\  	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \  	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ -	 XFS_INACTIVATING) +	 XFS_INACTIVATING | XFS_IQUOTAUNCHECKED)  /*   * Flags for inode locking. @@ -575,4 +596,13 @@ void xfs_end_io(struct work_struct *work);  int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);  void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); +static inline bool +xfs_inode_unlinked_incomplete( +	struct xfs_inode	*ip) +{ +	return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip); +} +int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip); +int xfs_inode_reload_unlinked(struct xfs_inode *ip); +  #endif	/* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c2093cb56092..ccf0c4ff4490 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -80,6 +80,15 @@ xfs_bulkstat_one_int(  	if (error)  		goto out; +	if (xfs_inode_unlinked_incomplete(ip)) { +		error = xfs_inode_reload_unlinked_bucket(tp, ip); +		if (error) { +			xfs_iunlock(ip, XFS_ILOCK_SHARED); +			xfs_irele(ip); +			return error; +		} +	} +  	ASSERT(ip != NULL);  	ASSERT(ip->i_imap.im_blkno != 0);  	inode = VFS_I(ip); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 79004d193e54..51c100c86177 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -715,15 +715,7 @@ xfs_log_mount(  	 * just worked.  	 */  	if (!xfs_has_norecovery(mp)) { -		/* -		 * log recovery ignores readonly state and so we need to clear -		 * mount-based read only state so it can write to disk. -		 */ -		bool	readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, -						&mp->m_opstate);  		error = xlog_recover(log); -		if (readonly) -			set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);  		if (error) {  			xfs_warn(mp, "log mount/recovery failed: error %d",  				error); @@ -772,7 +764,6 @@ xfs_log_mount_finish(  	struct xfs_mount	*mp)  {  	struct xlog		*log = mp->m_log; -	bool			readonly;  	int			error = 0;  	if (xfs_has_norecovery(mp)) { @@ -781,12 +772,6 @@ xfs_log_mount_finish(  	}  	/* -	 * log recovery ignores readonly state and so we need to clear -	 * mount-based read only state so it can write to disk. -	 */ -	readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - -	/*  	 * During the second phase of log recovery, we need iget and  	 * iput to behave like they do for an active filesystem.  	 
* xfs_fs_drop_inode needs to be able to prevent the deletion @@ -835,8 +820,6 @@ xfs_log_mount_finish(  	xfs_buftarg_drain(mp->m_ddev_targp);  	clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); -	if (readonly) -		set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);  	/* Make sure the log is dead if we're returning failure. */  	ASSERT(!error || xlog_is_shutdown(log)); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index eccbfb99e894..ebc70aaa299c 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -124,7 +124,7 @@ xlog_cil_push_pcp_aggregate(  	struct xlog_cil_pcp	*cilpcp;  	int			cpu; -	for_each_online_cpu(cpu) { +	for_each_cpu(cpu, &ctx->cil_pcpmask) {  		cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);  		ctx->ticket->t_curr_res += cilpcp->space_reserved; @@ -165,7 +165,13 @@ xlog_cil_insert_pcp_aggregate(  	if (!test_and_clear_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags))  		return; -	for_each_online_cpu(cpu) { +	/* +	 * We can race with other cpus setting cil_pcpmask.  However, we've +	 * atomically cleared PCP_SPACE which forces other threads to add to +	 * the global space used count.  cil_pcpmask is a superset of cilpcp +	 * structures that could have a nonzero space_used. +	 */ +	for_each_cpu(cpu, &ctx->cil_pcpmask) {  		int	old, prev;  		cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); @@ -554,6 +560,7 @@ xlog_cil_insert_items(  	int			iovhdr_res = 0, split_res = 0, ctx_res = 0;  	int			space_used;  	int			order; +	unsigned int		cpu_nr;  	struct xlog_cil_pcp	*cilpcp;  	ASSERT(tp); @@ -577,7 +584,12 @@ xlog_cil_insert_items(  	 * can't be scheduled away between split sample/update operations that  	 * are done without outside locking to serialise them.  	 */ -	cilpcp = get_cpu_ptr(cil->xc_pcp); +	cpu_nr = get_cpu(); +	cilpcp = this_cpu_ptr(cil->xc_pcp); + +	/* Tell the future push that there was work added by this CPU. */ +	if (!cpumask_test_cpu(cpu_nr, &ctx->cil_pcpmask)) +		cpumask_test_and_set_cpu(cpu_nr, &ctx->cil_pcpmask);  	/*  	 * We need to take the CIL checkpoint unit reservation on the first @@ -663,7 +675,7 @@ xlog_cil_insert_items(  			continue;  		list_add_tail(&lip->li_cil, &cilpcp->log_items);  	} -	put_cpu_ptr(cilpcp); +	put_cpu();  	/*  	 * If we've overrun the reservation, dump the tx details before we move @@ -1791,38 +1803,6 @@ out_shutdown:  }  /* - * Move dead percpu state to the relevant CIL context structures. - * - * We have to lock the CIL context here to ensure that nothing is modifying - * the percpu state, either addition or removal. Both of these are done under - * the CIL context lock, so grabbing that exclusively here will ensure we can - * safely drain the cilpcp for the CPU that is dying. - */ -void -xlog_cil_pcp_dead( -	struct xlog		*log, -	unsigned int		cpu) -{ -	struct xfs_cil		*cil = log->l_cilp; -	struct xlog_cil_pcp	*cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); -	struct xfs_cil_ctx	*ctx; - -	down_write(&cil->xc_ctx_lock); -	ctx = cil->xc_ctx; -	if (ctx->ticket) -		ctx->ticket->t_curr_res += cilpcp->space_reserved; -	cilpcp->space_reserved = 0; - -	if (!list_empty(&cilpcp->log_items)) -		list_splice_init(&cilpcp->log_items, &ctx->log_items); -	if (!list_empty(&cilpcp->busy_extents)) -		list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents); -	atomic_add(cilpcp->space_used, &ctx->space_used); -	cilpcp->space_used = 0; -	up_write(&cil->xc_ctx_lock); -} - -/*   * Perform initial CIL structure initialisation.   
*/  int diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 1bd2963e8fbd..af87648331d5 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -231,6 +231,12 @@ struct xfs_cil_ctx {  	struct work_struct	discard_endio_work;  	struct work_struct	push_work;  	atomic_t		order_id; + +	/* +	 * CPUs that could have added items to the percpu CIL data.  Access is +	 * coordinated with xc_ctx_lock. +	 */ +	struct cpumask		cil_pcpmask;  };  /* @@ -278,9 +284,6 @@ struct xfs_cil {  	wait_queue_head_t	xc_push_wait;	/* background push throttle */  	void __percpu		*xc_pcp;	/* percpu CIL structures */ -#ifdef CONFIG_HOTPLUG_CPU -	struct list_head	xc_pcp_list; -#endif  } ____cacheline_aligned_in_smp;  /* xc_flags bit values */ @@ -705,9 +708,4 @@ xlog_kvmalloc(  	return p;  } -/* - * CIL CPU dead notifier - */ -void xlog_cil_pcp_dead(struct xlog *log, unsigned int cpu); -  #endif	/* __XFS_LOG_PRIV_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 82c81d20459d..13b94d2e605b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -329,7 +329,7 @@ xlog_find_verify_cycle(  	 * try a smaller size.  We need to be able to read at least  	 * a log sector, or we're out of luck.  	 */ -	bufblks = 1 << ffs(nbblks); +	bufblks = roundup_pow_of_two(nbblks);  	while (bufblks > log->l_logBBsize)  		bufblks >>= 1;  	while (!(buffer = xlog_alloc_buffer(log, bufblks))) { @@ -1528,7 +1528,7 @@ xlog_write_log_records(  	 * a smaller size.  We need to be able to write at least a  	 * log sector, or we're out of luck.  	 */ -	bufblks = 1 << ffs(blocks); +	bufblks = roundup_pow_of_two(blocks);  	while (bufblks > log->l_logBBsize)  		bufblks >>= 1;  	while (!(buffer = xlog_alloc_buffer(log, bufblks))) { diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a25eece3be2b..d19cca099bc3 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -60,6 +60,7 @@ struct xfs_error_cfg {   * Per-cpu deferred inode inactivation GC lists.   */  struct xfs_inodegc { +	struct xfs_mount	*mp;  	struct llist_head	list;  	struct delayed_work	work;  	int			error; @@ -67,9 +68,7 @@ struct xfs_inodegc {  	/* approximate count of inodes in the list */  	unsigned int		items;  	unsigned int		shrinker_hits; -#if defined(DEBUG) || defined(XFS_WARN)  	unsigned int		cpu; -#endif  };  /* @@ -98,7 +97,6 @@ typedef struct xfs_mount {  	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */  	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */  	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */ -	struct list_head	m_mount_list;	/* global mount list */  	void __percpu		*m_inodegc;	/* percpu inodegc structures */  	/* @@ -249,6 +247,9 @@ typedef struct xfs_mount {  	unsigned int		*m_errortag;  	struct xfs_kobj		m_errortag_kobj;  #endif + +	/* cpus that have inodes queued for inactivation */ +	struct cpumask		m_inodegc_cpumask;  } xfs_mount_t;  #define M_IGEO(mp)		(&(mp)->m_ino_geo) @@ -404,6 +405,8 @@ __XFS_HAS_FEAT(nouuid, NOUUID)  #define XFS_OPSTATE_WARNED_SHRINK	8  /* Kernel has logged a warning about logged xattr updates being used. 
*/  #define XFS_OPSTATE_WARNED_LARP		9 +/* Mount time quotacheck is running */ +#define XFS_OPSTATE_QUOTACHECK_RUNNING	10  #define __XFS_IS_OPSTATE(name, NAME) \  static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ @@ -426,6 +429,11 @@ __XFS_IS_OPSTATE(inode32, INODE32)  __XFS_IS_OPSTATE(readonly, READONLY)  __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED)  __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED) +#ifdef CONFIG_XFS_QUOTA +__XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING) +#else +# define xfs_is_quotacheck_running(mp)	(false) +#endif  static inline bool  xfs_should_warn(struct xfs_mount *mp, long nr) @@ -443,7 +451,8 @@ xfs_should_warn(struct xfs_mount *mp, long nr)  	{ (1UL << XFS_OPSTATE_BLOCKGC_ENABLED),		"blockgc" }, \  	{ (1UL << XFS_OPSTATE_WARNED_SCRUB),		"wscrub" }, \  	{ (1UL << XFS_OPSTATE_WARNED_SHRINK),		"wshrink" }, \ -	{ (1UL << XFS_OPSTATE_WARNED_LARP),		"wlarp" } +	{ (1UL << XFS_OPSTATE_WARNED_LARP),		"wlarp" }, \ +	{ (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING),	"quotacheck" }  /*   * Max and min values for mount-option defined I/O diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6abcc34fafd8..7256090c3895 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1160,6 +1160,10 @@ xfs_qm_dqusage_adjust(  	if (error)  		return error; +	error = xfs_inode_reload_unlinked(ip); +	if (error) +		goto error0; +  	ASSERT(ip->i_delayed_blks == 0);  	if (XFS_IS_REALTIME_INODE(ip)) { @@ -1173,6 +1177,7 @@ xfs_qm_dqusage_adjust(  	}  	nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks; +	xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);  	/*  	 * Add the (disk blocks and inode) resources occupied by this @@ -1319,8 +1324,10 @@ xfs_qm_quotacheck(  		flags |= XFS_PQUOTA_CHKD;  	} +	xfs_set_quotacheck_running(mp);  	error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,  			NULL); +	xfs_clear_quotacheck_running(mp);  	/*  	 * On error, the inode walk may have partially populated the dquot diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index edd8587658d5..2d4444d61e98 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -477,6 +477,7 @@ xfs_cui_item_recover(  	struct xfs_log_item		*lip,  	struct list_head		*capture_list)  { +	struct xfs_trans_res		resv;  	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);  	struct xfs_cud_log_item		*cudp;  	struct xfs_trans		*tp; @@ -514,8 +515,9 @@ xfs_cui_item_recover(  	 * doesn't fit.  We need to reserve enough blocks to handle a  	 * full btree split on either end of the refcount range.  	 
*/ -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, -			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); +	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); +	error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, +			XFS_TRANS_RESERVE, &tp);  	if (error)  		return error; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 520c7ebdfed8..0e0e747028da 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -507,6 +507,7 @@ xfs_rui_item_recover(  	struct xfs_log_item		*lip,  	struct list_head		*capture_list)  { +	struct xfs_trans_res		resv;  	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);  	struct xfs_rud_log_item		*rudp;  	struct xfs_trans		*tp; @@ -530,8 +531,9 @@ xfs_rui_item_recover(  		}  	} -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, -			mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp); +	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); +	error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0, +			XFS_TRANS_RESERVE, &tp);  	if (error)  		return error;  	rudp = xfs_trans_get_rud(tp, ruip); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b5c202f5d96c..819a3568b28f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -56,28 +56,6 @@ static struct kset *xfs_kset;		/* top-level xfs sysfs dir */  static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */  #endif -#ifdef CONFIG_HOTPLUG_CPU -static LIST_HEAD(xfs_mount_list); -static DEFINE_SPINLOCK(xfs_mount_list_lock); - -static inline void xfs_mount_list_add(struct xfs_mount *mp) -{ -	spin_lock(&xfs_mount_list_lock); -	list_add(&mp->m_mount_list, &xfs_mount_list); -	spin_unlock(&xfs_mount_list_lock); -} - -static inline void xfs_mount_list_del(struct xfs_mount *mp) -{ -	spin_lock(&xfs_mount_list_lock); -	list_del(&mp->m_mount_list); -	spin_unlock(&xfs_mount_list_lock); -} -#else /* !CONFIG_HOTPLUG_CPU */ -static inline void xfs_mount_list_add(struct xfs_mount *mp) {} -static inline void xfs_mount_list_del(struct xfs_mount *mp) {} -#endif -  enum xfs_dax_mode {  	XFS_DAX_INODE = 0,  	XFS_DAX_ALWAYS = 1, @@ -1135,9 +1113,8 @@ xfs_inodegc_init_percpu(  	for_each_possible_cpu(cpu) {  		gc = per_cpu_ptr(mp->m_inodegc, cpu); -#if defined(DEBUG) || defined(XFS_WARN)  		gc->cpu = cpu; -#endif +		gc->mp = mp;  		init_llist_head(&gc->list);  		gc->items = 0;  		gc->error = 0; @@ -1168,7 +1145,6 @@ xfs_fs_put_super(  	xfs_freesb(mp);  	xchk_mount_stats_free(mp);  	free_percpu(mp->m_stats.xs_stats); -	xfs_mount_list_del(mp);  	xfs_inodegc_free_percpu(mp);  	xfs_destroy_percpu_counters(mp);  	xfs_destroy_mount_workqueues(mp); @@ -1577,13 +1553,6 @@ xfs_fs_fill_super(  	if (error)  		goto out_destroy_counters; -	/* -	 * All percpu data structures requiring cleanup when a cpu goes offline -	 * must be allocated before adding this @mp to the cpu-dead handler's -	 * mount list. 
-	 */ -	xfs_mount_list_add(mp); -  	/* Allocate stats memory before we do operations that might use it */  	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);  	if (!mp->m_stats.xs_stats) { @@ -1781,7 +1750,6 @@ xfs_fs_fill_super(   out_free_stats:  	free_percpu(mp->m_stats.xs_stats);   out_destroy_inodegc: -	xfs_mount_list_del(mp);  	xfs_inodegc_free_percpu(mp);   out_destroy_counters:  	xfs_destroy_percpu_counters(mp); @@ -2326,49 +2294,6 @@ xfs_destroy_workqueues(void)  	destroy_workqueue(xfs_alloc_wq);  } -#ifdef CONFIG_HOTPLUG_CPU -static int -xfs_cpu_dead( -	unsigned int		cpu) -{ -	struct xfs_mount	*mp, *n; - -	spin_lock(&xfs_mount_list_lock); -	list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { -		spin_unlock(&xfs_mount_list_lock); -		xfs_inodegc_cpu_dead(mp, cpu); -		xlog_cil_pcp_dead(mp->m_log, cpu); -		spin_lock(&xfs_mount_list_lock); -	} -	spin_unlock(&xfs_mount_list_lock); -	return 0; -} - -static int __init -xfs_cpu_hotplug_init(void) -{ -	int	error; - -	error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL, -			xfs_cpu_dead); -	if (error < 0) -		xfs_alert(NULL, -"Failed to initialise CPU hotplug, error %d. XFS is non-functional.", -			error); -	return error; -} - -static void -xfs_cpu_hotplug_destroy(void) -{ -	cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD); -} - -#else /* !CONFIG_HOTPLUG_CPU */ -static inline int xfs_cpu_hotplug_init(void) { return 0; } -static inline void xfs_cpu_hotplug_destroy(void) {} -#endif -  STATIC int __init  init_xfs_fs(void)  { @@ -2385,13 +2310,9 @@ init_xfs_fs(void)  	xfs_dir_startup(); -	error = xfs_cpu_hotplug_init(); -	if (error) -		goto out; -  	error = xfs_init_caches();  	if (error) -		goto out_destroy_hp; +		goto out;  	error = xfs_init_workqueues();  	if (error) @@ -2475,8 +2396,6 @@ init_xfs_fs(void)  	xfs_destroy_workqueues();   out_destroy_caches:  	xfs_destroy_caches(); - out_destroy_hp: -	xfs_cpu_hotplug_destroy();   out:  	return error;  } @@ -2500,7 +2419,6 @@ exit_xfs_fs(void)  	xfs_destroy_workqueues();  	xfs_destroy_caches();  	xfs_uuid_table_free(); -	xfs_cpu_hotplug_destroy();  }  module_init(init_xfs_fs); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 902c7f67a117..3926cf7f2a6e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3824,6 +3824,51 @@ TRACE_EVENT(xfs_iunlink_update_dinode,  		  __entry->new_ptr)  ); +TRACE_EVENT(xfs_iunlink_reload_next, +	TP_PROTO(struct xfs_inode *ip), +	TP_ARGS(ip), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(xfs_agnumber_t, agno) +		__field(xfs_agino_t, agino) +		__field(xfs_agino_t, prev_agino) +		__field(xfs_agino_t, next_agino) +	), +	TP_fast_assign( +		__entry->dev = ip->i_mount->m_super->s_dev; +		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); +		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); +		__entry->prev_agino = ip->i_prev_unlinked; +		__entry->next_agino = ip->i_next_unlinked; +	), +	TP_printk("dev %d:%d agno 0x%x agino 0x%x prev_unlinked 0x%x next_unlinked 0x%x", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  __entry->agno, +		  __entry->agino, +		  __entry->prev_agino, +		  __entry->next_agino) +); + +TRACE_EVENT(xfs_inode_reload_unlinked_bucket, +	TP_PROTO(struct xfs_inode *ip), +	TP_ARGS(ip), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(xfs_agnumber_t, agno) +		__field(xfs_agino_t, agino) +	), +	TP_fast_assign( +		__entry->dev = ip->i_mount->m_super->s_dev; +		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino); +		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino); +	), +	
TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  __entry->agno, +		  __entry->agino, +		  __entry->agino % XFS_AGI_UNLINKED_BUCKETS) +); +  DECLARE_EVENT_CLASS(xfs_ag_inode_class,  	TP_PROTO(struct xfs_inode *ip),  	TP_ARGS(ip), diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 43e5c219aaed..a3975f325f4e 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -46,6 +46,17 @@ xfs_attr_grab_log_assist(  	if (xfs_sb_version_haslogxattrs(&mp->m_sb))  		return 0; +	/* +	 * Check if the filesystem featureset is new enough to set this log +	 * incompat feature bit.  Strictly speaking, the minimum requirement is +	 * a V5 filesystem for the superblock field, but we'll require rmap +	 * or reflink to avoid having to deal with really old kernels. +	 */ +	if (!xfs_has_reflink(mp) && !xfs_has_rmapbt(mp)) { +		error = -EOPNOTSUPP; +		goto drop_incompat; +	} +  	/* Enable log-assisted xattrs. */  	error = xfs_add_incompat_log_feature(mp,  			XFS_SB_FEAT_INCOMPAT_LOG_XATTRS); | 
