diff options
Diffstat (limited to 'fs/xfs/xfs_icache.c')
| -rw-r--r-- | fs/xfs/xfs_icache.c | 290 | 
1 files changed, 156 insertions, 134 deletions
| diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index bf2d60749278..99ee6eee5e0b 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -37,9 +37,6 @@  #include <linux/kthread.h>  #include <linux/freezer.h> -STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, -				struct xfs_perag *pag, struct xfs_inode *ip); -  /*   * Allocate and initialise an xfs_inode.   */ @@ -94,13 +91,6 @@ xfs_inode_free_callback(  	struct inode		*inode = container_of(head, struct inode, i_rcu);  	struct xfs_inode	*ip = XFS_I(inode); -	kmem_zone_free(xfs_inode_zone, ip); -} - -void -xfs_inode_free( -	struct xfs_inode	*ip) -{  	switch (VFS_I(ip)->i_mode & S_IFMT) {  	case S_IFREG:  	case S_IFDIR: @@ -118,6 +108,25 @@ xfs_inode_free(  		ip->i_itemp = NULL;  	} +	kmem_zone_free(xfs_inode_zone, ip); +} + +static void +__xfs_inode_free( +	struct xfs_inode	*ip) +{ +	/* asserts to verify all state is correct here */ +	ASSERT(atomic_read(&ip->i_pincount) == 0); +	ASSERT(!xfs_isiflocked(ip)); +	XFS_STATS_DEC(ip->i_mount, vn_active); + +	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); +} + +void +xfs_inode_free( +	struct xfs_inode	*ip) +{  	/*  	 * Because we use RCU freeing we need to ensure the inode always  	 * appears to be reclaimed with an invalid inode number when in the @@ -129,12 +138,123 @@ xfs_inode_free(  	ip->i_ino = 0;  	spin_unlock(&ip->i_flags_lock); -	/* asserts to verify all state is correct here */ -	ASSERT(atomic_read(&ip->i_pincount) == 0); -	ASSERT(!xfs_isiflocked(ip)); -	XFS_STATS_DEC(ip->i_mount, vn_active); +	__xfs_inode_free(ip); +} -	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); +/* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs periodic sync default of 30s. Perhaps this should have it's own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. + */ +static void +xfs_reclaim_work_queue( +	struct xfs_mount        *mp) +{ + +	rcu_read_lock(); +	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { +		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, +			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); +	} +	rcu_read_unlock(); +} + +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. + */ +void +xfs_reclaim_worker( +	struct work_struct *work) +{ +	struct xfs_mount *mp = container_of(to_delayed_work(work), +					struct xfs_mount, m_reclaim_work); + +	xfs_reclaim_inodes(mp, SYNC_TRYLOCK); +	xfs_reclaim_work_queue(mp); +} + +static void +xfs_perag_set_reclaim_tag( +	struct xfs_perag	*pag) +{ +	struct xfs_mount	*mp = pag->pag_mount; + +	ASSERT(spin_is_locked(&pag->pag_ici_lock)); +	if (pag->pag_ici_reclaimable++) +		return; + +	/* propagate the reclaim tag up into the perag radix tree */ +	spin_lock(&mp->m_perag_lock); +	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, +			   XFS_ICI_RECLAIM_TAG); +	spin_unlock(&mp->m_perag_lock); + +	/* schedule periodic background inode reclaim */ +	xfs_reclaim_work_queue(mp); + +	trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_); +} + +static void +xfs_perag_clear_reclaim_tag( +	struct xfs_perag	*pag) +{ +	struct xfs_mount	*mp = pag->pag_mount; + +	ASSERT(spin_is_locked(&pag->pag_ici_lock)); +	if (--pag->pag_ici_reclaimable) +		return; + +	/* clear the reclaim tag from the perag radix tree */ +	spin_lock(&mp->m_perag_lock); +	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, +			     XFS_ICI_RECLAIM_TAG); +	spin_unlock(&mp->m_perag_lock); +	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_); +} + + +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. + */ +void +xfs_inode_set_reclaim_tag( +	struct xfs_inode	*ip) +{ +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_perag	*pag; + +	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); +	spin_lock(&pag->pag_ici_lock); +	spin_lock(&ip->i_flags_lock); + +	radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), +			   XFS_ICI_RECLAIM_TAG); +	xfs_perag_set_reclaim_tag(pag); +	__xfs_iflags_set(ip, XFS_IRECLAIMABLE); + +	spin_unlock(&ip->i_flags_lock); +	spin_unlock(&pag->pag_ici_lock); +	xfs_perag_put(pag); +} + +STATIC void +xfs_inode_clear_reclaim_tag( +	struct xfs_perag	*pag, +	xfs_ino_t		ino) +{ +	radix_tree_tag_clear(&pag->pag_ici_root, +			     XFS_INO_TO_AGINO(pag->pag_mount, ino), +			     XFS_ICI_RECLAIM_TAG); +	xfs_perag_clear_reclaim_tag(pag);  }  /* @@ -264,7 +384,7 @@ xfs_iget_cache_hit(  		 */  		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;  		ip->i_flags |= XFS_INEW; -		__xfs_inode_clear_reclaim_tag(mp, pag, ip); +		xfs_inode_clear_reclaim_tag(pag, ip->i_ino);  		inode->i_state = I_NEW;  		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); @@ -723,121 +843,6 @@ xfs_inode_ag_iterator_tag(  }  /* - * Queue a new inode reclaim pass if there are reclaimable inodes and there - * isn't a reclaim pass already in progress. By default it runs every 5s based - * on the xfs periodic sync default of 30s. Perhaps this should have it's own - * tunable, but that can be done if this method proves to be ineffective or too - * aggressive. - */ -static void -xfs_reclaim_work_queue( -	struct xfs_mount        *mp) -{ - -	rcu_read_lock(); -	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { -		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, -			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); -	} -	rcu_read_unlock(); -} - -/* - * This is a fast pass over the inode cache to try to get reclaim moving on as - * many inodes as possible in a short period of time. It kicks itself every few - * seconds, as well as being kicked by the inode cache shrinker when memory - * goes low. It scans as quickly as possible avoiding locked inodes or those - * already being flushed, and once done schedules a future pass. - */ -void -xfs_reclaim_worker( -	struct work_struct *work) -{ -	struct xfs_mount *mp = container_of(to_delayed_work(work), -					struct xfs_mount, m_reclaim_work); - -	xfs_reclaim_inodes(mp, SYNC_TRYLOCK); -	xfs_reclaim_work_queue(mp); -} - -static void -__xfs_inode_set_reclaim_tag( -	struct xfs_perag	*pag, -	struct xfs_inode	*ip) -{ -	radix_tree_tag_set(&pag->pag_ici_root, -			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), -			   XFS_ICI_RECLAIM_TAG); - -	if (!pag->pag_ici_reclaimable) { -		/* propagate the reclaim tag up into the perag radix tree */ -		spin_lock(&ip->i_mount->m_perag_lock); -		radix_tree_tag_set(&ip->i_mount->m_perag_tree, -				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), -				XFS_ICI_RECLAIM_TAG); -		spin_unlock(&ip->i_mount->m_perag_lock); - -		/* schedule periodic background inode reclaim */ -		xfs_reclaim_work_queue(ip->i_mount); - -		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, -							-1, _RET_IP_); -	} -	pag->pag_ici_reclaimable++; -} - -/* - * We set the inode flag atomically with the radix tree tag. - * Once we get tag lookups on the radix tree, this inode flag - * can go away. - */ -void -xfs_inode_set_reclaim_tag( -	xfs_inode_t	*ip) -{ -	struct xfs_mount *mp = ip->i_mount; -	struct xfs_perag *pag; - -	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); -	spin_lock(&pag->pag_ici_lock); -	spin_lock(&ip->i_flags_lock); -	__xfs_inode_set_reclaim_tag(pag, ip); -	__xfs_iflags_set(ip, XFS_IRECLAIMABLE); -	spin_unlock(&ip->i_flags_lock); -	spin_unlock(&pag->pag_ici_lock); -	xfs_perag_put(pag); -} - -STATIC void -__xfs_inode_clear_reclaim( -	xfs_perag_t	*pag, -	xfs_inode_t	*ip) -{ -	pag->pag_ici_reclaimable--; -	if (!pag->pag_ici_reclaimable) { -		/* clear the reclaim tag from the perag radix tree */ -		spin_lock(&ip->i_mount->m_perag_lock); -		radix_tree_tag_clear(&ip->i_mount->m_perag_tree, -				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), -				XFS_ICI_RECLAIM_TAG); -		spin_unlock(&ip->i_mount->m_perag_lock); -		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, -							-1, _RET_IP_); -	} -} - -STATIC void -__xfs_inode_clear_reclaim_tag( -	xfs_mount_t	*mp, -	xfs_perag_t	*pag, -	xfs_inode_t	*ip) -{ -	radix_tree_tag_clear(&pag->pag_ici_root, -			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); -	__xfs_inode_clear_reclaim(pag, ip); -} - -/*   * Grab the inode for reclaim exclusively.   * Return 0 if we grabbed it, non-zero otherwise.   */ @@ -929,6 +934,7 @@ xfs_reclaim_inode(  	int			sync_mode)  {  	struct xfs_buf		*bp = NULL; +	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */  	int			error;  restart: @@ -993,6 +999,22 @@ restart:  	xfs_iflock(ip);  reclaim: +	/* +	 * Because we use RCU freeing we need to ensure the inode always appears +	 * to be reclaimed with an invalid inode number when in the free state. +	 * We do this as early as possible under the ILOCK and flush lock so +	 * that xfs_iflush_cluster() can be guaranteed to detect races with us +	 * here. By doing this, we guarantee that once xfs_iflush_cluster has +	 * locked both the XFS_ILOCK and the flush lock that it will see either +	 * a valid, flushable inode that will serialise correctly against the +	 * locks below, or it will see a clean (and invalid) inode that it can +	 * skip. +	 */ +	spin_lock(&ip->i_flags_lock); +	ip->i_flags = XFS_IRECLAIM; +	ip->i_ino = 0; +	spin_unlock(&ip->i_flags_lock); +  	xfs_ifunlock(ip);  	xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1006,9 +1028,9 @@ reclaim:  	 */  	spin_lock(&pag->pag_ici_lock);  	if (!radix_tree_delete(&pag->pag_ici_root, -				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) +				XFS_INO_TO_AGINO(ip->i_mount, ino)))  		ASSERT(0); -	__xfs_inode_clear_reclaim(pag, ip); +	xfs_perag_clear_reclaim_tag(pag);  	spin_unlock(&pag->pag_ici_lock);  	/* @@ -1023,7 +1045,7 @@ reclaim:  	xfs_qm_dqdetach(ip);  	xfs_iunlock(ip, XFS_ILOCK_EXCL); -	xfs_inode_free(ip); +	__xfs_inode_free(ip);  	return error;  out_ifunlock: | 
