diff options
Diffstat (limited to 'fs/xfs')
50 files changed, 1346 insertions, 1205 deletions
| diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 686ba6fb20dd..339c696bbc01 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -93,19 +93,23 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)  }  void * -kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, -	     xfs_km_flags_t flags) +kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags)  { -	void	*new; +	int	retries = 0; +	gfp_t	lflags = kmem_flags_convert(flags); +	void	*ptr; -	new = kmem_alloc(newsize, flags); -	if (ptr) { -		if (new) -			memcpy(new, ptr, -				((oldsize < newsize) ? oldsize : newsize)); -		kmem_free(ptr); -	} -	return new; +	do { +		ptr = krealloc(old, newsize, lflags); +		if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) +			return ptr; +		if (!(++retries % 100)) +			xfs_err(NULL, +	"%s(%u) possible memory allocation deadlock size %zu in %s (mode:0x%x)", +				current->comm, current->pid, +				newsize, __func__, lflags); +		congestion_wait(BLK_RW_ASYNC, HZ/50); +	} while (1);  }  void * diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index d1c66e465ca5..689f746224e7 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -62,7 +62,7 @@ kmem_flags_convert(xfs_km_flags_t flags)  extern void *kmem_alloc(size_t, xfs_km_flags_t);  extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); -extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); +extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);  static inline void  kmem_free(const void *ptr)  {  	kvfree(ptr); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index fa3b948ef9c2..4e126f41a0aa 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -242,37 +242,21 @@ xfs_attr_set(  			return error;  	} -	/* -	 * Start our first transaction of the day. -	 * -	 * All future transactions during this code must be "chained" off -	 * this one via the trans_dup() call.  All transactions will contain -	 * the inode, and the inode will always be marked with trans_ihold(). 
-	 * Since the inode will be locked in all transactions, we must log -	 * the inode in every transaction to let it float upward through -	 * the log. -	 */ -	args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET); +	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + +			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total; +	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; +	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;  	/*  	 * Root fork attributes can use reserved data blocks for this  	 * operation if necessary  	 */ - -	if (rsvd) -		args.trans->t_flags |= XFS_TRANS_RESERVE; - -	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + -			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total; -	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; -	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; -	error = xfs_trans_reserve(args.trans, &tres, args.total, 0); -	if (error) { -		xfs_trans_cancel(args.trans); +	error = xfs_trans_alloc(mp, &tres, args.total, 0, +			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans); +	if (error)  		return error; -	} -	xfs_ilock(dp, XFS_ILOCK_EXCL); +	xfs_ilock(dp, XFS_ILOCK_EXCL);  	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,  				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :  				       XFS_QMOPT_RES_REGBLKS); @@ -429,31 +413,15 @@ xfs_attr_remove(  		return error;  	/* -	 * Start our first transaction of the day. -	 * -	 * All future transactions during this code must be "chained" off -	 * this one via the trans_dup() call.  All transactions will contain -	 * the inode, and the inode will always be marked with trans_ihold(). -	 * Since the inode will be locked in all transactions, we must log -	 * the inode in every transaction to let it float upward through -	 * the log. 
-	 */ -	args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM); - -	/*  	 * Root fork attributes can use reserved data blocks for this  	 * operation if necessary  	 */ - -	if (flags & ATTR_ROOT) -		args.trans->t_flags |= XFS_TRANS_RESERVE; - -	error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, -				  XFS_ATTRRM_SPACE_RES(mp), 0); -	if (error) { -		xfs_trans_cancel(args.trans); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm, +			XFS_ATTRRM_SPACE_RES(mp), 0, +			(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0, +			&args.trans); +	if (error)  		return error; -	}  	xfs_ilock(dp, XFS_ILOCK_EXCL);  	/* diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ce41d7fe753c..932381caef1b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1121,15 +1121,14 @@ xfs_bmap_add_attrfork(  	mp = ip->i_mount;  	ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); -	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK); +  	blks = XFS_ADDAFORK_SPACE_RES(mp); -	if (rsvd) -		tp->t_flags |= XFS_TRANS_RESERVE; -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); -	if (error) { -		xfs_trans_cancel(tp); + +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0, +			rsvd ? XFS_TRANS_RESERVE : 0, &tp); +	if (error)  		return error; -	} +  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?  			
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : @@ -6026,13 +6025,10 @@ xfs_bmap_split_extent(  	xfs_fsblock_t           firstfsb;  	int                     error; -	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, -			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, +			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); +	if (error)  		return error; -	}  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 974d62e677f4..e5bb9cc3b243 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -257,15 +257,12 @@ xfs_dir2_block_to_sf(  	 *  	 * Convert the inode to local format and copy the data in.  	 */ -	dp->i_df.if_flags &= ~XFS_IFEXTENTS; -	dp->i_df.if_flags |= XFS_IFINLINE; -	dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;  	ASSERT(dp->i_df.if_bytes == 0); -	xfs_idata_realloc(dp, size, XFS_DATA_FORK); +	xfs_init_local_fork(dp, XFS_DATA_FORK, dst, size); +	dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; +	dp->i_d.di_size = size;  	logflags |= XFS_ILOG_DDATA; -	memcpy(dp->i_df.if_u1.if_data, dst, size); -	dp->i_d.di_size = size;  	xfs_dir2_sf_check(args);  out:  	xfs_trans_log_inode(args->trans, dp, logflags); diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 11faf7df14c8..bbcc8c7a44b3 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -231,6 +231,48 @@ xfs_iformat_fork(  	return error;  } +void +xfs_init_local_fork( +	struct xfs_inode	*ip, +	int			whichfork, +	const void		*data, +	int			size) +{ +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork); +	int			mem_size = size, real_size = 0; +	bool			zero_terminate; + +	/* +	 * If we are using the local fork to store a symlink body we need to +	 * zero-terminate it so that we can pass it back to the VFS directly. 
+	 * Overallocate the in-memory fork by one for that and add a zero +	 * to terminate it below. +	 */ +	zero_terminate = S_ISLNK(VFS_I(ip)->i_mode); +	if (zero_terminate) +		mem_size++; + +	if (size == 0) +		ifp->if_u1.if_data = NULL; +	else if (mem_size <= sizeof(ifp->if_u2.if_inline_data)) +		ifp->if_u1.if_data = ifp->if_u2.if_inline_data; +	else { +		real_size = roundup(mem_size, 4); +		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); +	} + +	if (size) { +		memcpy(ifp->if_u1.if_data, data, size); +		if (zero_terminate) +			ifp->if_u1.if_data[size] = '\0'; +	} + +	ifp->if_bytes = size; +	ifp->if_real_bytes = real_size; +	ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); +	ifp->if_flags |= XFS_IFINLINE; +} +  /*   * The file is in-lined in the on-disk inode.   * If it fits into if_inline_data, then copy @@ -248,8 +290,6 @@ xfs_iformat_local(  	int		whichfork,  	int		size)  { -	xfs_ifork_t	*ifp; -	int		real_size;  	/*  	 * If the size is unreasonable, then something @@ -265,22 +305,8 @@ xfs_iformat_local(  				     ip->i_mount, dip);  		return -EFSCORRUPTED;  	} -	ifp = XFS_IFORK_PTR(ip, whichfork); -	real_size = 0; -	if (size == 0) -		ifp->if_u1.if_data = NULL; -	else if (size <= sizeof(ifp->if_u2.if_inline_data)) -		ifp->if_u1.if_data = ifp->if_u2.if_inline_data; -	else { -		real_size = roundup(size, 4); -		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS); -	} -	ifp->if_bytes = size; -	ifp->if_real_bytes = real_size; -	if (size) -		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size); -	ifp->if_flags &= ~XFS_IFEXTENTS; -	ifp->if_flags |= XFS_IFINLINE; + +	xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);  	return 0;  } @@ -516,7 +542,6 @@ xfs_iroot_realloc(  		new_max = cur_max + rec_diff;  		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);  		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, -				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),  				KM_SLEEP | KM_NOFS);  		op = (char 
*)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,  						     ifp->if_broot_bytes); @@ -660,7 +685,6 @@ xfs_idata_realloc(  				ifp->if_u1.if_data =  					kmem_realloc(ifp->if_u1.if_data,  							real_size, -							ifp->if_real_bytes,  							KM_SLEEP | KM_NOFS);  			}  		} else { @@ -1376,8 +1400,7 @@ xfs_iext_realloc_direct(  		if (rnew_size != ifp->if_real_bytes) {  			ifp->if_u1.if_extents =  				kmem_realloc(ifp->if_u1.if_extents, -						rnew_size, -						ifp->if_real_bytes, KM_NOFS); +						rnew_size, KM_NOFS);  		}  		if (rnew_size > ifp->if_real_bytes) {  			memset(&ifp->if_u1.if_extents[ifp->if_bytes / @@ -1461,9 +1484,8 @@ xfs_iext_realloc_indirect(  	if (new_size == 0) {  		xfs_iext_destroy(ifp);  	} else { -		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) -			kmem_realloc(ifp->if_u1.if_ext_irec, -				new_size, size, KM_NOFS); +		ifp->if_u1.if_ext_irec = +			kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);  	}  } @@ -1497,6 +1519,24 @@ xfs_iext_indirect_to_direct(  }  /* + * Remove all records from the indirection array. + */ +STATIC void +xfs_iext_irec_remove_all( +	struct xfs_ifork *ifp) +{ +	int		nlists; +	int		i; + +	ASSERT(ifp->if_flags & XFS_IFEXTIREC); +	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; +	for (i = 0; i < nlists; i++) +		kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf); +	kmem_free(ifp->if_u1.if_ext_irec); +	ifp->if_flags &= ~XFS_IFEXTIREC; +} + +/*   * Free incore file extents.   
*/  void @@ -1504,14 +1544,7 @@ xfs_iext_destroy(  	xfs_ifork_t	*ifp)		/* inode fork pointer */  {  	if (ifp->if_flags & XFS_IFEXTIREC) { -		int	erp_idx; -		int	nlists; - -		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; -		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) { -			xfs_iext_irec_remove(ifp, erp_idx); -		} -		ifp->if_flags &= ~XFS_IFEXTIREC; +		xfs_iext_irec_remove_all(ifp);  	} else if (ifp->if_real_bytes) {  		kmem_free(ifp->if_u1.if_extents);  	} else if (ifp->if_bytes) { diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 7d3b1ed6dcbe..f95e072ae646 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -134,6 +134,7 @@ void		xfs_iroot_realloc(struct xfs_inode *, int, int);  int		xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);  int		xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,  				  int); +void		xfs_init_local_fork(struct xfs_inode *, int, const void *, int);  struct xfs_bmbt_rec_host *  		xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index d54a8018b079..e8f49c029ff0 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -212,6 +212,11 @@ typedef struct xfs_trans_header {  #define	XFS_TRANS_HEADER_MAGIC	0x5452414e	/* TRAN */  /* + * The only type valid for th_type in CIL-enabled file system logs: + */ +#define XFS_TRANS_CHECKPOINT	40 + +/*   * Log item types.   
*/  #define	XFS_LI_EFI		0x1236 diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 8a53eaa349f4..12ca86778e02 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -838,12 +838,10 @@ xfs_sync_sb(  	struct xfs_trans	*tp;  	int			error; -	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, +			XFS_TRANS_NO_WRITECOUNT, &tp); +	if (error)  		return error; -	}  	xfs_log_sb(tp);  	if (wait) diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 81ac870834da..16002b5ec4eb 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -56,103 +56,6 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;  extern const struct xfs_buf_ops xfs_rtbuf_ops;  /* - * Transaction types.  Used to distinguish types of buffers. These never reach - * the log. - */ -#define XFS_TRANS_SETATTR_NOT_SIZE	1 -#define XFS_TRANS_SETATTR_SIZE		2 -#define XFS_TRANS_INACTIVE		3 -#define XFS_TRANS_CREATE		4 -#define XFS_TRANS_CREATE_TRUNC		5 -#define XFS_TRANS_TRUNCATE_FILE		6 -#define XFS_TRANS_REMOVE		7 -#define XFS_TRANS_LINK			8 -#define XFS_TRANS_RENAME		9 -#define XFS_TRANS_MKDIR			10 -#define XFS_TRANS_RMDIR			11 -#define XFS_TRANS_SYMLINK		12 -#define XFS_TRANS_SET_DMATTRS		13 -#define XFS_TRANS_GROWFS		14 -#define XFS_TRANS_STRAT_WRITE		15 -#define XFS_TRANS_DIOSTRAT		16 -/* 17 was XFS_TRANS_WRITE_SYNC */ -#define	XFS_TRANS_WRITEID		18 -#define	XFS_TRANS_ADDAFORK		19 -#define	XFS_TRANS_ATTRINVAL		20 -#define	XFS_TRANS_ATRUNCATE		21 -#define	XFS_TRANS_ATTR_SET		22 -#define	XFS_TRANS_ATTR_RM		23 -#define	XFS_TRANS_ATTR_FLAG		24 -#define	XFS_TRANS_CLEAR_AGI_BUCKET	25 -#define XFS_TRANS_SB_CHANGE		26 -/* - * Dummy entries since we use the transaction type to index into the - * trans_type[] in xlog_recover_print_trans_head() - */ -#define XFS_TRANS_DUMMY1		27 -#define 
XFS_TRANS_DUMMY2		28 -#define XFS_TRANS_QM_QUOTAOFF		29 -#define XFS_TRANS_QM_DQALLOC		30 -#define XFS_TRANS_QM_SETQLIM		31 -#define XFS_TRANS_QM_DQCLUSTER		32 -#define XFS_TRANS_QM_QINOCREATE		33 -#define XFS_TRANS_QM_QUOTAOFF_END	34 -#define XFS_TRANS_FSYNC_TS		35 -#define	XFS_TRANS_GROWFSRT_ALLOC	36 -#define	XFS_TRANS_GROWFSRT_ZERO		37 -#define	XFS_TRANS_GROWFSRT_FREE		38 -#define	XFS_TRANS_SWAPEXT		39 -#define	XFS_TRANS_CHECKPOINT		40 -#define	XFS_TRANS_ICREATE		41 -#define	XFS_TRANS_CREATE_TMPFILE	42 -#define	XFS_TRANS_TYPE_MAX		43 -/* new transaction types need to be reflected in xfs_logprint(8) */ - -#define XFS_TRANS_TYPES \ -	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \ -	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \ -	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \ -	{ XFS_TRANS_CREATE,		"CREATE" }, \ -	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \ -	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \ -	{ XFS_TRANS_REMOVE,		"REMOVE" }, \ -	{ XFS_TRANS_LINK,		"LINK" }, \ -	{ XFS_TRANS_RENAME,		"RENAME" }, \ -	{ XFS_TRANS_MKDIR,		"MKDIR" }, \ -	{ XFS_TRANS_RMDIR,		"RMDIR" }, \ -	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \ -	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \ -	{ XFS_TRANS_GROWFS,		"GROWFS" }, \ -	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \ -	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \ -	{ XFS_TRANS_WRITEID,		"WRITEID" }, \ -	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \ -	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \ -	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \ -	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \ -	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \ -	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \ -	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \ -	{ XFS_TRANS_SB_CHANGE,		"SBCHANGE" }, \ -	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \ -	{ XFS_TRANS_DUMMY2,		"DUMMY2" }, \ -	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \ -	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \ -	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \ -	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \ -	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, 
\ -	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \ -	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \ -	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \ -	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \ -	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \ -	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \ -	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \ -	{ XFS_TRANS_ICREATE,		"ICREATE" }, \ -	{ XFS_TRANS_CREATE_TMPFILE,	"CREATE_TMPFILE" }, \ -	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" } - -/*   * This structure is used to track log items associated with   * a transaction.  It points to the log item and keeps some   * flags to track the state of the log item.  It also tracks @@ -181,8 +84,9 @@ int	xfs_log_calc_minimum_size(struct xfs_mount *);  #define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */  #define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */  #define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */ -#define XFS_TRANS_FREEZE_PROT	0x40	/* Transaction has elevated writer -					   count in superblock */ +#define XFS_TRANS_NO_WRITECOUNT 0x40	/* do not elevate SB writecount */ +#define XFS_TRANS_NOFS		0x80	/* pass KM_NOFS to kmem_alloc */ +  /*   * Field values for xfs_trans_mod_sb.   */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 2d5df1f23bbc..b6e527b8eccb 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -158,22 +158,14 @@ xfs_get_acl(struct inode *inode, int type)  	if (error) {  		/*  		 * If the attribute doesn't exist make sure we have a negative -		 * cache entry, for any other error assume it is transient and -		 * leave the cache entry as ACL_NOT_CACHED. +		 * cache entry, for any other error assume it is transient.  		 
*/ -		if (error == -ENOATTR) -			goto out_update_cache; -		acl = ERR_PTR(error); -		goto out; +		if (error != -ENOATTR) +			acl = ERR_PTR(error); +	} else  { +		acl = xfs_acl_from_disk(xfs_acl, len, +					XFS_ACL_MAX_ENTRIES(ip->i_mount));  	} - -	acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); -	if (IS_ERR(acl)) -		goto out; - -out_update_cache: -	set_cached_acl(inode, type, acl); -out:  	kmem_free(xfs_acl);  	return acl;  } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e49b2406d15d..4c463b99fe57 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -84,23 +84,71 @@ xfs_find_bdev_for_inode(  }  /* - * We're now finished for good with this ioend structure. - * Update the page state via the associated buffer_heads, - * release holds on the inode and bio, and finally free - * up memory.  Do not use the ioend after this. + * We're now finished for good with this page.  Update the page state via the + * associated buffer_heads, paying attention to the start and end offsets that + * we need to process on the page. + */ +static void +xfs_finish_page_writeback( +	struct inode		*inode, +	struct bio_vec		*bvec, +	int			error) +{ +	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1; +	struct buffer_head	*head, *bh; +	unsigned int		off = 0; + +	ASSERT(bvec->bv_offset < PAGE_SIZE); +	ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0); +	ASSERT(end < PAGE_SIZE); +	ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0); + +	bh = head = page_buffers(bvec->bv_page); + +	do { +		if (off < bvec->bv_offset) +			goto next_bh; +		if (off > end) +			break; +		bh->b_end_io(bh, !error); +next_bh: +		off += bh->b_size; +	} while ((bh = bh->b_this_page) != head); +} + +/* + * We're now finished for good with this ioend structure.  Update the page + * state, release holds on bios, and finally free up memory.  Do not use the + * ioend after this.   
*/  STATIC void  xfs_destroy_ioend( -	xfs_ioend_t		*ioend) +	struct xfs_ioend	*ioend, +	int			error)  { -	struct buffer_head	*bh, *next; +	struct inode		*inode = ioend->io_inode; +	struct bio		*last = ioend->io_bio; +	struct bio		*bio, *next; -	for (bh = ioend->io_buffer_head; bh; bh = next) { -		next = bh->b_private; -		bh->b_end_io(bh, !ioend->io_error); -	} +	for (bio = &ioend->io_inline_bio; bio; bio = next) { +		struct bio_vec	*bvec; +		int		i; + +		/* +		 * For the last bio, bi_private points to the ioend, so we +		 * need to explicitly end the iteration here. +		 */ +		if (bio == last) +			next = NULL; +		else +			next = bio->bi_private; -	mempool_free(ioend, xfs_ioend_pool); +		/* walk each page on bio, ending page IO on them */ +		bio_for_each_segment_all(bvec, bio, i) +			xfs_finish_page_writeback(inode, bvec, error); + +		bio_put(bio); +	}  }  /* @@ -120,13 +168,9 @@ xfs_setfilesize_trans_alloc(  	struct xfs_trans	*tp;  	int			error; -	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); +	if (error)  		return error; -	}  	ioend->io_append_trans = tp; @@ -174,7 +218,8 @@ xfs_setfilesize(  STATIC int  xfs_setfilesize_ioend( -	struct xfs_ioend	*ioend) +	struct xfs_ioend	*ioend, +	int			error)  {  	struct xfs_inode	*ip = XFS_I(ioend->io_inode);  	struct xfs_trans	*tp = ioend->io_append_trans; @@ -188,53 +233,32 @@ xfs_setfilesize_ioend(  	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);  	/* we abort the update if there was an IO error */ -	if (ioend->io_error) { +	if (error) {  		xfs_trans_cancel(tp); -		return ioend->io_error; +		return error;  	}  	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);  }  /* - * Schedule IO completion handling on the final put of an ioend. - * - * If there is no work to do we might as well call it a day and free the - * ioend right now. 
- */ -STATIC void -xfs_finish_ioend( -	struct xfs_ioend	*ioend) -{ -	if (atomic_dec_and_test(&ioend->io_remaining)) { -		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount; - -		if (ioend->io_type == XFS_IO_UNWRITTEN) -			queue_work(mp->m_unwritten_workqueue, &ioend->io_work); -		else if (ioend->io_append_trans) -			queue_work(mp->m_data_workqueue, &ioend->io_work); -		else -			xfs_destroy_ioend(ioend); -	} -} - -/*   * IO write completion.   */  STATIC void  xfs_end_io(  	struct work_struct *work)  { -	xfs_ioend_t	*ioend = container_of(work, xfs_ioend_t, io_work); -	struct xfs_inode *ip = XFS_I(ioend->io_inode); -	int		error = 0; +	struct xfs_ioend	*ioend = +		container_of(work, struct xfs_ioend, io_work); +	struct xfs_inode	*ip = XFS_I(ioend->io_inode); +	int			error = ioend->io_bio->bi_error;  	/*  	 * Set an error if the mount has shut down and proceed with end I/O  	 * processing so it can perform whatever cleanups are necessary.  	 */  	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) -		ioend->io_error = -EIO; +		error = -EIO;  	/*  	 * For unwritten extents we need to issue transactions to convert a @@ -244,55 +268,33 @@ xfs_end_io(  	 * on error.  	 */  	if (ioend->io_type == XFS_IO_UNWRITTEN) { -		if (ioend->io_error) +		if (error)  			goto done;  		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,  						  ioend->io_size);  	} else if (ioend->io_append_trans) { -		error = xfs_setfilesize_ioend(ioend); +		error = xfs_setfilesize_ioend(ioend, error);  	} else {  		ASSERT(!xfs_ioend_is_append(ioend));  	}  done: -	if (error) -		ioend->io_error = error; -	xfs_destroy_ioend(ioend); +	xfs_destroy_ioend(ioend, error);  } -/* - * Allocate and initialise an IO completion structure. - * We need to track unwritten extent write completion here initially. - * We'll need to extend this for updating the ondisk inode size later - * (vs. incore size). 
- */ -STATIC xfs_ioend_t * -xfs_alloc_ioend( -	struct inode		*inode, -	unsigned int		type) +STATIC void +xfs_end_bio( +	struct bio		*bio)  { -	xfs_ioend_t		*ioend; - -	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); - -	/* -	 * Set the count to 1 initially, which will prevent an I/O -	 * completion callback from happening before we have started -	 * all the I/O from calling the completion routine too early. -	 */ -	atomic_set(&ioend->io_remaining, 1); -	ioend->io_error = 0; -	INIT_LIST_HEAD(&ioend->io_list); -	ioend->io_type = type; -	ioend->io_inode = inode; -	ioend->io_buffer_head = NULL; -	ioend->io_buffer_tail = NULL; -	ioend->io_offset = 0; -	ioend->io_size = 0; -	ioend->io_append_trans = NULL; +	struct xfs_ioend	*ioend = bio->bi_private; +	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount; -	INIT_WORK(&ioend->io_work, xfs_end_io); -	return ioend; +	if (ioend->io_type == XFS_IO_UNWRITTEN) +		queue_work(mp->m_unwritten_workqueue, &ioend->io_work); +	else if (ioend->io_append_trans) +		queue_work(mp->m_data_workqueue, &ioend->io_work); +	else +		xfs_destroy_ioend(ioend, bio->bi_error);  }  STATIC int @@ -364,50 +366,6 @@ xfs_imap_valid(  		offset < imap->br_startoff + imap->br_blockcount;  } -/* - * BIO completion handler for buffered IO. - */ -STATIC void -xfs_end_bio( -	struct bio		*bio) -{ -	xfs_ioend_t		*ioend = bio->bi_private; - -	if (!ioend->io_error) -		ioend->io_error = bio->bi_error; - -	/* Toss bio and pass work off to an xfsdatad thread */ -	bio->bi_private = NULL; -	bio->bi_end_io = NULL; -	bio_put(bio); - -	xfs_finish_ioend(ioend); -} - -STATIC void -xfs_submit_ioend_bio( -	struct writeback_control *wbc, -	xfs_ioend_t		*ioend, -	struct bio		*bio) -{ -	atomic_inc(&ioend->io_remaining); -	bio->bi_private = ioend; -	bio->bi_end_io = xfs_end_bio; -	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? 
WRITE_SYNC : WRITE, bio); -} - -STATIC struct bio * -xfs_alloc_ioend_bio( -	struct buffer_head	*bh) -{ -	struct bio		*bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); - -	ASSERT(bio->bi_private == NULL); -	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); -	bio->bi_bdev = bh->b_bdev; -	return bio; -} -  STATIC void  xfs_start_buffer_writeback(  	struct buffer_head	*bh) @@ -452,28 +410,35 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)  }  /* - * Submit all of the bios for an ioend. We are only passed a single ioend at a - * time; the caller is responsible for chaining prior to submission. + * Submit the bio for an ioend. We are passed an ioend with a bio attached to + * it, and we submit that bio. The ioend may be used for multiple bio + * submissions, so we only want to allocate an append transaction for the ioend + * once. In the case of multiple bio submission, each bio will take an IO + * reference to the ioend to ensure that the ioend completion is only done once + * all bios have been submitted and the ioend is really done.   *   * If @fail is non-zero, it means that we have a situation where some part of   * the submission process has failed after we have marked paged for writeback - * and unlocked them. In this situation, we need to fail the ioend chain rather - * than submit it to IO. This typically only happens on a filesystem shutdown. + * and unlocked them. In this situation, we need to fail the bio and ioend + * rather than submit it to IO. This typically only happens on a filesystem + * shutdown.   */  STATIC int  xfs_submit_ioend(  	struct writeback_control *wbc, -	xfs_ioend_t		*ioend, +	struct xfs_ioend	*ioend,  	int			status)  { -	struct buffer_head	*bh; -	struct bio		*bio; -	sector_t		lastblock = 0; -  	/* Reserve log space if we might write beyond the on-disk inode size. 
*/  	if (!status && -	     ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) +	    ioend->io_type != XFS_IO_UNWRITTEN && +	    xfs_ioend_is_append(ioend) && +	    !ioend->io_append_trans)  		status = xfs_setfilesize_trans_alloc(ioend); + +	ioend->io_bio->bi_private = ioend; +	ioend->io_bio->bi_end_io = xfs_end_bio; +  	/*  	 * If we are failing the IO now, just mark the ioend with an  	 * error and finish it. This will run IO completion immediately @@ -481,33 +446,73 @@ xfs_submit_ioend(  	 * time.  	 */  	if (status) { -		ioend->io_error = status; -		xfs_finish_ioend(ioend); +		ioend->io_bio->bi_error = status; +		bio_endio(ioend->io_bio);  		return status;  	} -	bio = NULL; -	for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { +	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, +		   ioend->io_bio); +	return 0; +} -		if (!bio) { -retry: -			bio = xfs_alloc_ioend_bio(bh); -		} else if (bh->b_blocknr != lastblock + 1) { -			xfs_submit_ioend_bio(wbc, ioend, bio); -			goto retry; -		} +static void +xfs_init_bio_from_bh( +	struct bio		*bio, +	struct buffer_head	*bh) +{ +	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); +	bio->bi_bdev = bh->b_bdev; +} -		if (xfs_bio_add_buffer(bio, bh) != bh->b_size) { -			xfs_submit_ioend_bio(wbc, ioend, bio); -			goto retry; -		} +static struct xfs_ioend * +xfs_alloc_ioend( +	struct inode		*inode, +	unsigned int		type, +	xfs_off_t		offset, +	struct buffer_head	*bh) +{ +	struct xfs_ioend	*ioend; +	struct bio		*bio; -		lastblock = bh->b_blocknr; -	} -	if (bio) -		xfs_submit_ioend_bio(wbc, ioend, bio); -	xfs_finish_ioend(ioend); -	return 0; +	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset); +	xfs_init_bio_from_bh(bio, bh); + +	ioend = container_of(bio, struct xfs_ioend, io_inline_bio); +	INIT_LIST_HEAD(&ioend->io_list); +	ioend->io_type = type; +	ioend->io_inode = inode; +	ioend->io_size = 0; +	ioend->io_offset = offset; +	INIT_WORK(&ioend->io_work, xfs_end_io); +	
ioend->io_append_trans = NULL; +	ioend->io_bio = bio; +	return ioend; +} + +/* + * Allocate a new bio, and chain the old bio to the new one. + * + * Note that we have to perform the chaining in this unintuitive order + * so that the bi_private linkage is set up in the right direction for the + * traversal in xfs_destroy_ioend(). + */ +static void +xfs_chain_bio( +	struct xfs_ioend	*ioend, +	struct writeback_control *wbc, +	struct buffer_head	*bh) +{ +	struct bio *new; + +	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); +	xfs_init_bio_from_bh(new, bh); + +	bio_chain(ioend->io_bio, new); +	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */ +	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, +		   ioend->io_bio); +	ioend->io_bio = new;  }  /* @@ -523,27 +528,24 @@ xfs_add_to_ioend(  	struct buffer_head	*bh,  	xfs_off_t		offset,  	struct xfs_writepage_ctx *wpc, +	struct writeback_control *wbc,  	struct list_head	*iolist)  {  	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||  	    bh->b_blocknr != wpc->last_block + 1 ||  	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) { -		struct xfs_ioend	*new; -  		if (wpc->ioend)  			list_add(&wpc->ioend->io_list, iolist); - -		new = xfs_alloc_ioend(inode, wpc->io_type); -		new->io_offset = offset; -		new->io_buffer_head = bh; -		new->io_buffer_tail = bh; -		wpc->ioend = new; -	} else { -		wpc->ioend->io_buffer_tail->b_private = bh; -		wpc->ioend->io_buffer_tail = bh; +		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);  	} -	bh->b_private = NULL; +	/* +	 * If the buffer doesn't fit into the bio we need to allocate a new +	 * one.  This shouldn't happen more than once for a given buffer. 
+	 */ +	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) +		xfs_chain_bio(wpc->ioend, wbc, bh); +  	wpc->ioend->io_size += bh->b_size;  	wpc->last_block = bh->b_blocknr;  	xfs_start_buffer_writeback(bh); @@ -803,7 +805,7 @@ xfs_writepage_map(  			lock_buffer(bh);  			if (wpc->io_type != XFS_IO_OVERWRITE)  				xfs_map_at_offset(inode, bh, &wpc->imap, offset); -			xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list); +			xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);  			count++;  		} @@ -1391,13 +1393,10 @@ xfs_end_io_direct_write(  		trace_xfs_end_io_direct_write_append(ip, offset, size); -		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); -		if (error) { -			xfs_trans_cancel(tp); -			return error; -		} -		error = xfs_setfilesize(ip, tp, offset, size); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, +				&tp); +		if (!error) +			error = xfs_setfilesize(ip, tp, offset, size);  	}  	return error; @@ -1406,8 +1405,7 @@ xfs_end_io_direct_write(  STATIC ssize_t  xfs_vm_direct_IO(  	struct kiocb		*iocb, -	struct iov_iter		*iter, -	loff_t			offset) +	struct iov_iter		*iter)  {  	struct inode		*inode = iocb->ki_filp->f_mapping->host;  	dio_iodone_t		*endio = NULL; @@ -1420,12 +1418,12 @@ xfs_vm_direct_IO(  	}  	if (IS_DAX(inode)) { -		return dax_do_io(iocb, inode, iter, offset, +		return dax_do_io(iocb, inode, iter,  				 xfs_get_blocks_direct, endio, 0);  	}  	bdev = xfs_find_bdev_for_inode(inode); -	return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset, +	return  __blockdev_direct_IO(iocb, inode, bdev, iter,  			xfs_get_blocks_direct, endio, NULL, flags);  } diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index b4421177b68d..814aab790713 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -18,7 +18,7 @@  #ifndef __XFS_AOPS_H__  #define __XFS_AOPS_H__ -extern mempool_t *xfs_ioend_pool; +extern struct bio_set *xfs_ioend_bioset;  /*   * Types of I/O 
for bmap clustering and I/O completion tracking. @@ -37,22 +37,19 @@ enum {  	{ XFS_IO_OVERWRITE,		"overwrite" }  /* - * xfs_ioend struct manages large extent writes for XFS. - * It can manage several multi-page bio's at once. + * Structure for buffered I/O completions.   */ -typedef struct xfs_ioend { +struct xfs_ioend {  	struct list_head	io_list;	/* next ioend in chain */  	unsigned int		io_type;	/* delalloc / unwritten */ -	int			io_error;	/* I/O error code */ -	atomic_t		io_remaining;	/* hold count */  	struct inode		*io_inode;	/* file being written to */ -	struct buffer_head	*io_buffer_head;/* buffer linked list head */ -	struct buffer_head	*io_buffer_tail;/* buffer linked list tail */  	size_t			io_size;	/* size of the extent */  	xfs_off_t		io_offset;	/* offset in the file */  	struct work_struct	io_work;	/* xfsdatad work queue */  	struct xfs_trans	*io_append_trans;/* xact. for size update */ -} xfs_ioend_t; +	struct bio		*io_bio;	/* bio being built */ +	struct bio		io_inline_bio;	/* MUST BE LAST! 
*/ +};  extern const struct address_space_operations xfs_address_space_operations; diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index dd4824589470..e3da5d448bcf 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -112,8 +112,9 @@ typedef struct attrlist_cursor_kern {   *========================================================================*/ +/* Return 0 on success, or -errno; other state communicated via *context */  typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, -			      unsigned char *, int, int, unsigned char *); +			      unsigned char *, int, int);  typedef struct xfs_attr_list_context {  	struct xfs_inode		*dp;		/* inode */ @@ -126,7 +127,6 @@ typedef struct xfs_attr_list_context {  	int				firstu;		/* first used byte in buffer */  	int				flags;		/* from VOP call */  	int				resynch;	/* T/F: resynch with cursor */ -	int				put_value;	/* T/F: need value for listent */  	put_listent_func_t		put_listent;	/* list output fmt function */  	int				index;		/* index into output buffer */  } xfs_attr_list_context_t; diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 2bb959ada45b..55d214981ed2 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -405,21 +405,11 @@ xfs_attr_inactive(  		goto out_destroy_fork;  	xfs_iunlock(dp, lock_mode); -	/* -	 * Start our first transaction of the day. -	 * -	 * All future transactions during this code must be "chained" off -	 * this one via the trans_dup() call.  All transactions will contain -	 * the inode, and the inode will always be marked with trans_ihold(). -	 * Since the inode will be locked in all transactions, we must log -	 * the inode in every transaction to let it float upward through -	 * the log. 
-	 */  	lock_mode = 0; -	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); -	error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); + +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrinval, 0, 0, 0, &trans);  	if (error) -		goto out_cancel; +		goto out_destroy_fork;  	lock_mode = XFS_ILOCK_EXCL;  	xfs_ilock(dp, lock_mode); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 4fa14820e2e2..d25f26b22ac9 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -106,18 +106,15 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)  					   sfe->flags,  					   sfe->nameval,  					   (int)sfe->namelen, -					   (int)sfe->valuelen, -					   &sfe->nameval[sfe->namelen]); - +					   (int)sfe->valuelen); +			if (error) +				return error;  			/*  			 * Either search callback finished early or  			 * didn't fit it all in the buffer after all.  			 */  			if (context->seen_enough)  				break; - -			if (error) -				return error;  			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);  		}  		trace_xfs_attr_list_sf_all(context); @@ -200,8 +197,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)  					sbp->flags,  					sbp->name,  					sbp->namelen, -					sbp->valuelen, -					&sbp->name[sbp->namelen]); +					sbp->valuelen);  		if (error) {  			kmem_free(sbuf);  			return error; @@ -416,6 +412,9 @@ xfs_attr3_leaf_list_int(  	 */  	retval = 0;  	for (; i < ichdr.count; entry++, i++) { +		char *name; +		int namelen, valuelen; +  		if (be32_to_cpu(entry->hashval) != cursor->hashval) {  			cursor->hashval = be32_to_cpu(entry->hashval);  			cursor->offset = 0; @@ -425,56 +424,25 @@ xfs_attr3_leaf_list_int(  			continue;		/* skip incomplete entries */  		if (entry->flags & XFS_ATTR_LOCAL) { -			xfs_attr_leaf_name_local_t *name_loc = -				xfs_attr3_leaf_name_local(leaf, i); - -			retval = context->put_listent(context, -						entry->flags, -						name_loc->nameval, -						(int)name_loc->namelen, -						be16_to_cpu(name_loc->valuelen), -						
&name_loc->nameval[name_loc->namelen]); -			if (retval) -				return retval; +			xfs_attr_leaf_name_local_t *name_loc; + +			name_loc = xfs_attr3_leaf_name_local(leaf, i); +			name = name_loc->nameval; +			namelen = name_loc->namelen; +			valuelen = be16_to_cpu(name_loc->valuelen);  		} else { -			xfs_attr_leaf_name_remote_t *name_rmt = -				xfs_attr3_leaf_name_remote(leaf, i); - -			int valuelen = be32_to_cpu(name_rmt->valuelen); - -			if (context->put_value) { -				xfs_da_args_t args; - -				memset((char *)&args, 0, sizeof(args)); -				args.geo = context->dp->i_mount->m_attr_geo; -				args.dp = context->dp; -				args.whichfork = XFS_ATTR_FORK; -				args.valuelen = valuelen; -				args.rmtvaluelen = valuelen; -				args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); -				args.rmtblkno = be32_to_cpu(name_rmt->valueblk); -				args.rmtblkcnt = xfs_attr3_rmt_blocks( -							args.dp->i_mount, valuelen); -				retval = xfs_attr_rmtval_get(&args); -				if (!retval) -					retval = context->put_listent(context, -							entry->flags, -							name_rmt->name, -							(int)name_rmt->namelen, -							valuelen, -							args.value); -				kmem_free(args.value); -			} else { -				retval = context->put_listent(context, -						entry->flags, -						name_rmt->name, -						(int)name_rmt->namelen, -						valuelen, -						NULL); -			} -			if (retval) -				return retval; +			xfs_attr_leaf_name_remote_t *name_rmt; + +			name_rmt = xfs_attr3_leaf_name_remote(leaf, i); +			name = name_rmt->name; +			namelen = name_rmt->namelen; +			valuelen = be32_to_cpu(name_rmt->valuelen);  		} + +		retval = context->put_listent(context, entry->flags, +					      name, namelen, valuelen); +		if (retval) +			break;  		if (context->seen_enough)  			break;  		cursor->offset++; @@ -551,8 +519,7 @@ xfs_attr_put_listent(  	int		flags,  	unsigned char	*name,  	int		namelen, -	int		valuelen, -	unsigned char	*value) +	int		valuelen)  {  	struct attrlist *alist = (struct attrlist *)context->alist;  	attrlist_ent_t 
*aep; @@ -581,7 +548,7 @@ xfs_attr_put_listent(  		trace_xfs_attr_list_full(context);  		alist->al_more = 1;  		context->seen_enough = 1; -		return 1; +		return 0;  	}  	aep = (attrlist_ent_t *)&context->alist[context->firstu]; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 3b6309865c65..586bb64e674b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -72,18 +72,11 @@ xfs_zero_extent(  	struct xfs_mount *mp = ip->i_mount;  	xfs_daddr_t	sector = xfs_fsb_to_db(ip, start_fsb);  	sector_t	block = XFS_BB_TO_FSBT(mp, sector); -	ssize_t		size = XFS_FSB_TO_B(mp, count_fsb); - -	if (IS_DAX(VFS_I(ip))) -		return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), -				sector, size); - -	/* -	 * let the block layer decide on the fastest method of -	 * implementing the zeroing. -	 */ -	return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); +	return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), +		block << (mp->m_super->s_blocksize_bits - 9), +		count_fsb << (mp->m_super->s_blocksize_bits - 9), +		GFP_NOFS, true);  }  /* @@ -900,19 +893,15 @@ xfs_free_eofblocks(  		 * Free them up now by truncating the file to  		 * its current size.  		 */ -		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); -  		if (need_iolock) { -			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { -				xfs_trans_cancel(tp); +			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))  				return -EAGAIN; -			}  		} -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, +				&tp);  		if (error) {  			ASSERT(XFS_FORCED_SHUTDOWN(mp)); -			xfs_trans_cancel(tp);  			if (need_iolock)  				xfs_iunlock(ip, XFS_IOLOCK_EXCL);  			return error; @@ -1037,9 +1026,9 @@ xfs_alloc_file_space(  		/*  		 * Allocate and setup the transaction.  		 
*/ -		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, -					  resblks, resrtextents); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, +				resrtextents, 0, &tp); +  		/*  		 * Check for running out of space  		 */ @@ -1048,7 +1037,6 @@ xfs_alloc_file_space(  			 * Free the transaction structure.  			 */  			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); -			xfs_trans_cancel(tp);  			break;  		}  		xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1311,18 +1299,10 @@ xfs_free_file_space(  		 * transaction to dip into the reserve blocks to ensure  		 * the freeing of the space succeeds at ENOSPC.  		 */ -		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); - -		/* -		 * check for running out of space -		 */ +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, +				&tp);  		if (error) { -			/* -			 * Free the transaction structure. -			 */  			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); -			xfs_trans_cancel(tp);  			break;  		}  		xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1482,19 +1462,16 @@ xfs_shift_file_space(  	}  	while (!error && !done) { -		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);  		/*  		 * We would need to reserve permanent block for transaction.  		 * This will come into picture when after shifting extent into  		 * hole we found that adjacent extents can be merged which  		 * may lead to freeing of a block during record update.  		 
*/ -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, -				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); -		if (error) { -			xfs_trans_cancel(tp); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, +				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); +		if (error)  			break; -		}  		xfs_ilock(ip, XFS_ILOCK_EXCL);  		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, @@ -1747,12 +1724,9 @@ xfs_swap_extents(  	if (error)  		goto out_unlock; -	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); +	if (error)  		goto out_unlock; -	}  	/*  	 * Lock and join the inodes to the tansaction so that transaction commit diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9a2191b91137..e71cfbd5acb3 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1100,22 +1100,18 @@ xfs_bwrite(  	return error;  } -STATIC void +static void  xfs_buf_bio_end_io(  	struct bio		*bio)  { -	xfs_buf_t		*bp = (xfs_buf_t *)bio->bi_private; +	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;  	/*  	 * don't overwrite existing errors - otherwise we can lose errors on  	 * buffers that require multiple bios to complete.  	 
*/ -	if (bio->bi_error) { -		spin_lock(&bp->b_lock); -		if (!bp->b_io_error) -			bp->b_io_error = bio->bi_error; -		spin_unlock(&bp->b_lock); -	} +	if (bio->bi_error) +		cmpxchg(&bp->b_io_error, 0, bio->bi_error);  	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))  		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 4eb89bd4ee73..8bfb974f0772 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -183,6 +183,26 @@ typedef struct xfs_buf {  	unsigned int		b_page_count;	/* size of page array */  	unsigned int		b_offset;	/* page offset in first page */  	int			b_error;	/* error code on I/O */ + +	/* +	 * async write failure retry count. Initialised to zero on the first +	 * failure, then when it exceeds the maximum configured without a +	 * success the write is considered to be failed permanently and the +	 * iodone handler will take appropriate action. +	 * +	 * For retry timeouts, we record the jiffie of the first failure. This +	 * means that we can change the retry timeout for buffers already under +	 * I/O and thus avoid getting stuck in a retry loop with a long timeout. +	 * +	 * last_error is used to ensure that we are getting repeated errors, not +	 * different errors. e.g. a block device might change ENOSPC to EIO when +	 * a failure timeout occurs, so we want to re-initialise the error +	 * retry behaviour appropriately when that happens. +	 */ +	int			b_retries; +	unsigned long		b_first_retry_time; /* in jiffies */ +	int			b_last_error; +  	const struct xfs_buf_ops	*b_ops;  #ifdef XFS_BUF_LOCK_TRACKING diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 99e91a0e554e..34257992934c 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1042,35 +1042,22 @@ xfs_buf_do_callbacks(  	}  } -/* - * This is the iodone() function for buffers which have had callbacks - * attached to them by xfs_buf_attach_iodone().  
It should remove each - * log item from the buffer's list and call the callback of each in turn. - * When done, the buffer's fsprivate field is set to NULL and the buffer - * is unlocked with a call to iodone(). - */ -void -xfs_buf_iodone_callbacks( +static bool +xfs_buf_iodone_callback_error(  	struct xfs_buf		*bp)  {  	struct xfs_log_item	*lip = bp->b_fspriv;  	struct xfs_mount	*mp = lip->li_mountp;  	static ulong		lasttime;  	static xfs_buftarg_t	*lasttarg; - -	if (likely(!bp->b_error)) -		goto do_callbacks; +	struct xfs_error_cfg	*cfg;  	/*  	 * If we've already decided to shutdown the filesystem because of  	 * I/O errors, there's no point in giving this a retry.  	 */ -	if (XFS_FORCED_SHUTDOWN(mp)) { -		xfs_buf_stale(bp); -		bp->b_flags |= XBF_DONE; -		trace_xfs_buf_item_iodone(bp, _RET_IP_); -		goto do_callbacks; -	} +	if (XFS_FORCED_SHUTDOWN(mp)) +		goto out_stale;  	if (bp->b_target != lasttarg ||  	    time_after(jiffies, (lasttime + 5*HZ))) { @@ -1079,45 +1066,93 @@ xfs_buf_iodone_callbacks(  	}  	lasttarg = bp->b_target; +	/* synchronous writes will have callers process the error */ +	if (!(bp->b_flags & XBF_ASYNC)) +		goto out_stale; + +	trace_xfs_buf_item_iodone_async(bp, _RET_IP_); +	ASSERT(bp->b_iodone != NULL); +  	/*  	 * If the write was asynchronous then no one will be looking for the -	 * error.  Clear the error state and write the buffer out again. -	 * -	 * XXX: This helps against transient write errors, but we need to find -	 * a way to shut the filesystem down if the writes keep failing. -	 * -	 * In practice we'll shut the filesystem down soon as non-transient -	 * errors tend to affect the whole device and a failing log write -	 * will make us give up.  But we really ought to do better here. +	 * error.  If this is the first failure of this type, clear the error +	 * state and write the buffer out again. 
This means we always retry an +	 * async write failure at least once, but we also need to set the buffer +	 * up to behave correctly now for repeated failures.  	 */ -	if (bp->b_flags & XBF_ASYNC) { -		ASSERT(bp->b_iodone != NULL); +	if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) || +	     bp->b_last_error != bp->b_error) { +		bp->b_flags |= (XBF_WRITE | XBF_ASYNC | +			        XBF_DONE | XBF_WRITE_FAIL); +		bp->b_last_error = bp->b_error; +		bp->b_retries = 0; +		bp->b_first_retry_time = jiffies; + +		xfs_buf_ioerror(bp, 0); +		xfs_buf_submit(bp); +		return true; +	} -		trace_xfs_buf_item_iodone_async(bp, _RET_IP_); +	/* +	 * Repeated failure on an async write. Take action according to the +	 * error configuration we have been set up to use. +	 */ +	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); -		xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ +	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && +	    ++bp->b_retries > cfg->max_retries) +			goto permanent_error; +	if (cfg->retry_timeout && +	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) +			goto permanent_error; -		if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { -			bp->b_flags |= XBF_WRITE | XBF_ASYNC | -				       XBF_DONE | XBF_WRITE_FAIL; -			xfs_buf_submit(bp); -		} else { -			xfs_buf_relse(bp); -		} +	/* At unmount we may treat errors differently */ +	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount) +		goto permanent_error; -		return; -	} +	/* still a transient error, higher layers will retry */ +	xfs_buf_ioerror(bp, 0); +	xfs_buf_relse(bp); +	return true;  	/* -	 * If the write of the buffer was synchronous, we want to make -	 * sure to return the error to the caller of xfs_bwrite(). +	 * Permanent error - we need to trigger a shutdown if we haven't already +	 * to indicate that inconsistency will result from this action.  	 
*/ +permanent_error: +	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); +out_stale:  	xfs_buf_stale(bp);  	bp->b_flags |= XBF_DONE; -  	trace_xfs_buf_error_relse(bp, _RET_IP_); +	return false; +} + +/* + * This is the iodone() function for buffers which have had callbacks attached + * to them by xfs_buf_attach_iodone(). We need to iterate the items on the + * callback list, mark the buffer as having no more callbacks and then push the + * buffer through IO completion processing. + */ +void +xfs_buf_iodone_callbacks( +	struct xfs_buf		*bp) +{ +	/* +	 * If there is an error, process it. Some errors require us +	 * to run callbacks after failure processing is done so we +	 * detect that and take appropriate action. +	 */ +	if (bp->b_error && xfs_buf_iodone_callback_error(bp)) +		return; + +	/* +	 * Successful IO or permanent error. Either way, we can clear the +	 * retry state here in preparation for the next error that may occur. +	 */ +	bp->b_last_error = 0; +	bp->b_retries = 0; -do_callbacks:  	xfs_buf_do_callbacks(bp);  	bp->b_fspriv = NULL;  	bp->b_iodone = NULL; diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 93b3ab0c5435..f44f79996978 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -273,10 +273,11 @@ xfs_dir2_leaf_readbuf(  	size_t			bufsize,  	struct xfs_dir2_leaf_map_info *mip,  	xfs_dir2_off_t		*curoff, -	struct xfs_buf		**bpp) +	struct xfs_buf		**bpp, +	bool			trim_map)  {  	struct xfs_inode	*dp = args->dp; -	struct xfs_buf		*bp = *bpp; +	struct xfs_buf		*bp = NULL;  	struct xfs_bmbt_irec	*map = mip->map;  	struct blk_plug		plug;  	int			error = 0; @@ -286,13 +287,10 @@ xfs_dir2_leaf_readbuf(  	struct xfs_da_geometry	*geo = args->geo;  	/* -	 * If we have a buffer, we need to release it and -	 * take it out of the mapping. +	 * If the caller just finished processing a buffer, it will tell us +	 * we need to trim that block out of the mapping now it is done.  	 
*/ - -	if (bp) { -		xfs_trans_brelse(NULL, bp); -		bp = NULL; +	if (trim_map) {  		mip->map_blocks -= geo->fsbcount;  		/*  		 * Loop to get rid of the extents for the @@ -533,10 +531,17 @@ xfs_dir2_leaf_getdents(  		 */  		if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {  			int	lock_mode; +			bool	trim_map = false; + +			if (bp) { +				xfs_trans_brelse(NULL, bp); +				bp = NULL; +				trim_map = true; +			}  			lock_mode = xfs_ilock_data_map_shared(dp);  			error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, -						      &curoff, &bp); +						      &curoff, &bp, trim_map);  			xfs_iunlock(dp, lock_mode);  			if (error || !map_info->map_valid)  				break; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 316b2a1bdba5..e0646659ce16 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -614,11 +614,10 @@ xfs_qm_dqread(  	trace_xfs_dqread(dqp);  	if (flags & XFS_QMOPT_DQALLOC) { -		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, -					  XFS_QM_DQALLOC_SPACE_RES(mp), 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc, +				XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);  		if (error) -			goto error1; +			goto error0;  	}  	/* @@ -692,7 +691,7 @@ error0:   * end of the chunk, skip ahead to first id in next allocated chunk   * using the SEEK_DATA interface.   
*/ -int +static int  xfs_dq_get_next_id(  	xfs_mount_t		*mp,  	uint			type, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 569938a4a357..47fc63295422 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -145,12 +145,10 @@ xfs_update_prealloc_flags(  	struct xfs_trans	*tp;  	int			error; -	tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); -	error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid, +			0, 0, 0, &tp); +	if (error)  		return error; -	}  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -718,18 +716,19 @@ xfs_file_dio_aio_write(  	int			unaligned_io = 0;  	int			iolock;  	size_t			count = iov_iter_count(from); -	loff_t			pos = iocb->ki_pos;  	loff_t			end;  	struct iov_iter		data;  	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?  					mp->m_rtdev_targp : mp->m_ddev_targp;  	/* DIO must be aligned to device logical sector size */ -	if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask)) +	if (!IS_DAX(inode) && +	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))  		return -EINVAL;  	/* "unaligned" here means not aligned to a filesystem block */ -	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) +	if ((iocb->ki_pos & mp->m_blockmask) || +	    ((iocb->ki_pos + count) & mp->m_blockmask))  		unaligned_io = 1;  	/* @@ -760,8 +759,7 @@ xfs_file_dio_aio_write(  	if (ret)  		goto out;  	count = iov_iter_count(from); -	pos = iocb->ki_pos; -	end = pos + count - 1; +	end = iocb->ki_pos + count - 1;  	/*  	 * See xfs_file_read_iter() for why we do a full-file flush here. 
@@ -794,19 +792,18 @@ xfs_file_dio_aio_write(  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);  	data = *from; -	ret = mapping->a_ops->direct_IO(iocb, &data, pos); +	ret = mapping->a_ops->direct_IO(iocb, &data);  	/* see generic_file_direct_write() for why this is necessary */  	if (mapping->nrpages) {  		invalidate_inode_pages2_range(mapping, -					      pos >> PAGE_SHIFT, +					      iocb->ki_pos >> PAGE_SHIFT,  					      end >> PAGE_SHIFT);  	}  	if (ret > 0) { -		pos += ret; +		iocb->ki_pos += ret;  		iov_iter_advance(from, ret); -		iocb->ki_pos = pos;  	}  out:  	xfs_rw_iunlock(ip, iolock); @@ -904,14 +901,10 @@ xfs_file_write_iter(  		ret = xfs_file_buffered_aio_write(iocb, from);  	if (ret > 0) { -		ssize_t err; -  		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);  		/* Handle various SYNC-type writes */ -		err = generic_write_sync(file, iocb->ki_pos - ret, ret); -		if (err < 0) -			ret = err; +		ret = generic_write_sync(iocb, ret);  	}  	return ret;  } @@ -1558,7 +1551,7 @@ xfs_filemap_page_mkwrite(  	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);  	if (IS_DAX(inode)) { -		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL); +		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);  	} else {  		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);  		ret = block_page_mkwrite_return(ret); @@ -1592,7 +1585,7 @@ xfs_filemap_fault(  		 * changes to xfs_get_blocks_direct() to map unwritten extent  		 * ioend for conversion on read-only mappings.  		 
*/ -		ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL); +		ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault);  	} else  		ret = filemap_fault(vma, vmf);  	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); @@ -1629,8 +1622,7 @@ xfs_filemap_pmd_fault(  	}  	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); -	ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault, -			      NULL); +	ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);  	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);  	if (flags & FAULT_FLAG_WRITE) @@ -1714,7 +1706,7 @@ const struct file_operations xfs_file_operations = {  const struct file_operations xfs_dir_file_operations = {  	.open		= xfs_dir_open,  	.read		= generic_read_dir, -	.iterate	= xfs_file_readdir, +	.iterate_shared	= xfs_file_readdir,  	.llseek		= generic_file_llseek,  	.unlocked_ioctl	= xfs_file_ioctl,  #ifdef CONFIG_COMPAT diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index ee3aaa0a5317..b4d75825ae37 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -198,14 +198,10 @@ xfs_growfs_data_private(  			return error;  	} -	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); -	tp->t_flags |= XFS_TRANS_RESERVE; -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, -				  XFS_GROWFS_SPACE_RES(mp), 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, +			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); +	if (error)  		return error; -	}  	/*  	 * Write new AG headers to disk. 
Non-transactional, but written @@ -243,8 +239,8 @@ xfs_growfs_data_private(  		agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));  		agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);  		agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); -		agf->agf_flfirst = 0; -		agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1); +		agf->agf_flfirst = cpu_to_be32(1); +		agf->agf_fllast = 0;  		agf->agf_flcount = 0;  		tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);  		agf->agf_freeblks = cpu_to_be32(tmpsize); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index bf2d60749278..99ee6eee5e0b 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -37,9 +37,6 @@  #include <linux/kthread.h>  #include <linux/freezer.h> -STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, -				struct xfs_perag *pag, struct xfs_inode *ip); -  /*   * Allocate and initialise an xfs_inode.   */ @@ -94,13 +91,6 @@ xfs_inode_free_callback(  	struct inode		*inode = container_of(head, struct inode, i_rcu);  	struct xfs_inode	*ip = XFS_I(inode); -	kmem_zone_free(xfs_inode_zone, ip); -} - -void -xfs_inode_free( -	struct xfs_inode	*ip) -{  	switch (VFS_I(ip)->i_mode & S_IFMT) {  	case S_IFREG:  	case S_IFDIR: @@ -118,6 +108,25 @@ xfs_inode_free(  		ip->i_itemp = NULL;  	} +	kmem_zone_free(xfs_inode_zone, ip); +} + +static void +__xfs_inode_free( +	struct xfs_inode	*ip) +{ +	/* asserts to verify all state is correct here */ +	ASSERT(atomic_read(&ip->i_pincount) == 0); +	ASSERT(!xfs_isiflocked(ip)); +	XFS_STATS_DEC(ip->i_mount, vn_active); + +	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); +} + +void +xfs_inode_free( +	struct xfs_inode	*ip) +{  	/*  	 * Because we use RCU freeing we need to ensure the inode always  	 * appears to be reclaimed with an invalid inode number when in the @@ -129,12 +138,123 @@ xfs_inode_free(  	ip->i_ino = 0;  	spin_unlock(&ip->i_flags_lock); -	/* asserts to verify all state is correct here */ -	ASSERT(atomic_read(&ip->i_pincount) == 0); -	
ASSERT(!xfs_isiflocked(ip)); -	XFS_STATS_DEC(ip->i_mount, vn_active); +	__xfs_inode_free(ip); +} -	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); +/* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs periodic sync default of 30s. Perhaps this should have it's own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. + */ +static void +xfs_reclaim_work_queue( +	struct xfs_mount        *mp) +{ + +	rcu_read_lock(); +	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { +		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, +			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); +	} +	rcu_read_unlock(); +} + +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. 
+ */ +void +xfs_reclaim_worker( +	struct work_struct *work) +{ +	struct xfs_mount *mp = container_of(to_delayed_work(work), +					struct xfs_mount, m_reclaim_work); + +	xfs_reclaim_inodes(mp, SYNC_TRYLOCK); +	xfs_reclaim_work_queue(mp); +} + +static void +xfs_perag_set_reclaim_tag( +	struct xfs_perag	*pag) +{ +	struct xfs_mount	*mp = pag->pag_mount; + +	ASSERT(spin_is_locked(&pag->pag_ici_lock)); +	if (pag->pag_ici_reclaimable++) +		return; + +	/* propagate the reclaim tag up into the perag radix tree */ +	spin_lock(&mp->m_perag_lock); +	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, +			   XFS_ICI_RECLAIM_TAG); +	spin_unlock(&mp->m_perag_lock); + +	/* schedule periodic background inode reclaim */ +	xfs_reclaim_work_queue(mp); + +	trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_); +} + +static void +xfs_perag_clear_reclaim_tag( +	struct xfs_perag	*pag) +{ +	struct xfs_mount	*mp = pag->pag_mount; + +	ASSERT(spin_is_locked(&pag->pag_ici_lock)); +	if (--pag->pag_ici_reclaimable) +		return; + +	/* clear the reclaim tag from the perag radix tree */ +	spin_lock(&mp->m_perag_lock); +	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, +			     XFS_ICI_RECLAIM_TAG); +	spin_unlock(&mp->m_perag_lock); +	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_); +} + + +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. 
+ */ +void +xfs_inode_set_reclaim_tag( +	struct xfs_inode	*ip) +{ +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_perag	*pag; + +	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); +	spin_lock(&pag->pag_ici_lock); +	spin_lock(&ip->i_flags_lock); + +	radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), +			   XFS_ICI_RECLAIM_TAG); +	xfs_perag_set_reclaim_tag(pag); +	__xfs_iflags_set(ip, XFS_IRECLAIMABLE); + +	spin_unlock(&ip->i_flags_lock); +	spin_unlock(&pag->pag_ici_lock); +	xfs_perag_put(pag); +} + +STATIC void +xfs_inode_clear_reclaim_tag( +	struct xfs_perag	*pag, +	xfs_ino_t		ino) +{ +	radix_tree_tag_clear(&pag->pag_ici_root, +			     XFS_INO_TO_AGINO(pag->pag_mount, ino), +			     XFS_ICI_RECLAIM_TAG); +	xfs_perag_clear_reclaim_tag(pag);  }  /* @@ -264,7 +384,7 @@ xfs_iget_cache_hit(  		 */  		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;  		ip->i_flags |= XFS_INEW; -		__xfs_inode_clear_reclaim_tag(mp, pag, ip); +		xfs_inode_clear_reclaim_tag(pag, ip->i_ino);  		inode->i_state = I_NEW;  		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); @@ -723,121 +843,6 @@ xfs_inode_ag_iterator_tag(  }  /* - * Queue a new inode reclaim pass if there are reclaimable inodes and there - * isn't a reclaim pass already in progress. By default it runs every 5s based - * on the xfs periodic sync default of 30s. Perhaps this should have it's own - * tunable, but that can be done if this method proves to be ineffective or too - * aggressive. - */ -static void -xfs_reclaim_work_queue( -	struct xfs_mount        *mp) -{ - -	rcu_read_lock(); -	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { -		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, -			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); -	} -	rcu_read_unlock(); -} - -/* - * This is a fast pass over the inode cache to try to get reclaim moving on as - * many inodes as possible in a short period of time. 
It kicks itself every few - * seconds, as well as being kicked by the inode cache shrinker when memory - * goes low. It scans as quickly as possible avoiding locked inodes or those - * already being flushed, and once done schedules a future pass. - */ -void -xfs_reclaim_worker( -	struct work_struct *work) -{ -	struct xfs_mount *mp = container_of(to_delayed_work(work), -					struct xfs_mount, m_reclaim_work); - -	xfs_reclaim_inodes(mp, SYNC_TRYLOCK); -	xfs_reclaim_work_queue(mp); -} - -static void -__xfs_inode_set_reclaim_tag( -	struct xfs_perag	*pag, -	struct xfs_inode	*ip) -{ -	radix_tree_tag_set(&pag->pag_ici_root, -			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), -			   XFS_ICI_RECLAIM_TAG); - -	if (!pag->pag_ici_reclaimable) { -		/* propagate the reclaim tag up into the perag radix tree */ -		spin_lock(&ip->i_mount->m_perag_lock); -		radix_tree_tag_set(&ip->i_mount->m_perag_tree, -				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), -				XFS_ICI_RECLAIM_TAG); -		spin_unlock(&ip->i_mount->m_perag_lock); - -		/* schedule periodic background inode reclaim */ -		xfs_reclaim_work_queue(ip->i_mount); - -		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, -							-1, _RET_IP_); -	} -	pag->pag_ici_reclaimable++; -} - -/* - * We set the inode flag atomically with the radix tree tag. - * Once we get tag lookups on the radix tree, this inode flag - * can go away. 
- */ -void -xfs_inode_set_reclaim_tag( -	xfs_inode_t	*ip) -{ -	struct xfs_mount *mp = ip->i_mount; -	struct xfs_perag *pag; - -	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); -	spin_lock(&pag->pag_ici_lock); -	spin_lock(&ip->i_flags_lock); -	__xfs_inode_set_reclaim_tag(pag, ip); -	__xfs_iflags_set(ip, XFS_IRECLAIMABLE); -	spin_unlock(&ip->i_flags_lock); -	spin_unlock(&pag->pag_ici_lock); -	xfs_perag_put(pag); -} - -STATIC void -__xfs_inode_clear_reclaim( -	xfs_perag_t	*pag, -	xfs_inode_t	*ip) -{ -	pag->pag_ici_reclaimable--; -	if (!pag->pag_ici_reclaimable) { -		/* clear the reclaim tag from the perag radix tree */ -		spin_lock(&ip->i_mount->m_perag_lock); -		radix_tree_tag_clear(&ip->i_mount->m_perag_tree, -				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), -				XFS_ICI_RECLAIM_TAG); -		spin_unlock(&ip->i_mount->m_perag_lock); -		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, -							-1, _RET_IP_); -	} -} - -STATIC void -__xfs_inode_clear_reclaim_tag( -	xfs_mount_t	*mp, -	xfs_perag_t	*pag, -	xfs_inode_t	*ip) -{ -	radix_tree_tag_clear(&pag->pag_ici_root, -			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); -	__xfs_inode_clear_reclaim(pag, ip); -} - -/*   * Grab the inode for reclaim exclusively.   * Return 0 if we grabbed it, non-zero otherwise.   */ @@ -929,6 +934,7 @@ xfs_reclaim_inode(  	int			sync_mode)  {  	struct xfs_buf		*bp = NULL; +	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */  	int			error;  restart: @@ -993,6 +999,22 @@ restart:  	xfs_iflock(ip);  reclaim: +	/* +	 * Because we use RCU freeing we need to ensure the inode always appears +	 * to be reclaimed with an invalid inode number when in the free state. +	 * We do this as early as possible under the ILOCK and flush lock so +	 * that xfs_iflush_cluster() can be guaranteed to detect races with us +	 * here. 
By doing this, we guarantee that once xfs_iflush_cluster has +	 * locked both the XFS_ILOCK and the flush lock that it will see either +	 * a valid, flushable inode that will serialise correctly against the +	 * locks below, or it will see a clean (and invalid) inode that it can +	 * skip. +	 */ +	spin_lock(&ip->i_flags_lock); +	ip->i_flags = XFS_IRECLAIM; +	ip->i_ino = 0; +	spin_unlock(&ip->i_flags_lock); +  	xfs_ifunlock(ip);  	xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1006,9 +1028,9 @@ reclaim:  	 */  	spin_lock(&pag->pag_ici_lock);  	if (!radix_tree_delete(&pag->pag_ici_root, -				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) +				XFS_INO_TO_AGINO(ip->i_mount, ino)))  		ASSERT(0); -	__xfs_inode_clear_reclaim(pag, ip); +	xfs_perag_clear_reclaim_tag(pag);  	spin_unlock(&pag->pag_ici_lock);  	/* @@ -1023,7 +1045,7 @@ reclaim:  	xfs_qm_dqdetach(ip);  	xfs_iunlock(ip, XFS_ILOCK_EXCL); -	xfs_inode_free(ip); +	__xfs_inode_free(ip);  	return error;  out_ifunlock: diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 96f606deee31..ee6799e0476f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1030,7 +1030,7 @@ xfs_dir_ialloc(  			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);  		} -		code = xfs_trans_roll(&tp, 0); +		code = xfs_trans_roll(&tp, NULL);  		if (committed != NULL)  			*committed = 1; @@ -1161,11 +1161,9 @@ xfs_create(  		rdev = 0;  		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);  		tres = &M_RES(mp)->tr_mkdir; -		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);  	} else {  		resblks = XFS_CREATE_SPACE_RES(mp, name->len);  		tres = &M_RES(mp)->tr_create; -		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);  	}  	/* @@ -1174,20 +1172,19 @@ xfs_create(  	 * the case we'll drop the one we have and get a more  	 * appropriate transaction later.  	 
*/ -	error = xfs_trans_reserve(tp, tres, resblks, 0); +	error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);  	if (error == -ENOSPC) {  		/* flush outstanding delalloc blocks and retry */  		xfs_flush_inodes(mp); -		error = xfs_trans_reserve(tp, tres, resblks, 0); +		error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);  	}  	if (error == -ENOSPC) {  		/* No space at all so try a "no-allocation" reservation */  		resblks = 0; -		error = xfs_trans_reserve(tp, tres, 0, 0); +		error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);  	}  	if (error) -		goto out_trans_cancel; - +		goto out_release_inode;  	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |  		      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); @@ -1337,17 +1334,16 @@ xfs_create_tmpfile(  		return error;  	resblks = XFS_IALLOC_SPACE_RES(mp); -	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE); -  	tres = &M_RES(mp)->tr_create_tmpfile; -	error = xfs_trans_reserve(tp, tres, resblks, 0); + +	error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);  	if (error == -ENOSPC) {  		/* No space at all so try a "no-allocation" reservation */  		resblks = 0; -		error = xfs_trans_reserve(tp, tres, 0, 0); +		error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);  	}  	if (error) -		goto out_trans_cancel; +		goto out_release_inode;  	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,  						pdqp, resblks, 1, 0); @@ -1432,15 +1428,14 @@ xfs_link(  	if (error)  		goto std_return; -	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);  	resblks = XFS_LINK_SPACE_RES(mp, target_name->len); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);  	if (error == -ENOSPC) {  		resblks = 0; -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);  	}  	if (error) -		goto error_return; +		goto std_return;  	xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);  	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 
@@ -1710,11 +1705,9 @@ xfs_inactive_truncate(  	struct xfs_trans	*tp;  	int			error; -	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);  	if (error) {  		ASSERT(XFS_FORCED_SHUTDOWN(mp)); -		xfs_trans_cancel(tp);  		return error;  	} @@ -1764,8 +1757,6 @@ xfs_inactive_ifree(  	struct xfs_trans	*tp;  	int			error; -	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); -  	/*  	 * The ifree transaction might need to allocate blocks for record  	 * insertion to the finobt. We don't want to fail here at ENOSPC, so @@ -1781,9 +1772,8 @@ xfs_inactive_ifree(  	 * now remains allocated and sits on the unlinked list until the fs is  	 * repaired.  	 */ -	tp->t_flags |= XFS_TRANS_RESERVE; -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, -				  XFS_IFREE_SPACE_RES(mp), 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, +			XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);  	if (error) {  		if (error == -ENOSPC) {  			xfs_warn_ratelimited(mp, @@ -1792,7 +1782,6 @@ xfs_inactive_ifree(  		} else {  			ASSERT(XFS_FORCED_SHUTDOWN(mp));  		} -		xfs_trans_cancel(tp);  		return error;  	} @@ -2525,11 +2514,6 @@ xfs_remove(  	if (error)  		goto std_return; -	if (is_dir) -		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); -	else -		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); -  	/*  	 * We try to get the real space reservation first,  	 * allowing for directory btree deletion(s) implying @@ -2540,14 +2524,15 @@ xfs_remove(  	 * block from the directory.  	 
*/  	resblks = XFS_REMOVE_SPACE_RES(mp); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);  	if (error == -ENOSPC) {  		resblks = 0; -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, +				&tp);  	}  	if (error) {  		ASSERT(error != -ENOSPC); -		goto out_trans_cancel; +		goto std_return;  	}  	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); @@ -2855,6 +2840,7 @@ xfs_rename_alloc_whiteout(  	 * and flag it as linkable.  	 */  	drop_nlink(VFS_I(tmpfile)); +	xfs_setup_iops(tmpfile);  	xfs_finish_inode_setup(tmpfile);  	VFS_I(tmpfile)->i_state |= I_LINKABLE; @@ -2910,15 +2896,15 @@ xfs_rename(  	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,  				inodes, &num_inodes); -	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);  	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);  	if (error == -ENOSPC) {  		spaceres = 0; -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0, +				&tp);  	}  	if (error) -		goto out_trans_cancel; +		goto out_release_wip;  	/*  	 * Attach the dquots to the inodes @@ -3155,6 +3141,7 @@ out_bmap_cancel:  	xfs_bmap_cancel(&free_list);  out_trans_cancel:  	xfs_trans_cancel(tp); +out_release_wip:  	if (wip)  		IRELE(wip);  	return error; @@ -3162,16 +3149,16 @@ out_trans_cancel:  STATIC int  xfs_iflush_cluster( -	xfs_inode_t	*ip, -	xfs_buf_t	*bp) +	struct xfs_inode	*ip, +	struct xfs_buf		*bp)  { -	xfs_mount_t		*mp = ip->i_mount; +	struct xfs_mount	*mp = ip->i_mount;  	struct xfs_perag	*pag;  	unsigned long		first_index, mask;  	unsigned long		inodes_per_cluster; -	int			ilist_size; -	xfs_inode_t		**ilist; -	xfs_inode_t		*iq; +	int			cilist_size; +	struct 
xfs_inode	**cilist; +	struct xfs_inode	*cip;  	int			nr_found;  	int			clcount = 0;  	int			bufwasdelwri; @@ -3180,23 +3167,23 @@ xfs_iflush_cluster(  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));  	inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; -	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); -	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); -	if (!ilist) +	cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); +	cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); +	if (!cilist)  		goto out_put;  	mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);  	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;  	rcu_read_lock();  	/* really need a gang lookup range call here */ -	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, +	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,  					first_index, inodes_per_cluster);  	if (nr_found == 0)  		goto out_free;  	for (i = 0; i < nr_found; i++) { -		iq = ilist[i]; -		if (iq == ip) +		cip = cilist[i]; +		if (cip == ip)  			continue;  		/* @@ -3205,20 +3192,30 @@ xfs_iflush_cluster(  		 * We need to check under the i_flags_lock for a valid inode  		 * here. Skip it if it is not valid or the wrong inode.  		 */ -		spin_lock(&ip->i_flags_lock); -		if (!ip->i_ino || -		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { -			spin_unlock(&ip->i_flags_lock); +		spin_lock(&cip->i_flags_lock); +		if (!cip->i_ino || +		    __xfs_iflags_test(cip, XFS_ISTALE)) { +			spin_unlock(&cip->i_flags_lock);  			continue;  		} -		spin_unlock(&ip->i_flags_lock); + +		/* +		 * Once we fall off the end of the cluster, no point checking +		 * any more inodes in the list because they will also all be +		 * outside the cluster. 
+		 */ +		if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) { +			spin_unlock(&cip->i_flags_lock); +			break; +		} +		spin_unlock(&cip->i_flags_lock);  		/*  		 * Do an un-protected check to see if the inode is dirty and  		 * is a candidate for flushing.  These checks will be repeated  		 * later after the appropriate locks are acquired.  		 */ -		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) +		if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)  			continue;  		/* @@ -3226,15 +3223,28 @@ xfs_iflush_cluster(  		 * then this inode cannot be flushed and is skipped.  		 */ -		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) +		if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED)) +			continue; +		if (!xfs_iflock_nowait(cip)) { +			xfs_iunlock(cip, XFS_ILOCK_SHARED);  			continue; -		if (!xfs_iflock_nowait(iq)) { -			xfs_iunlock(iq, XFS_ILOCK_SHARED); +		} +		if (xfs_ipincount(cip)) { +			xfs_ifunlock(cip); +			xfs_iunlock(cip, XFS_ILOCK_SHARED);  			continue;  		} -		if (xfs_ipincount(iq)) { -			xfs_ifunlock(iq); -			xfs_iunlock(iq, XFS_ILOCK_SHARED); + + +		/* +		 * Check the inode number again, just to be certain we are not +		 * racing with freeing in xfs_reclaim_inode(). See the comments +		 * in that function for more information as to why the initial +		 * check is not sufficient. +		 */ +		if (!cip->i_ino) { +			xfs_ifunlock(cip); +			xfs_iunlock(cip, XFS_ILOCK_SHARED);  			continue;  		} @@ -3242,18 +3252,18 @@ xfs_iflush_cluster(  		 * arriving here means that this inode can be flushed.  First  		 * re-check that it's dirty before flushing.  		 
*/ -		if (!xfs_inode_clean(iq)) { +		if (!xfs_inode_clean(cip)) {  			int	error; -			error = xfs_iflush_int(iq, bp); +			error = xfs_iflush_int(cip, bp);  			if (error) { -				xfs_iunlock(iq, XFS_ILOCK_SHARED); +				xfs_iunlock(cip, XFS_ILOCK_SHARED);  				goto cluster_corrupt_out;  			}  			clcount++;  		} else { -			xfs_ifunlock(iq); +			xfs_ifunlock(cip);  		} -		xfs_iunlock(iq, XFS_ILOCK_SHARED); +		xfs_iunlock(cip, XFS_ILOCK_SHARED);  	}  	if (clcount) { @@ -3263,7 +3273,7 @@ xfs_iflush_cluster(  out_free:  	rcu_read_unlock(); -	kmem_free(ilist); +	kmem_free(cilist);  out_put:  	xfs_perag_put(pag);  	return 0; @@ -3306,8 +3316,8 @@ cluster_corrupt_out:  	/*  	 * Unlocks the flush lock  	 */ -	xfs_iflush_abort(iq, false); -	kmem_free(ilist); +	xfs_iflush_abort(cip, false); +	kmem_free(cilist);  	xfs_perag_put(pag);  	return -EFSCORRUPTED;  } @@ -3327,7 +3337,7 @@ xfs_iflush(  	struct xfs_buf		**bpp)  {  	struct xfs_mount	*mp = ip->i_mount; -	struct xfs_buf		*bp; +	struct xfs_buf		*bp = NULL;  	struct xfs_dinode	*dip;  	int			error; @@ -3369,14 +3379,22 @@ xfs_iflush(  	}  	/* -	 * Get the buffer containing the on-disk inode. +	 * Get the buffer containing the on-disk inode. We are doing a try-lock +	 * operation here, so we may get  an EAGAIN error. In that case, we +	 * simply want to return with the inode still dirty. +	 * +	 * If we get any other error, we effectively have a corruption situation +	 * and we cannot flush the inode, so we treat it the same as failing +	 * xfs_iflush_int().  	 */  	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,  			       0); -	if (error || !bp) { +	if (error == -EAGAIN) {  		xfs_ifunlock(ip);  		return error;  	} +	if (error) +		goto corrupt_out;  	/*  	 * First flush out the inode that xfs_iflush was called with. 
@@ -3404,7 +3422,8 @@ xfs_iflush(  	return 0;  corrupt_out: -	xfs_buf_relse(bp); +	if (bp) +		xfs_buf_relse(bp);  	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);  cluster_corrupt_out:  	error = -EFSCORRUPTED; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 43e1d51b15eb..e52d7c7aeb5b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -440,6 +440,9 @@ loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,  /* from xfs_iops.c */ +extern void xfs_setup_inode(struct xfs_inode *ip); +extern void xfs_setup_iops(struct xfs_inode *ip); +  /*   * When setting up a newly allocated inode, we need to call   * xfs_finish_inode_setup() once the inode is fully instantiated at @@ -447,7 +450,6 @@ loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,   * before we've completed instantiation. Otherwise we can do it   * the moment the inode lookup is complete.   */ -extern void xfs_setup_inode(struct xfs_inode *ip);  static inline void xfs_finish_inode_setup(struct xfs_inode *ip)  {  	xfs_iflags_clear(ip, XFS_INEW); @@ -458,6 +460,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)  static inline void xfs_setup_existing_inode(struct xfs_inode *ip)  {  	xfs_setup_inode(ip); +	xfs_setup_iops(ip);  	xfs_finish_inode_setup(ip);  } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c48b5b18d771..a1b07612224c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -210,7 +210,7 @@ xfs_inode_item_format_data_fork(  			 */  			data_bytes = roundup(ip->i_df.if_bytes, 4);  			ASSERT(ip->i_df.if_real_bytes == 0 || -			       ip->i_df.if_real_bytes == data_bytes); +			       ip->i_df.if_real_bytes >= data_bytes);  			ASSERT(ip->i_df.if_u1.if_data != NULL);  			ASSERT(ip->i_d.di_size > 0);  			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, @@ -305,7 +305,7 @@ xfs_inode_item_format_attr_fork(  			 */  			data_bytes = roundup(ip->i_afp->if_bytes, 4);  			ASSERT(ip->i_afp->if_real_bytes == 0 || -			       
ip->i_afp->if_real_bytes == data_bytes); +			       ip->i_afp->if_real_bytes >= data_bytes);  			ASSERT(ip->i_afp->if_u1.if_data != NULL);  			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,  					ip->i_afp->if_u1.if_data, @@ -479,6 +479,8 @@ STATIC uint  xfs_inode_item_push(  	struct xfs_log_item	*lip,  	struct list_head	*buffer_list) +		__releases(&lip->li_ailp->xa_lock) +		__acquires(&lip->li_ailp->xa_lock)  {  	struct xfs_inode_log_item *iip = INODE_ITEM(lip);  	struct xfs_inode	*ip = iip->ili_inode; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index bcb6c19ce3ea..dbca7375deef 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -277,7 +277,6 @@ xfs_readlink_by_handle(  {  	struct dentry		*dentry;  	__u32			olen; -	void			*link;  	int			error;  	if (!capable(CAP_SYS_ADMIN)) @@ -288,7 +287,7 @@ xfs_readlink_by_handle(  		return PTR_ERR(dentry);  	/* Restrict this handle operation to symlinks only. */ -	if (!d_is_symlink(dentry)) { +	if (!d_inode(dentry)->i_op->readlink) {  		error = -EINVAL;  		goto out_dput;  	} @@ -298,21 +297,8 @@ xfs_readlink_by_handle(  		goto out_dput;  	} -	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); -	if (!link) { -		error = -ENOMEM; -		goto out_dput; -	} - -	error = xfs_readlink(XFS_I(d_inode(dentry)), link); -	if (error) -		goto out_kfree; -	error = readlink_copy(hreq->ohandle, olen, link); -	if (error) -		goto out_kfree; +	error = d_inode(dentry)->i_op->readlink(dentry, hreq->ohandle, olen); - out_kfree: -	kfree(link);   out_dput:  	dput(dentry);  	return error; @@ -334,12 +320,10 @@ xfs_set_dmattrs(  	if (XFS_FORCED_SHUTDOWN(mp))  		return -EIO; -	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); +	if (error)  		return error; -	} +  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); @@ -1141,10 +1125,9 @@ 
xfs_ioctl_setattr_get_trans(  	if (XFS_FORCED_SHUTDOWN(mp))  		goto out_unlock; -	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);  	if (error) -		goto out_cancel; +		return ERR_PTR(error);  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d81bdc080370..58391355a44d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -132,6 +132,7 @@ xfs_iomap_write_direct(  	int		error;  	int		lockmode;  	int		bmapi_flags = XFS_BMAPI_PREALLOC; +	uint		tflags = 0;  	rt = XFS_IS_REALTIME_INODE(ip);  	extsz = xfs_get_extsz_hint(ip); @@ -192,11 +193,6 @@ xfs_iomap_write_direct(  		return error;  	/* -	 * Allocate and setup the transaction -	 */ -	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - -	/*  	 * For DAX, we do not allocate unwritten extents, but instead we zero  	 * the block before we commit the transaction.  Ideally we'd like to do  	 * this outside the transaction context, but if we commit and then crash @@ -209,23 +205,17 @@ xfs_iomap_write_direct(  	 * the reserve block pool for bmbt block allocation if there is no space  	 * left but we need to do unwritten extent conversion.  	 
*/ -  	if (IS_DAX(VFS_I(ip))) {  		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;  		if (ISUNWRITTEN(imap)) { -			tp->t_flags |= XFS_TRANS_RESERVE; +			tflags |= XFS_TRANS_RESERVE;  			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;  		}  	} -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, -				  resblks, resrtextents); -	/* -	 * Check for running out of space, note: need lock to return -	 */ -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents, +			tflags, &tp); +	if (error)  		return error; -	}  	lockmode = XFS_ILOCK_EXCL;  	xfs_ilock(ip, lockmode); @@ -726,15 +716,13 @@ xfs_iomap_write_allocate(  		nimaps = 0;  		while (nimaps == 0) { -			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); -			tp->t_flags |= XFS_TRANS_RESERVE;  			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); -			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, -						  nres, 0); -			if (error) { -				xfs_trans_cancel(tp); + +			error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres, +					0, XFS_TRANS_RESERVE, &tp); +			if (error)  				return error; -			} +  			xfs_ilock(ip, XFS_ILOCK_EXCL);  			xfs_trans_ijoin(tp, ip, 0); @@ -878,25 +866,18 @@ xfs_iomap_write_unwritten(  	do {  		/* -		 * set up a transaction to convert the range of extents +		 * Set up a transaction to convert the range of extents  		 * from unwritten to real. Do allocations in a loop until  		 * we have covered the range passed in.  		 * -		 * Note that we open code the transaction allocation here -		 * to pass KM_NOFS--we can't risk to recursing back into -		 * the filesystem here as we might be asked to write out -		 * the same inode that we complete here and might deadlock -		 * on the iolock. +		 * Note that we can't risk to recursing back into the filesystem +		 * here as we might be asked to write out the same inode that we +		 * complete here and might deadlock on the iolock.  		 
*/ -		sb_start_intwrite(mp->m_super); -		tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); -		tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, -					  resblks, 0); -		if (error) { -			xfs_trans_cancel(tp); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, +				XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); +		if (error)  			return error; -		}  		xfs_ilock(ip, XFS_ILOCK_EXCL);  		xfs_trans_ijoin(tp, ip, 0); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index fb7dc61f4a29..c5d4eba6972e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -181,6 +181,8 @@ xfs_generic_create(  	}  #endif +	xfs_setup_iops(ip); +  	if (tmpfile)  		d_tmpfile(dentry, inode);  	else @@ -368,6 +370,8 @@ xfs_vn_symlink(  	if (unlikely(error))  		goto out_cleanup_inode; +	xfs_setup_iops(cip); +  	d_instantiate(dentry, inode);  	xfs_finish_inode_setup(cip);  	return 0; @@ -442,6 +446,16 @@ xfs_vn_get_link(  	return ERR_PTR(error);  } +STATIC const char * +xfs_vn_get_link_inline( +	struct dentry		*dentry, +	struct inode		*inode, +	struct delayed_call	*done) +{ +	ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); +	return XFS_I(inode)->i_df.if_u1.if_data; +} +  STATIC int  xfs_vn_getattr(  	struct vfsmount		*mnt, @@ -599,12 +613,12 @@ xfs_setattr_nonsize(  			return error;  	} -	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);  	if (error) -		goto out_trans_cancel; +		goto out_dqrele;  	xfs_ilock(ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, 0);  	/*  	 * Change file ownership.  Must be the owner or privileged. @@ -633,12 +647,10 @@ xfs_setattr_nonsize(  						NULL, capable(CAP_FOWNER) ?  						XFS_QMOPT_FORCE_RES : 0);  			if (error)	/* out of quota */ -				goto out_unlock; +				goto out_cancel;  		}  	} -	xfs_trans_ijoin(tp, ip, 0); -  	/*  	 * Change file ownership. 
 Must be the owner or privileged.  	 */ @@ -722,10 +734,9 @@ xfs_setattr_nonsize(  	return 0; -out_unlock: -	xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_trans_cancel: +out_cancel:  	xfs_trans_cancel(tp); +out_dqrele:  	xfs_qm_dqrele(udqp);  	xfs_qm_dqrele(gdqp);  	return error; @@ -834,7 +845,7 @@ xfs_setattr_size(  	 * We have to do all the page cache truncate work outside the  	 * transaction context as the "lock" order is page lock->log space  	 * reservation as defined by extent allocation in the writeback path. -	 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but +	 * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but  	 * having already truncated the in-memory version of the file (i.e. made  	 * user visible changes). There's not much we can do about this, except  	 * to hope that the caller sees ENOMEM and retries the truncate @@ -849,10 +860,9 @@ xfs_setattr_size(  		return error;  	truncate_setsize(inode, newsize); -	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);  	if (error) -		goto out_trans_cancel; +		return error;  	lock_flags |= XFS_ILOCK_EXCL;  	xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -971,12 +981,9 @@ xfs_vn_update_time(  	trace_xfs_update_time(ip); -	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); +	if (error)  		return error; -	}  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	if (flags & S_CTIME) @@ -1167,6 +1174,18 @@ static const struct inode_operations xfs_symlink_inode_operations = {  	.update_time		= xfs_vn_update_time,  }; +static const struct inode_operations xfs_inline_symlink_inode_operations = { +	.readlink		= generic_readlink, +	.get_link		= xfs_vn_get_link_inline, +	.getattr		= xfs_vn_getattr, +	.setattr		= 
xfs_vn_setattr, +	.setxattr		= generic_setxattr, +	.getxattr		= generic_getxattr, +	.removexattr		= generic_removexattr, +	.listxattr		= xfs_vn_listxattr, +	.update_time		= xfs_vn_update_time, +}; +  STATIC void  xfs_diflags_to_iflags(  	struct inode		*inode, @@ -1193,7 +1212,7 @@ xfs_diflags_to_iflags(  }  /* - * Initialize the Linux inode and set up the operation vectors. + * Initialize the Linux inode.   *   * When reading existing inodes from disk this is called directly from xfs_iget,   * when creating a new inode it is called from xfs_ialloc after setting up the @@ -1232,32 +1251,12 @@ xfs_setup_inode(  	i_size_write(inode, ip->i_d.di_size);  	xfs_diflags_to_iflags(inode, ip); -	ip->d_ops = ip->i_mount->m_nondir_inode_ops; -	lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); -	switch (inode->i_mode & S_IFMT) { -	case S_IFREG: -		inode->i_op = &xfs_inode_operations; -		inode->i_fop = &xfs_file_operations; -		inode->i_mapping->a_ops = &xfs_address_space_operations; -		break; -	case S_IFDIR: +	if (S_ISDIR(inode->i_mode)) {  		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); -		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) -			inode->i_op = &xfs_dir_ci_inode_operations; -		else -			inode->i_op = &xfs_dir_inode_operations; -		inode->i_fop = &xfs_dir_file_operations;  		ip->d_ops = ip->i_mount->m_dir_inode_ops; -		break; -	case S_IFLNK: -		inode->i_op = &xfs_symlink_inode_operations; -		if (!(ip->i_df.if_flags & XFS_IFINLINE)) -			inode->i_mapping->a_ops = &xfs_address_space_operations; -		break; -	default: -		inode->i_op = &xfs_inode_operations; -		init_special_inode(inode, inode->i_mode, inode->i_rdev); -		break; +	} else { +		ip->d_ops = ip->i_mount->m_nondir_inode_ops; +		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);  	}  	/* @@ -1277,3 +1276,35 @@ xfs_setup_inode(  		cache_no_acl(inode);  	}  } + +void +xfs_setup_iops( +	struct xfs_inode	*ip) +{ +	struct inode		*inode = &ip->i_vnode; + +	switch 
(inode->i_mode & S_IFMT) { +	case S_IFREG: +		inode->i_op = &xfs_inode_operations; +		inode->i_fop = &xfs_file_operations; +		inode->i_mapping->a_ops = &xfs_address_space_operations; +		break; +	case S_IFDIR: +		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) +			inode->i_op = &xfs_dir_ci_inode_operations; +		else +			inode->i_op = &xfs_dir_inode_operations; +		inode->i_fop = &xfs_dir_file_operations; +		break; +	case S_IFLNK: +		if (ip->i_df.if_flags & XFS_IFINLINE) +			inode->i_op = &xfs_inline_symlink_inode_operations; +		else +			inode->i_op = &xfs_symlink_inode_operations; +		break; +	default: +		inode->i_op = &xfs_inode_operations; +		init_special_inode(inode, inode->i_mode, inode->i_rdev); +		break; +	} +} diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b49ccf5c1d75..bde02f1fba73 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -435,8 +435,7 @@ xfs_log_reserve(  	int		 	cnt,  	struct xlog_ticket	**ticp,  	__uint8_t	 	client, -	bool			permanent, -	uint		 	t_type) +	bool			permanent)  {  	struct xlog		*log = mp->m_log;  	struct xlog_ticket	*tic; @@ -456,7 +455,6 @@ xfs_log_reserve(  	if (!tic)  		return -ENOMEM; -	tic->t_trans_type = t_type;  	*ticp = tic;  	xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt @@ -823,8 +821,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)  	} while (iclog != first_iclog);  #endif  	if (! 
(XLOG_FORCED_SHUTDOWN(log))) { -		error = xfs_log_reserve(mp, 600, 1, &tic, -					XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); +		error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);  		if (!error) {  			/* the data section must be 32 bit size aligned */  			struct { @@ -2032,58 +2029,8 @@ xlog_print_tic_res(  	    REG_TYPE_STR(ICREATE, "inode create")  	};  #undef REG_TYPE_STR -#define TRANS_TYPE_STR(type)	[XFS_TRANS_##type] = #type -	static char *trans_type_str[XFS_TRANS_TYPE_MAX] = { -	    TRANS_TYPE_STR(SETATTR_NOT_SIZE), -	    TRANS_TYPE_STR(SETATTR_SIZE), -	    TRANS_TYPE_STR(INACTIVE), -	    TRANS_TYPE_STR(CREATE), -	    TRANS_TYPE_STR(CREATE_TRUNC), -	    TRANS_TYPE_STR(TRUNCATE_FILE), -	    TRANS_TYPE_STR(REMOVE), -	    TRANS_TYPE_STR(LINK), -	    TRANS_TYPE_STR(RENAME), -	    TRANS_TYPE_STR(MKDIR), -	    TRANS_TYPE_STR(RMDIR), -	    TRANS_TYPE_STR(SYMLINK), -	    TRANS_TYPE_STR(SET_DMATTRS), -	    TRANS_TYPE_STR(GROWFS), -	    TRANS_TYPE_STR(STRAT_WRITE), -	    TRANS_TYPE_STR(DIOSTRAT), -	    TRANS_TYPE_STR(WRITEID), -	    TRANS_TYPE_STR(ADDAFORK), -	    TRANS_TYPE_STR(ATTRINVAL), -	    TRANS_TYPE_STR(ATRUNCATE), -	    TRANS_TYPE_STR(ATTR_SET), -	    TRANS_TYPE_STR(ATTR_RM), -	    TRANS_TYPE_STR(ATTR_FLAG), -	    TRANS_TYPE_STR(CLEAR_AGI_BUCKET), -	    TRANS_TYPE_STR(SB_CHANGE), -	    TRANS_TYPE_STR(DUMMY1), -	    TRANS_TYPE_STR(DUMMY2), -	    TRANS_TYPE_STR(QM_QUOTAOFF), -	    TRANS_TYPE_STR(QM_DQALLOC), -	    TRANS_TYPE_STR(QM_SETQLIM), -	    TRANS_TYPE_STR(QM_DQCLUSTER), -	    TRANS_TYPE_STR(QM_QINOCREATE), -	    TRANS_TYPE_STR(QM_QUOTAOFF_END), -	    TRANS_TYPE_STR(FSYNC_TS), -	    TRANS_TYPE_STR(GROWFSRT_ALLOC), -	    TRANS_TYPE_STR(GROWFSRT_ZERO), -	    TRANS_TYPE_STR(GROWFSRT_FREE), -	    TRANS_TYPE_STR(SWAPEXT), -	    TRANS_TYPE_STR(CHECKPOINT), -	    TRANS_TYPE_STR(ICREATE), -	    TRANS_TYPE_STR(CREATE_TMPFILE) -	}; -#undef TRANS_TYPE_STR  	xfs_warn(mp, "xlog_write: reservation summary:"); -	xfs_warn(mp, "  trans type  = %s (%u)", -		 
((ticket->t_trans_type <= 0 || -		   ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? -		  "bad-trans-type" : trans_type_str[ticket->t_trans_type]), -		 ticket->t_trans_type);  	xfs_warn(mp, "  unit res    = %d bytes",  		 ticket->t_unit_res);  	xfs_warn(mp, "  current res = %d bytes", @@ -3378,7 +3325,7 @@ xfs_log_force(  {  	int	error; -	trace_xfs_log_force(mp, 0); +	trace_xfs_log_force(mp, 0, _RET_IP_);  	error = _xfs_log_force(mp, flags, NULL);  	if (error)  		xfs_warn(mp, "%s: error %d returned.", __func__, error); @@ -3527,7 +3474,7 @@ xfs_log_force_lsn(  {  	int	error; -	trace_xfs_log_force(mp, lsn); +	trace_xfs_log_force(mp, lsn, _RET_IP_);  	error = _xfs_log_force_lsn(mp, lsn, flags, NULL);  	if (error)  		xfs_warn(mp, "%s: error %d returned.", __func__, error); @@ -3709,7 +3656,6 @@ xlog_ticket_alloc(  	tic->t_tid		= prandom_u32();  	tic->t_clientid		= client;  	tic->t_flags		= XLOG_TIC_INITED; -	tic->t_trans_type	= 0;  	if (permanent)  		tic->t_flags |= XLOG_TIC_PERM_RESERV; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index aa533a7d50f2..80ba0c047090 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -161,8 +161,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,  			  int		   count,  			  struct xlog_ticket **ticket,  			  __uint8_t	   clientid, -			  bool		   permanent, -			  uint		   t_type); +			  bool		   permanent);  int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);  int	  xfs_log_unmount_write(struct xfs_mount *mp);  void      xfs_log_unmount(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 4e7649351f5a..5e54e7955ea6 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -51,7 +51,6 @@ xlog_cil_ticket_alloc(  	tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,  				KM_SLEEP|KM_NOFS); -	tic->t_trans_type = XFS_TRANS_CHECKPOINT;  	/*  	 * set the current reservation to zero so we know to steal the basic diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 
ed8896310c00..765f084759b5 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -175,7 +175,6 @@ typedef struct xlog_ticket {  	char		   t_cnt;	 /* current count		 : 1  */  	char		   t_clientid;	 /* who does this belong to;	 : 1  */  	char		   t_flags;	 /* properties of reservation	 : 1  */ -	uint		   t_trans_type; /* transaction type             : 4  */          /* reservation array fields */  	uint		   t_res_num;                    /* num in array : 4 */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 396565f43247..835997843846 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3843,7 +3843,7 @@ xlog_recover_add_to_cont_trans(  	old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;  	old_len = item->ri_buf[item->ri_cnt-1].i_len; -	ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); +	ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);  	memcpy(&ptr[old_len], dp, len);  	item->ri_buf[item->ri_cnt-1].i_len += len;  	item->ri_buf[item->ri_cnt-1].i_addr = ptr; @@ -4205,10 +4205,9 @@ xlog_recover_process_efi(  		}  	} -	tp = xfs_trans_alloc(mp, 0); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);  	if (error) -		goto abort_error; +		return error;  	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);  	for (i = 0; i < efip->efi_format.efi_nextents; i++) { @@ -4355,10 +4354,9 @@ xlog_recover_clear_agi_bucket(  	int		offset;  	int		error; -	tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_clearagi, 0, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);  	if (error) -		goto out_abort; +		goto out_error;  	error = xfs_read_agi(mp, tp, agno, &agibp);  	if (error) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index cfd4210dd015..e39b02351b4a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -89,7 +89,6 @@ xfs_uuid_mount(  	
if (hole < 0) {  		xfs_uuid_table = kmem_realloc(xfs_uuid_table,  			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), -			xfs_uuid_table_size  * sizeof(*xfs_uuid_table),  			KM_SLEEP);  		hole = xfs_uuid_table_size++;  	} @@ -681,6 +680,9 @@ xfs_mountfs(  	xfs_set_maxicount(mp); +	/* enable fail_at_unmount as default */ +	mp->m_fail_unmount = 1; +  	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);  	if (error)  		goto out; @@ -690,10 +692,15 @@ xfs_mountfs(  	if (error)  		goto out_remove_sysfs; -	error = xfs_uuid_mount(mp); +	error = xfs_error_sysfs_init(mp);  	if (error)  		goto out_del_stats; + +	error = xfs_uuid_mount(mp); +	if (error) +		goto out_remove_error_sysfs; +  	/*  	 * Set the minimum read and write sizes  	 */ @@ -957,6 +964,7 @@ xfs_mountfs(  	cancel_delayed_work_sync(&mp->m_reclaim_work);  	xfs_reclaim_inodes(mp, SYNC_WAIT);   out_log_dealloc: +	mp->m_flags |= XFS_MOUNT_UNMOUNTING;  	xfs_log_mount_cancel(mp);   out_fail_wait:  	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) @@ -968,6 +976,8 @@ xfs_mountfs(  	xfs_da_unmount(mp);   out_remove_uuid:  	xfs_uuid_unmount(mp); + out_remove_error_sysfs: +	xfs_error_sysfs_del(mp);   out_del_stats:  	xfs_sysfs_del(&mp->m_stats.xs_kobj);   out_remove_sysfs: @@ -1006,6 +1016,14 @@ xfs_unmountfs(  	xfs_log_force(mp, XFS_LOG_SYNC);  	/* +	 * We now need to tell the world we are unmounting. This will allow +	 * us to detect that the filesystem is going away and we should error +	 * out anything that we have been retrying in the background. This will +	 * prevent neverending retries in AIL pushing from hanging the unmount. +	 */ +	mp->m_flags |= XFS_MOUNT_UNMOUNTING; + +	/*  	 * Flush all pending changes from the AIL.  	 
*/  	xfs_ail_push_all_sync(mp->m_ail); @@ -1056,6 +1074,7 @@ xfs_unmountfs(  #endif  	xfs_free_perag(mp); +	xfs_error_sysfs_del(mp);  	xfs_sysfs_del(&mp->m_stats.xs_kobj);  	xfs_sysfs_del(&mp->m_kobj);  } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index eafe257b357a..c1b798c72126 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -37,6 +37,32 @@ enum {  	XFS_LOWSP_MAX,  }; +/* + * Error Configuration + * + * Error classes define the subsystem the configuration belongs to. + * Error numbers define the errors that are configurable. + */ +enum { +	XFS_ERR_METADATA, +	XFS_ERR_CLASS_MAX, +}; +enum { +	XFS_ERR_DEFAULT, +	XFS_ERR_EIO, +	XFS_ERR_ENOSPC, +	XFS_ERR_ENODEV, +	XFS_ERR_ERRNO_MAX, +}; + +#define XFS_ERR_RETRY_FOREVER	-1 + +struct xfs_error_cfg { +	struct xfs_kobj	kobj; +	int		max_retries; +	unsigned long	retry_timeout;	/* in jiffies, 0 = no timeout */ +}; +  typedef struct xfs_mount {  	struct super_block	*m_super;  	xfs_tid_t		m_tid;		/* next unused tid for fs */ @@ -127,6 +153,9 @@ typedef struct xfs_mount {  	int64_t			m_low_space[XFS_LOWSP_MAX];  						/* low free space thresholds */  	struct xfs_kobj		m_kobj; +	struct xfs_kobj		m_error_kobj; +	struct xfs_kobj		m_error_meta_kobj; +	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];  	struct xstats		m_stats;	/* per-fs stats */  	struct workqueue_struct *m_buf_workqueue; @@ -148,6 +177,7 @@ typedef struct xfs_mount {  	 */  	__uint32_t		m_generation; +	bool			m_fail_unmount;  #ifdef DEBUG  	/*  	 * DEBUG mode instrumentation to test and/or trigger delayed allocation @@ -166,6 +196,7 @@ typedef struct xfs_mount {  #define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops  						   must be synchronous except  						   for space allocations */ +#define XFS_MOUNT_UNMOUNTING	(1ULL << 1)	/* filesystem is unmounting */  #define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)  #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem  						   operations, typically for 
@@ -364,4 +395,7 @@ extern void	xfs_set_low_space_thresholds(struct xfs_mount *);  int	xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,  			xfs_off_t count_fsb); +struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, +		int error_class, int error); +  #endif	/* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 51ddaf2c2b8c..d5b756669fb5 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -308,12 +308,9 @@ xfs_fs_commit_blocks(  			goto out_drop_iolock;  	} -	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); +	if (error)  		goto out_drop_iolock; -	}  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index be125e1758c1..a60d9e2739d1 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -783,13 +783,10 @@ xfs_qm_qino_alloc(  		}  	} -	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create, -				  XFS_QM_QINOCREATE_SPACE_RES(mp), 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create, +			XFS_QM_QINOCREATE_SPACE_RES(mp), 0, 0, &tp); +	if (error)  		return error; -	}  	if (need_alloc) {  		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index f4d0e0a8f517..475a3882a81f 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -236,10 +236,8 @@ xfs_qm_scall_trunc_qfile(  	xfs_ilock(ip, XFS_IOLOCK_EXCL); -	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);  	if (error) { -		xfs_trans_cancel(tp);  		xfs_iunlock(ip, XFS_IOLOCK_EXCL);  		goto out_put;  	} @@ -436,12 
+434,9 @@ xfs_qm_scall_setqlim(  	defq = xfs_get_defquota(dqp, q);  	xfs_dqunlock(dqp); -	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_setqlim, 0, 0, 0, &tp); +	if (error)  		goto out_rele; -	}  	xfs_dqlock(dqp);  	xfs_trans_dqjoin(tp, dqp); @@ -569,13 +564,9 @@ xfs_qm_log_quotaoff_end(  	int			error;  	xfs_qoff_logitem_t	*qoffi; -	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); - -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp); +	if (error)  		return error; -	}  	qoffi = xfs_trans_get_qoff_item(tp, startqoff,  					flags & XFS_ALL_QUOTA_ACCT); @@ -603,12 +594,9 @@ xfs_qm_log_quotaoff(  	*qoffstartp = NULL; -	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); +	if (error)  		goto out; -	}  	qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);  	xfs_trans_log_quotaoff_item(tp, qoffi); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index abf44435d04a..3938b37d1043 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -780,15 +780,14 @@ xfs_growfs_rt_alloc(  	 * Allocate space to the file, as necessary.  	 */  	while (oblocks < nblocks) { -		tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);  		resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);  		/*  		 * Reserve space & log for one extent added to the file.  		 
*/ -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, -					  resblks, 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks, +				0, 0, &tp);  		if (error) -			goto out_trans_cancel; +			return error;  		/*  		 * Lock the inode.  		 */ @@ -823,14 +822,13 @@ xfs_growfs_rt_alloc(  		for (bno = map.br_startoff, fsbno = map.br_startblock;  		     bno < map.br_startoff + map.br_blockcount;  		     bno++, fsbno++) { -			tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO);  			/*  			 * Reserve log for one block zeroing.  			 */ -			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, -						  0, 0); +			error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, +					0, 0, 0, &tp);  			if (error) -				goto out_trans_cancel; +				return error;  			/*  			 * Lock the bitmap inode.  			 */ @@ -994,11 +992,10 @@ xfs_growfs_rt(  		/*  		 * Start a transaction, get the log reservation.  		 */ -		tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE); -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtfree, -					  0, 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0, +				&tp);  		if (error) -			goto error_cancel; +			break;  		/*  		 * Lock out other callers by grabbing the bitmap inode lock.  		 
*/ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 187e14b696c2..11ea5d51db56 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -58,8 +58,7 @@  #include <linux/parser.h>  static const struct super_operations xfs_super_operations; -static kmem_zone_t *xfs_ioend_zone; -mempool_t *xfs_ioend_pool; +struct bio_set *xfs_ioend_bioset;  static struct kset *xfs_kset;		/* top-level xfs sysfs dir */  #ifdef DEBUG @@ -350,6 +349,7 @@ xfs_parseargs(  		case Opt_pqnoenforce:  			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);  			mp->m_qflags &= ~XFS_PQUOTA_ENFD; +			break;  		case Opt_gquota:  		case Opt_grpquota:  			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | @@ -928,7 +928,7 @@ xfs_fs_alloc_inode(  /*   * Now that the generic code is guaranteed not to be accessing - * the linux inode, we can reclaim the inode. + * the linux inode, we can inactivate and reclaim the inode.   */  STATIC void  xfs_fs_destroy_inode( @@ -938,9 +938,14 @@ xfs_fs_destroy_inode(  	trace_xfs_destroy_inode(ip); -	XFS_STATS_INC(ip->i_mount, vn_reclaim); +	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); +	XFS_STATS_INC(ip->i_mount, vn_rele); +	XFS_STATS_INC(ip->i_mount, vn_remove); + +	xfs_inactive(ip);  	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); +	XFS_STATS_INC(ip->i_mount, vn_reclaim);  	/*  	 * We should never get here with one of the reclaim flags already set. 
@@ -987,24 +992,6 @@ xfs_fs_inode_init_once(  		     "xfsino", ip->i_ino);  } -STATIC void -xfs_fs_evict_inode( -	struct inode		*inode) -{ -	xfs_inode_t		*ip = XFS_I(inode); - -	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); - -	trace_xfs_evict_inode(ip); - -	truncate_inode_pages_final(&inode->i_data); -	clear_inode(inode); -	XFS_STATS_INC(ip->i_mount, vn_rele); -	XFS_STATS_INC(ip->i_mount, vn_remove); - -	xfs_inactive(ip); -} -  /*   * We do an unlocked check for XFS_IDONTCACHE here because we are already   * serialised against cache hits here via the inode->i_lock and igrab() in @@ -1276,6 +1263,16 @@ xfs_fs_remount(  			return -EINVAL;  		} +		if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && +		    xfs_sb_has_ro_compat_feature(sbp, +					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { +			xfs_warn(mp, +"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", +				(sbp->sb_features_ro_compat & +					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); +			return -EINVAL; +		} +  		mp->m_flags &= ~XFS_MOUNT_RDONLY;  		/* @@ -1558,14 +1555,12 @@ xfs_fs_fill_super(  	if (mp->m_flags & XFS_MOUNT_DAX) {  		xfs_warn(mp, -	"DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); -		if (sb->s_blocksize != PAGE_SIZE) { -			xfs_alert(mp, -		"Filesystem block size invalid for DAX Turning DAX off."); -			mp->m_flags &= ~XFS_MOUNT_DAX; -		} else if (!sb->s_bdev->bd_disk->fops->direct_access) { +		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + +		error = bdev_dax_supported(sb, sb->s_blocksize); +		if (error) {  			xfs_alert(mp, -		"Block device does not support DAX Turning DAX off."); +			"DAX unsupported by block device. 
Turning off DAX.");  			mp->m_flags &= ~XFS_MOUNT_DAX;  		}  	} @@ -1663,7 +1658,6 @@ xfs_fs_free_cached_objects(  static const struct super_operations xfs_super_operations = {  	.alloc_inode		= xfs_fs_alloc_inode,  	.destroy_inode		= xfs_fs_destroy_inode, -	.evict_inode		= xfs_fs_evict_inode,  	.drop_inode		= xfs_fs_drop_inode,  	.put_super		= xfs_fs_put_super,  	.sync_fs		= xfs_fs_sync_fs, @@ -1688,20 +1682,15 @@ MODULE_ALIAS_FS("xfs");  STATIC int __init  xfs_init_zones(void)  { - -	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); -	if (!xfs_ioend_zone) +	xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, +			offsetof(struct xfs_ioend, io_inline_bio)); +	if (!xfs_ioend_bioset)  		goto out; -	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, -						  xfs_ioend_zone); -	if (!xfs_ioend_pool) -		goto out_destroy_ioend_zone; -  	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),  						"xfs_log_ticket");  	if (!xfs_log_ticket_zone) -		goto out_destroy_ioend_pool; +		goto out_free_ioend_bioset;  	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),  						"xfs_bmap_free_item"); @@ -1797,10 +1786,8 @@ xfs_init_zones(void)  	kmem_zone_destroy(xfs_bmap_free_item_zone);   out_destroy_log_ticket_zone:  	kmem_zone_destroy(xfs_log_ticket_zone); - out_destroy_ioend_pool: -	mempool_destroy(xfs_ioend_pool); - out_destroy_ioend_zone: -	kmem_zone_destroy(xfs_ioend_zone); + out_free_ioend_bioset: +	bioset_free(xfs_ioend_bioset);   out:  	return -ENOMEM;  } @@ -1826,9 +1813,7 @@ xfs_destroy_zones(void)  	kmem_zone_destroy(xfs_btree_cur_zone);  	kmem_zone_destroy(xfs_bmap_free_item_zone);  	kmem_zone_destroy(xfs_log_ticket_zone); -	mempool_destroy(xfs_ioend_pool); -	kmem_zone_destroy(xfs_ioend_zone); - +	bioset_free(xfs_ioend_bioset);  }  STATIC int __init diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index b44284c1adda..08a46c6181fd 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -131,6 +131,8 
@@ xfs_readlink(  	trace_xfs_readlink(ip); +	ASSERT(!(ip->i_df.if_flags & XFS_IFINLINE)); +  	if (XFS_FORCED_SHUTDOWN(mp))  		return -EIO; @@ -150,12 +152,7 @@ xfs_readlink(  	} -	if (ip->i_df.if_flags & XFS_IFINLINE) { -		memcpy(link, ip->i_df.if_u1.if_data, pathlen); -		link[pathlen] = '\0'; -	} else { -		error = xfs_readlink_bmap(ip, link); -	} +	error = xfs_readlink_bmap(ip, link);   out:  	xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -221,7 +218,6 @@ xfs_symlink(  	if (error)  		return error; -	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);  	/*  	 * The symlink will fit into the inode data fork?  	 * There can't be any attributes so we get the whole variable part. @@ -231,13 +227,15 @@ xfs_symlink(  	else  		fs_blocks = xfs_symlink_blocks(mp, pathlen);  	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0); + +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);  	if (error == -ENOSPC && fs_blocks == 0) {  		resblks = 0; -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0, +				&tp);  	}  	if (error) -		goto out_trans_cancel; +		goto out_release_inode;  	xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |  		      XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); @@ -302,19 +300,11 @@ xfs_symlink(  	 * If the symlink will fit into the inode, write it inline.  	 */  	if (pathlen <= XFS_IFORK_DSIZE(ip)) { -		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); -		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen); -		ip->i_d.di_size = pathlen; - -		/* -		 * The inode was initially created in extent format. 
-		 */ -		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT); -		ip->i_df.if_flags |= XFS_IFINLINE; +		xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen); +		ip->i_d.di_size = pathlen;  		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;  		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); -  	} else {  		int	offset; @@ -455,12 +445,9 @@ xfs_inactive_symlink_rmt(  	 */  	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); -	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); -	if (error) { -		xfs_trans_cancel(tp); +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); +	if (error)  		return error; -	}  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	xfs_trans_ijoin(tp, ip, 0); diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 6ced4f143494..4c2c55086208 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -17,10 +17,11 @@   */  #include "xfs.h" -#include "xfs_sysfs.h" +#include "xfs_shared.h"  #include "xfs_format.h"  #include "xfs_log_format.h"  #include "xfs_trans_resv.h" +#include "xfs_sysfs.h"  #include "xfs_log.h"  #include "xfs_log_priv.h"  #include "xfs_stats.h" @@ -362,3 +363,291 @@ struct kobj_type xfs_log_ktype = {  	.sysfs_ops = &xfs_sysfs_ops,  	.default_attrs = xfs_log_attrs,  }; + +/* + * Metadata IO error configuration + * + * The sysfs structure here is: + *	...xfs/<dev>/error/<class>/<errno>/<error_attrs> + * + * where <class> allows us to discriminate between data IO and metadata IO, + * and any other future type of IO (e.g. special inode or directory error + * handling) we care to support. 
+ */ +static inline struct xfs_error_cfg * +to_error_cfg(struct kobject *kobject) +{ +	struct xfs_kobj *kobj = to_kobj(kobject); +	return container_of(kobj, struct xfs_error_cfg, kobj); +} + +static inline struct xfs_mount * +err_to_mp(struct kobject *kobject) +{ +	struct xfs_kobj *kobj = to_kobj(kobject); +	return container_of(kobj, struct xfs_mount, m_error_kobj); +} + +static ssize_t +max_retries_show( +	struct kobject	*kobject, +	char		*buf) +{ +	struct xfs_error_cfg *cfg = to_error_cfg(kobject); + +	return snprintf(buf, PAGE_SIZE, "%d\n", cfg->max_retries); +} + +static ssize_t +max_retries_store( +	struct kobject	*kobject, +	const char	*buf, +	size_t		count) +{ +	struct xfs_error_cfg *cfg = to_error_cfg(kobject); +	int		ret; +	int		val; + +	ret = kstrtoint(buf, 0, &val); +	if (ret) +		return ret; + +	if (val < -1) +		return -EINVAL; + +	cfg->max_retries = val; +	return count; +} +XFS_SYSFS_ATTR_RW(max_retries); + +static ssize_t +retry_timeout_seconds_show( +	struct kobject	*kobject, +	char		*buf) +{ +	struct xfs_error_cfg *cfg = to_error_cfg(kobject); + +	return snprintf(buf, PAGE_SIZE, "%ld\n", +			jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC); +} + +static ssize_t +retry_timeout_seconds_store( +	struct kobject	*kobject, +	const char	*buf, +	size_t		count) +{ +	struct xfs_error_cfg *cfg = to_error_cfg(kobject); +	int		ret; +	int		val; + +	ret = kstrtoint(buf, 0, &val); +	if (ret) +		return ret; + +	/* 1 day timeout maximum */ +	if (val < 0 || val > 86400) +		return -EINVAL; + +	cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC); +	return count; +} +XFS_SYSFS_ATTR_RW(retry_timeout_seconds); + +static ssize_t +fail_at_unmount_show( +	struct kobject	*kobject, +	char		*buf) +{ +	struct xfs_mount	*mp = err_to_mp(kobject); + +	return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount); +} + +static ssize_t +fail_at_unmount_store( +	struct kobject	*kobject, +	const char	*buf, +	size_t		count) +{ +	struct xfs_mount	*mp = err_to_mp(kobject); +	int		
ret; +	int		val; + +	ret = kstrtoint(buf, 0, &val); +	if (ret) +		return ret; + +	if (val < 0 || val > 1) +		return -EINVAL; + +	mp->m_fail_unmount = val; +	return count; +} +XFS_SYSFS_ATTR_RW(fail_at_unmount); + +static struct attribute *xfs_error_attrs[] = { +	ATTR_LIST(max_retries), +	ATTR_LIST(retry_timeout_seconds), +	NULL, +}; + + +struct kobj_type xfs_error_cfg_ktype = { +	.release = xfs_sysfs_release, +	.sysfs_ops = &xfs_sysfs_ops, +	.default_attrs = xfs_error_attrs, +}; + +struct kobj_type xfs_error_ktype = { +	.release = xfs_sysfs_release, +	.sysfs_ops = &xfs_sysfs_ops, +}; + +/* + * Error initialization tables. These need to be ordered in the same + * order as the enums used to index the array. All class init tables need to + * define a "default" behaviour as the first entry, all other entries can be + * empty. + */ +struct xfs_error_init { +	char		*name; +	int		max_retries; +	int		retry_timeout;	/* in seconds */ +}; + +static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = { +	{ .name = "default", +	  .max_retries = XFS_ERR_RETRY_FOREVER, +	  .retry_timeout = 0, +	}, +	{ .name = "EIO", +	  .max_retries = XFS_ERR_RETRY_FOREVER, +	  .retry_timeout = 0, +	}, +	{ .name = "ENOSPC", +	  .max_retries = XFS_ERR_RETRY_FOREVER, +	  .retry_timeout = 0, +	}, +	{ .name = "ENODEV", +	  .max_retries = 0, +	}, +}; + +static int +xfs_error_sysfs_init_class( +	struct xfs_mount	*mp, +	int			class, +	const char		*parent_name, +	struct xfs_kobj		*parent_kobj, +	const struct xfs_error_init init[]) +{ +	struct xfs_error_cfg	*cfg; +	int			error; +	int			i; + +	ASSERT(class < XFS_ERR_CLASS_MAX); + +	error = xfs_sysfs_init(parent_kobj, &xfs_error_ktype, +				&mp->m_error_kobj, parent_name); +	if (error) +		return error; + +	for (i = 0; i < XFS_ERR_ERRNO_MAX; i++) { +		cfg = &mp->m_error_cfg[class][i]; +		error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype, +					parent_kobj, init[i].name); +		if (error) +			goto out_error; + +		cfg->max_retries = 
init[i].max_retries; +		cfg->retry_timeout = msecs_to_jiffies( +					init[i].retry_timeout * MSEC_PER_SEC); +	} +	return 0; + +out_error: +	/* unwind the entries that succeeded */ +	for (i--; i >= 0; i--) { +		cfg = &mp->m_error_cfg[class][i]; +		xfs_sysfs_del(&cfg->kobj); +	} +	xfs_sysfs_del(parent_kobj); +	return error; +} + +int +xfs_error_sysfs_init( +	struct xfs_mount	*mp) +{ +	int			error; + +	/* .../xfs/<dev>/error/ */ +	error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype, +				&mp->m_kobj, "error"); +	if (error) +		return error; + +	error = sysfs_create_file(&mp->m_error_kobj.kobject, +				  ATTR_LIST(fail_at_unmount)); + +	if (error) +		goto out_error; + +	/* .../xfs/<dev>/error/metadata/ */ +	error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA, +				"metadata", &mp->m_error_meta_kobj, +				xfs_error_meta_init); +	if (error) +		goto out_error; + +	return 0; + +out_error: +	xfs_sysfs_del(&mp->m_error_kobj); +	return error; +} + +void +xfs_error_sysfs_del( +	struct xfs_mount	*mp) +{ +	struct xfs_error_cfg	*cfg; +	int			i, j; + +	for (i = 0; i < XFS_ERR_CLASS_MAX; i++) { +		for (j = 0; j < XFS_ERR_ERRNO_MAX; j++) { +			cfg = &mp->m_error_cfg[i][j]; + +			xfs_sysfs_del(&cfg->kobj); +		} +	} +	xfs_sysfs_del(&mp->m_error_meta_kobj); +	xfs_sysfs_del(&mp->m_error_kobj); +} + +struct xfs_error_cfg * +xfs_error_get_cfg( +	struct xfs_mount	*mp, +	int			error_class, +	int			error) +{ +	struct xfs_error_cfg	*cfg; + +	switch (error) { +	case EIO: +		cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO]; +		break; +	case ENOSPC: +		cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENOSPC]; +		break; +	case ENODEV: +		cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENODEV]; +		break; +	default: +		cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT]; +		break; +	} + +	return cfg; +} diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index be692e59938d..d04637181ef2 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -58,4 +58,7 @@ xfs_sysfs_del(  	
wait_for_completion(&kobj->complete);  } +int	xfs_error_sysfs_init(struct xfs_mount *mp); +void	xfs_error_sysfs_del(struct xfs_mount *mp); +  #endif	/* __XFS_SYSFS_H__ */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index c8d58426008e..ea94ee0fe5ea 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -364,7 +364,6 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);  DEFINE_BUF_EVENT(xfs_buf_get_uncached);  DEFINE_BUF_EVENT(xfs_bdstrat_shut);  DEFINE_BUF_EVENT(xfs_buf_item_relse); -DEFINE_BUF_EVENT(xfs_buf_item_iodone);  DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);  DEFINE_BUF_EVENT(xfs_buf_error_relse);  DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); @@ -944,7 +943,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,  	TP_ARGS(log, tic),  	TP_STRUCT__entry(  		__field(dev_t, dev) -		__field(unsigned, trans_type)  		__field(char, ocnt)  		__field(char, cnt)  		__field(int, curr_res) @@ -962,7 +960,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,  	),  	TP_fast_assign(  		__entry->dev = log->l_mp->m_super->s_dev; -		__entry->trans_type = tic->t_trans_type;  		__entry->ocnt = tic->t_ocnt;  		__entry->cnt = tic->t_cnt;  		__entry->curr_res = tic->t_curr_res; @@ -980,14 +977,13 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,  		__entry->curr_block = log->l_curr_block;  		__entry->tail_lsn = atomic64_read(&log->l_tail_lsn);  	), -	TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " +	TP_printk("dev %d:%d t_ocnt %u t_cnt %u t_curr_res %u "  		  "t_unit_res %u t_flags %s reserveq %s "  		  "writeq %s grant_reserve_cycle %d "  		  "grant_reserve_bytes %d grant_write_cycle %d "  		  "grant_write_bytes %d curr_cycle %d curr_block %d "  		  "tail_cycle %d tail_block %d",  		  MAJOR(__entry->dev), MINOR(__entry->dev), -		  __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),  		  __entry->ocnt,  		  __entry->cnt,  		  __entry->curr_res, @@ -1053,19 +1049,21 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,  )  TRACE_EVENT(xfs_log_force, -	TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn), -	
TP_ARGS(mp, lsn), +	TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn, unsigned long caller_ip), +	TP_ARGS(mp, lsn, caller_ip),  	TP_STRUCT__entry(  		__field(dev_t, dev)  		__field(xfs_lsn_t, lsn) +		__field(unsigned long, caller_ip)  	),  	TP_fast_assign(  		__entry->dev = mp->m_super->s_dev;  		__entry->lsn = lsn; +		__entry->caller_ip = caller_ip;  	), -	TP_printk("dev %d:%d lsn 0x%llx", +	TP_printk("dev %d:%d lsn 0x%llx caller %ps",  		  MAJOR(__entry->dev), MINOR(__entry->dev), -		  __entry->lsn) +		  __entry->lsn, (void *)__entry->caller_ip)  )  #define DEFINE_LOG_ITEM_EVENT(name) \ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 20c53666cb4b..5f3d33d16e67 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -47,47 +47,6 @@ xfs_trans_init(  }  /* - * This routine is called to allocate a transaction structure. - * The type parameter indicates the type of the transaction.  These - * are enumerated in xfs_trans.h. - * - * Dynamically allocate the transaction structure from the transaction - * zone, initialize it, and return it to the caller. - */ -xfs_trans_t * -xfs_trans_alloc( -	xfs_mount_t	*mp, -	uint		type) -{ -	xfs_trans_t     *tp; - -	sb_start_intwrite(mp->m_super); -	tp = _xfs_trans_alloc(mp, type, KM_SLEEP); -	tp->t_flags |= XFS_TRANS_FREEZE_PROT; -	return tp; -} - -xfs_trans_t * -_xfs_trans_alloc( -	xfs_mount_t	*mp, -	uint		type, -	xfs_km_flags_t	memflags) -{ -	xfs_trans_t	*tp; - -	WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); -	atomic_inc(&mp->m_active_trans); - -	tp = kmem_zone_zalloc(xfs_trans_zone, memflags); -	tp->t_magic = XFS_TRANS_HEADER_MAGIC; -	tp->t_type = type; -	tp->t_mountp = mp; -	INIT_LIST_HEAD(&tp->t_items); -	INIT_LIST_HEAD(&tp->t_busy); -	return tp; -} - -/*   * Free the transaction structure.  If there is more clean up   * to do when the structure is freed, add it here.   
*/ @@ -99,7 +58,7 @@ xfs_trans_free(  	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);  	atomic_dec(&tp->t_mountp->m_active_trans); -	if (tp->t_flags & XFS_TRANS_FREEZE_PROT) +	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))  		sb_end_intwrite(tp->t_mountp->m_super);  	xfs_trans_free_dqinfo(tp);  	kmem_zone_free(xfs_trans_zone, tp); @@ -125,7 +84,6 @@ xfs_trans_dup(  	 * Initialize the new transaction structure.  	 */  	ntp->t_magic = XFS_TRANS_HEADER_MAGIC; -	ntp->t_type = tp->t_type;  	ntp->t_mountp = tp->t_mountp;  	INIT_LIST_HEAD(&ntp->t_items);  	INIT_LIST_HEAD(&ntp->t_busy); @@ -135,9 +93,9 @@ xfs_trans_dup(  	ntp->t_flags = XFS_TRANS_PERM_LOG_RES |  		       (tp->t_flags & XFS_TRANS_RESERVE) | -		       (tp->t_flags & XFS_TRANS_FREEZE_PROT); +		       (tp->t_flags & XFS_TRANS_NO_WRITECOUNT);  	/* We gave our writer reference to the new transaction */ -	tp->t_flags &= ~XFS_TRANS_FREEZE_PROT; +	tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;  	ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);  	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;  	tp->t_blk_res = tp->t_blk_res_used; @@ -165,7 +123,7 @@ xfs_trans_dup(   * This does not do quota reservations. That typically is done by the   * caller afterwards.   
*/ -int +static int  xfs_trans_reserve(  	struct xfs_trans	*tp,  	struct xfs_trans_res	*resp, @@ -219,7 +177,7 @@ xfs_trans_reserve(  						resp->tr_logres,  						resp->tr_logcount,  						&tp->t_ticket, XFS_TRANSACTION, -						permanent, tp->t_type); +						permanent);  		}  		if (error) @@ -268,6 +226,42 @@ undo_blocks:  	return error;  } +int +xfs_trans_alloc( +	struct xfs_mount	*mp, +	struct xfs_trans_res	*resp, +	uint			blocks, +	uint			rtextents, +	uint			flags, +	struct xfs_trans	**tpp) +{ +	struct xfs_trans	*tp; +	int			error; + +	if (!(flags & XFS_TRANS_NO_WRITECOUNT)) +		sb_start_intwrite(mp->m_super); + +	WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); +	atomic_inc(&mp->m_active_trans); + +	tp = kmem_zone_zalloc(xfs_trans_zone, +		(flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); +	tp->t_magic = XFS_TRANS_HEADER_MAGIC; +	tp->t_flags = flags; +	tp->t_mountp = mp; +	INIT_LIST_HEAD(&tp->t_items); +	INIT_LIST_HEAD(&tp->t_busy); + +	error = xfs_trans_reserve(tp, resp, blocks, rtextents); +	if (error) { +		xfs_trans_cancel(tp); +		return error; +	} + +	*tpp = tp; +	return 0; +} +  /*   * Record the indicated change to the given field for application   * to the file system's superblock when the transaction commits. diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index e7c49cf43fbc..9a462e892e4f 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -90,7 +90,6 @@ void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,   */  typedef struct xfs_trans {  	unsigned int		t_magic;	/* magic number */ -	unsigned int		t_type;		/* transaction type */  	unsigned int		t_log_res;	/* amt of log space resvd */  	unsigned int		t_log_count;	/* count for perm log res */  	unsigned int		t_blk_res;	/* # of blocks resvd */ @@ -148,10 +147,9 @@ typedef struct xfs_trans {  /*   * XFS transaction mechanism exported interfaces.   
*/ -xfs_trans_t	*xfs_trans_alloc(struct xfs_mount *, uint); -xfs_trans_t	*_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); -int		xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *, -				  uint, uint); +int		xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, +			uint blocks, uint rtextents, uint flags, +			struct xfs_trans **tpp);  void		xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);  struct xfs_buf	*xfs_trans_get_buf_map(struct xfs_trans *tp, diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 110f1d7d86b0..ea62245fee26 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -32,11 +32,11 @@  static int -xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, -		const char *name, void *value, size_t size) +xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, +		struct inode *inode, const char *name, void *value, size_t size)  {  	int xflags = handler->flags; -	struct xfs_inode *ip = XFS_I(d_inode(dentry)); +	struct xfs_inode *ip = XFS_I(inode);  	int error, asize = size;  	/* Convert Linux syscall to XFS internal ATTR flags */ @@ -74,11 +74,12 @@ xfs_forget_acl(  }  static int -xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, -		const char *name, const void *value, size_t size, int flags) +xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused, +		struct inode *inode, const char *name, const void *value, +		size_t size, int flags)  {  	int			xflags = handler->flags; -	struct xfs_inode	*ip = XFS_I(d_inode(dentry)); +	struct xfs_inode	*ip = XFS_I(inode);  	int			error;  	/* Convert Linux syscall to XFS internal ATTR flags */ @@ -92,7 +93,7 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry,  	error = xfs_attr_set(ip, (unsigned char *)name,  				(void *)value, size, xflags);  	if (!error) -		xfs_forget_acl(d_inode(dentry), name, xflags); +		xfs_forget_acl(inode, name, xflags);  	return error;  } @@ -146,7 +147,7 @@ 
__xfs_xattr_put_listent(  	arraytop = context->count + prefix_len + namelen + 1;  	if (arraytop > context->firstu) {  		context->count = -1;	/* insufficient space */ -		return 1; +		return 0;  	}  	offset = (char *)context->alist + context->count;  	strncpy(offset, prefix, prefix_len); @@ -166,8 +167,7 @@ xfs_xattr_put_listent(  	int		flags,  	unsigned char	*name,  	int		namelen, -	int		valuelen, -	unsigned char	*value) +	int		valuelen)  {  	char *prefix;  	int prefix_len; @@ -221,11 +221,15 @@ xfs_xattr_put_listent(  }  ssize_t -xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) +xfs_vn_listxattr( +	struct dentry	*dentry, +	char		*data, +	size_t		size)  {  	struct xfs_attr_list_context context;  	struct attrlist_cursor_kern cursor = { 0 }; -	struct inode		*inode = d_inode(dentry); +	struct inode	*inode = d_inode(dentry); +	int		error;  	/*  	 * First read the regular on-disk attributes. @@ -239,7 +243,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)  	context.firstu = context.bufsize;  	context.put_listent = xfs_xattr_put_listent; -	xfs_attr_list_int(&context); +	error = xfs_attr_list_int(&context); +	if (error) +		return error;  	if (context.count < 0)  		return -ERANGE; | 
