diff options
Diffstat (limited to 'fs/xfs/xfs_aops.c')
| -rw-r--r-- | fs/xfs/xfs_aops.c | 119 | 
1 files changed, 66 insertions, 53 deletions
| diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 50ab2879b9da..29e7e5dd5178 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -172,6 +172,12 @@ xfs_setfilesize_ioend(  	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);  	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); +	/* we abort the update if there was an IO error */ +	if (ioend->io_error) { +		xfs_trans_cancel(tp); +		return ioend->io_error; +	} +  	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);  } @@ -212,14 +218,17 @@ xfs_end_io(  		ioend->io_error = -EIO;  		goto done;  	} -	if (ioend->io_error) -		goto done;  	/*  	 * For unwritten extents we need to issue transactions to convert a  	 * range to normal written extens after the data I/O has finished. +	 * Detecting and handling completion IO errors is done individually +	 * for each case as different cleanup operations need to be performed +	 * on error.  	 */  	if (ioend->io_type == XFS_IO_UNWRITTEN) { +		if (ioend->io_error) +			goto done;  		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,  						  ioend->io_size);  	} else if (ioend->io_append_trans) { @@ -1250,13 +1259,28 @@ xfs_vm_releasepage(   * the DIO. There is only going to be one reference to the ioend and its life   * cycle is constrained by the DIO completion code. hence we don't need   * reference counting here. + * + * Note that for DIO, an IO to the highest supported file block offset (i.e. + * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 + * bit variable. Hence if we see this overflow, we have to assume that the IO is + * extending the file size. We won't know for sure until IO completion is run + * and the actual max write offset is communicated to the IO completion + * routine. + * + * For DAX page faults, we are preparing to never see unwritten extents here, + * nor should we ever extend the inode size. Hence we will soon have nothing to + * do here for this case, ensuring we don't have to provide an IO completion + * callback to free an ioend that we don't actually need for a fault into the + * page at offset (2^63 - 1FSB) bytes.   */ +  static void  xfs_map_direct(  	struct inode		*inode,  	struct buffer_head	*bh_result,  	struct xfs_bmbt_irec	*imap, -	xfs_off_t		offset) +	xfs_off_t		offset, +	bool			dax_fault)  {  	struct xfs_ioend	*ioend;  	xfs_off_t		size = bh_result->b_size; @@ -1269,6 +1293,13 @@ xfs_map_direct(  	trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); +	if (dax_fault) { +		ASSERT(type == XFS_IO_OVERWRITE); +		trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, +					    imap); +		return; +	} +  	if (bh_result->b_private) {  		ioend = bh_result->b_private;  		ASSERT(ioend->io_size > 0); @@ -1283,7 +1314,8 @@ xfs_map_direct(  					      ioend->io_size, ioend->io_type,  					      imap);  	} else if (type == XFS_IO_UNWRITTEN || -		   offset + size > i_size_read(inode)) { +		   offset + size > i_size_read(inode) || +		   offset + size < 0) {  		ioend = xfs_alloc_ioend(inode, type);  		ioend->io_offset = offset;  		ioend->io_size = size; @@ -1345,7 +1377,8 @@ __xfs_get_blocks(  	sector_t		iblock,  	struct buffer_head	*bh_result,  	int			create, -	bool			direct) +	bool			direct, +	bool			dax_fault)  {  	struct xfs_inode	*ip = XFS_I(inode);  	struct xfs_mount	*mp = ip->i_mount; @@ -1393,18 +1426,20 @@ __xfs_get_blocks(  	if (error)  		goto out_unlock; +	/* for DAX, we convert unwritten extents directly */  	if (create &&  	    (!nimaps ||  	     (imap.br_startblock == HOLESTARTBLOCK || -	      imap.br_startblock == DELAYSTARTBLOCK))) { +	      imap.br_startblock == DELAYSTARTBLOCK) || +	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {  		if (direct || xfs_get_extsz_hint(ip)) {  			/* -			 * Drop the ilock in preparation for starting the block -			 * allocation transaction.  It will be retaken -			 * exclusively inside xfs_iomap_write_direct for the -			 * actual allocation. +			 * xfs_iomap_write_direct() expects the shared lock. It +			 * is unlocked on return.  			 */ -			xfs_iunlock(ip, lockmode); +			if (lockmode == XFS_ILOCK_EXCL) +				xfs_ilock_demote(ip, lockmode); +  			error = xfs_iomap_write_direct(ip, offset, size,  						       &imap, nimaps);  			if (error) @@ -1441,6 +1476,12 @@ __xfs_get_blocks(  		goto out_unlock;  	} +	if (IS_DAX(inode) && create) { +		ASSERT(!ISUNWRITTEN(&imap)); +		/* zeroing is not needed at a higher layer */ +		new = 0; +	} +  	/* trim mapping down to size requested */  	if (direct || size > (1 << inode->i_blkbits))  		xfs_map_trim_size(inode, iblock, bh_result, @@ -1458,7 +1499,8 @@ __xfs_get_blocks(  			set_buffer_unwritten(bh_result);  		/* direct IO needs special help */  		if (create && direct) -			xfs_map_direct(inode, bh_result, &imap, offset); +			xfs_map_direct(inode, bh_result, &imap, offset, +				       dax_fault);  	}  	/* @@ -1505,7 +1547,7 @@ xfs_get_blocks(  	struct buffer_head	*bh_result,  	int			create)  { -	return __xfs_get_blocks(inode, iblock, bh_result, create, false); +	return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);  }  int @@ -1515,7 +1557,17 @@ xfs_get_blocks_direct(  	struct buffer_head	*bh_result,  	int			create)  { -	return __xfs_get_blocks(inode, iblock, bh_result, create, true); +	return __xfs_get_blocks(inode, iblock, bh_result, create, true, false); +} + +int +xfs_get_blocks_dax_fault( +	struct inode		*inode, +	sector_t		iblock, +	struct buffer_head	*bh_result, +	int			create) +{ +	return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);  }  static void @@ -1614,45 +1666,6 @@ xfs_end_io_direct_write(  	__xfs_end_io_direct_write(inode, ioend, offset, size);  } -/* - * For DAX we need a mapping buffer callback for unwritten extent conversion - * when page faults allocate blocks and then zero them. Note that in this - * case the mapping indicated by the ioend may extend beyond EOF. We most - * definitely do not want to extend EOF here, so we trim back the ioend size to - * EOF. - */ -#ifdef CONFIG_FS_DAX -void -xfs_end_io_dax_write( -	struct buffer_head	*bh, -	int			uptodate) -{ -	struct xfs_ioend	*ioend = bh->b_private; -	struct inode		*inode = ioend->io_inode; -	ssize_t			size = ioend->io_size; - -	ASSERT(IS_DAX(ioend->io_inode)); - -	/* if there was an error zeroing, then don't convert it */ -	if (!uptodate) -		ioend->io_error = -EIO; - -	/* -	 * Trim update to EOF, so we don't extend EOF during unwritten extent -	 * conversion of partial EOF blocks. -	 */ -	spin_lock(&XFS_I(inode)->i_flags_lock); -	if (ioend->io_offset + size > i_size_read(inode)) -		size = i_size_read(inode) - ioend->io_offset; -	spin_unlock(&XFS_I(inode)->i_flags_lock); - -	__xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size); - -} -#else -void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { } -#endif -  static inline ssize_t  xfs_vm_do_dio(  	struct inode		*inode, | 
