Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--	fs/btrfs/inode.c	323
1 file changed, 193 insertions(+), 130 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 27b2fe7f735d..fe2c810335ff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -393,34 +393,13 @@ void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags)
  * extent (btrfs_finish_ordered_io()).
  */
 static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
-						 struct folio *locked_folio,
 						 u64 offset, u64 bytes)
 {
 	unsigned long index = offset >> PAGE_SHIFT;
 	unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
-	u64 page_start = 0, page_end = 0;
 	struct folio *folio;
 
-	if (locked_folio) {
-		page_start = folio_pos(locked_folio);
-		page_end = page_start + folio_size(locked_folio) - 1;
-	}
-
 	while (index <= end_index) {
-		/*
-		 * For locked page, we will call btrfs_mark_ordered_io_finished
-		 * through btrfs_mark_ordered_io_finished() on it
-		 * in run_delalloc_range() for the error handling, which will
-		 * clear page Ordered and run the ordered extent accounting.
-		 *
-		 * Here we can't just clear the Ordered bit, or
-		 * btrfs_mark_ordered_io_finished() would skip the accounting
-		 * for the page range, and the ordered extent will never finish.
-		 */
-		if (locked_folio && index == (page_start >> PAGE_SHIFT)) {
-			index++;
-			continue;
-		}
 		folio = filemap_get_folio(inode->vfs_inode.i_mapping, index);
 		index++;
 		if (IS_ERR(folio))
@@ -436,23 +415,6 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
 		folio_put(folio);
 	}
 
-	if (locked_folio) {
-		/* The locked page covers the full range, nothing needs to be done */
-		if (bytes + offset <= page_start + folio_size(locked_folio))
-			return;
-		/*
-		 * In case this page belongs to the delalloc range being
-		 * instantiated then skip it, since the first page of a range is
-		 * going to be properly cleaned up by the caller of
-		 * run_delalloc_range
-		 */
-		if (page_start >= offset && page_end <= (offset + bytes - 1)) {
-			bytes = offset + bytes - folio_pos(locked_folio) -
-				folio_size(locked_folio);
-			offset = folio_pos(locked_folio) + folio_size(locked_folio);
-		}
-	}
-
 	return btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes, false);
 }
 
@@ -564,7 +526,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
 		kunmap_local(kaddr);
 		folio_put(folio);
 	}
-	btrfs_mark_buffer_dirty(trans, leaf);
 	btrfs_release_path(path);
 
 	/*
@@ -1129,19 +1090,14 @@ static void submit_uncompressed_range(struct btrfs_inode *inode,
 			       &wbc, false);
 	wbc_detach_inode(&wbc);
 	if (ret < 0) {
-		btrfs_cleanup_ordered_extents(inode, locked_folio,
-					      start, end - start + 1);
-		if (locked_folio) {
-			const u64 page_start = folio_pos(locked_folio);
-
-			folio_start_writeback(locked_folio);
-			folio_end_writeback(locked_folio);
-			btrfs_mark_ordered_io_finished(inode, locked_folio,
-						       page_start, PAGE_SIZE,
-						       !ret);
-			mapping_set_error(locked_folio->mapping, ret);
-			folio_unlock(locked_folio);
-		}
+		btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
+		if (locked_folio)
+			btrfs_folio_end_lock(inode->root->fs_info, locked_folio,
+					     start, async_extent->ram_size);
+		btrfs_err_rl(inode->root->fs_info,
+			"%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
+			     __func__, btrfs_root_id(inode->root),
+			     btrfs_ino(inode), start, async_extent->ram_size, ret);
 	}
 }
 
@@ -1373,6 +1329,17 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 	alloc_hint = btrfs_get_extent_allocation_hint(inode, start, num_bytes);
 
 	/*
+	 * We're not doing compressed IO, don't unlock the first page (which
+	 * the caller expects to stay locked), don't clear any dirty bits and
+	 * don't set any writeback bits.
+	 *
+	 * Do set the Ordered (Private2) bit so we know this page was properly
+	 * setup for writepage.
+	 */
+	page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
+	page_ops |= PAGE_SET_ORDERED;
+
+	/*
 	 * Relocation relies on the relocated extents to have exactly the same
 	 * size as the original extents. Normally writeback for relocation data
 	 * extents follows a NOCOW path because relocation preallocates the
@@ -1431,6 +1398,10 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 		file_extent.offset = 0;
 		file_extent.compression = BTRFS_COMPRESS_NONE;
 
+		/*
+		 * Locked range will be released either during error clean up or
+		 * after the whole range is finished.
+		 */
 		lock_extent(&inode->io_tree, start, start + cur_alloc_size - 1,
 			    &cached);
 
@@ -1476,21 +1447,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 
 		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 
-		/*
-		 * We're not doing compressed IO, don't unlock the first page
-		 * (which the caller expects to stay locked), don't clear any
-		 * dirty bits and don't set any writeback bits
-		 *
-		 * Do set the Ordered flag so we know this page was
-		 * properly setup for writepage.
-		 */
-		page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
-		page_ops |= PAGE_SET_ORDERED;
-
-		extent_clear_unlock_delalloc(inode, start, start + cur_alloc_size - 1,
-					     locked_folio, &cached,
-					     EXTENT_LOCKED | EXTENT_DELALLOC,
-					     page_ops);
 		if (num_bytes < cur_alloc_size)
 			num_bytes = 0;
 		else
@@ -1507,6 +1463,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 		if (ret)
 			goto out_unlock;
 	}
+	extent_clear_unlock_delalloc(inode, orig_start, end, locked_folio, &cached,
+				     EXTENT_LOCKED | EXTENT_DELALLOC, page_ops);
 done:
 	if (done_offset)
 		*done_offset = end;
@@ -1527,35 +1485,30 @@ out_unlock:
 	 * We process each region below.
 	 */
 
-	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
-		EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
-	page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
-
 	/*
 	 * For the range (1). We have already instantiated the ordered extents
 	 * for this region. They are cleaned up by
 	 * btrfs_cleanup_ordered_extents() in e.g,
-	 * btrfs_run_delalloc_range(). EXTENT_LOCKED | EXTENT_DELALLOC are
-	 * already cleared in the above loop. And, EXTENT_DELALLOC_NEW |
-	 * EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
-	 * function.
+	 * btrfs_run_delalloc_range().
+	 * EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV
+	 * are also handled by the cleanup function.
 	 *
-	 * However, in case of @keep_locked, we still need to unlock the pages
-	 * (except @locked_folio) to ensure all the pages are unlocked.
+	 * So here we only clear EXTENT_LOCKED and EXTENT_DELALLOC flag, and
+	 * finish the writeback of the involved folios, which will never be submitted.
 	 */
-	if (keep_locked && orig_start < start) {
+	if (orig_start < start) {
+		clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC;
+		page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
+
 		if (!locked_folio)
 			mapping_set_error(inode->vfs_inode.i_mapping, ret);
 		extent_clear_unlock_delalloc(inode, orig_start, start - 1,
-					     locked_folio, NULL, 0, page_ops);
+					     locked_folio, NULL, clear_bits, page_ops);
 	}
 
-	/*
-	 * At this point we're unlocked, we want to make sure we're only
-	 * clearing these flags under the extent lock, so lock the rest of the
-	 * range and clear everything up.
-	 */
-	lock_extent(&inode->io_tree, start, end, NULL);
+	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+		     EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
+	page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
 
 	/*
 	 * For the range (2). If we reserved an extent for our delalloc range
@@ -1589,6 +1542,10 @@ out_unlock:
 		btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size,
 				       end - start - cur_alloc_size + 1, NULL);
 	}
+	btrfs_err_rl(fs_info,
+		     "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
+		     __func__, btrfs_root_id(inode->root),
+		     btrfs_ino(inode), orig_start, end + 1 - orig_start, ret);
 	return ret;
 }
 
@@ -1809,7 +1766,7 @@ static int fallback_to_cow(struct btrfs_inode *inode,
 			bytes = range_bytes;
 
 		spin_lock(&sinfo->lock);
-		btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
+		btrfs_space_info_update_bytes_may_use(sinfo, bytes);
 		spin_unlock(&sinfo->lock);
 
 		if (count > 0)
@@ -1837,7 +1794,6 @@ struct can_nocow_file_extent_args {
 	/* End file offset (inclusive) of the range we want to NOCOW. */
 	u64 end;
 	bool writeback_path;
-	bool strict;
 	/*
 	 * Free the path passed to can_nocow_file_extent() once it's not needed
 	 * anymore.
@@ -1892,8 +1848,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
 	 * for its subvolume was created, then this implies the extent is shared,
 	 * hence we must COW.
 	 */
-	if (!args->strict &&
-	    btrfs_file_extent_generation(leaf, fi) <=
+	if (btrfs_file_extent_generation(leaf, fi) <=
 	    btrfs_root_last_snapshot(&root->root_item))
 		goto out;
 
@@ -1922,9 +1877,8 @@ static int can_nocow_file_extent(struct btrfs_path *path,
 	 */
 	btrfs_release_path(path);
 
-	ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
-				    key->offset - args->file_extent.offset,
-				    args->file_extent.disk_bytenr, args->strict, path);
+	ret = btrfs_cross_ref_exist(inode, key->offset - args->file_extent.offset,
+				    args->file_extent.disk_bytenr, path);
 	WARN_ON_ONCE(ret > 0 && is_freespace_inode);
 	if (ret != 0)
 		goto out;
@@ -1971,6 +1925,53 @@ static int can_nocow_file_extent(struct btrfs_path *path,
 }
 
 /*
+ * Cleanup the dirty folios which will never be submitted due to error.
+ *
+ * When running a delalloc range, we may need to split the ranges (due to
+ * fragmentation or NOCOW). If we hit an error in the later part, we will error
+ * out and previously successfully executed range will never be submitted, thus
+ * we have to cleanup those folios by clearing their dirty flag, starting and
+ * finishing the writeback.
+ */
+static void cleanup_dirty_folios(struct btrfs_inode *inode,
+				 struct folio *locked_folio,
+				 u64 start, u64 end, int error)
+{
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct address_space *mapping = inode->vfs_inode.i_mapping;
+	pgoff_t start_index = start >> PAGE_SHIFT;
+	pgoff_t end_index = end >> PAGE_SHIFT;
+	u32 len;
+
+	ASSERT(end + 1 - start < U32_MAX);
+	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
+	       IS_ALIGNED(end + 1, fs_info->sectorsize));
+	len = end + 1 - start;
+
+	/*
+	 * Handle the locked folio first.
+	 * The btrfs_folio_clamp_*() helpers can handle range out of the folio case.
+	 */
+	btrfs_folio_clamp_finish_io(fs_info, locked_folio, start, len);
+
+	for (pgoff_t index = start_index; index <= end_index; index++) {
+		struct folio *folio;
+
+		/* Already handled at the beginning. */
+		if (index == locked_folio->index)
+			continue;
+		folio = __filemap_get_folio(mapping, index, FGP_LOCK, GFP_NOFS);
+		/* Cache already dropped, no need to do any cleanup. */
+		if (IS_ERR(folio))
+			continue;
+		btrfs_folio_clamp_finish_io(fs_info, folio, start, len);
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+	mapping_set_error(mapping, error);
+}
+
+/*
  * when nocow writeback call back.  This checks for snapshots or COW copies
  * of the extents that exist in the file, and COWs the file as required.
  *
@@ -1985,6 +1986,11 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 	struct btrfs_root *root = inode->root;
 	struct btrfs_path *path;
 	u64 cow_start = (u64)-1;
+	/*
+	 * If not 0, represents the inclusive end of the last fallback_to_cow()
+	 * range. Only for error handling.
+	 */
+	u64 cow_end = 0;
 	u64 cur_offset = start;
 	int ret;
 	bool check_prev = true;
@@ -2145,6 +2151,7 @@ must_cow:
 					      found_key.offset - 1);
 			cow_start = (u64)-1;
 			if (ret) {
+				cow_end = found_key.offset - 1;
 				btrfs_dec_nocow_writers(nocow_bg);
 				goto error;
 			}
@@ -2218,11 +2225,12 @@ must_cow:
 		cow_start = cur_offset;
 
 	if (cow_start != (u64)-1) {
-		cur_offset = end;
 		ret = fallback_to_cow(inode, locked_folio, cow_start, end);
 		cow_start = (u64)-1;
-		if (ret)
+		if (ret) {
+			cow_end = end;
 			goto error;
+		}
 	}
 	btrfs_free_path(path);
@@ -2230,12 +2238,41 @@ must_cow:
 
 error:
 	/*
+	 * There are several error cases:
+	 *
+	 * 1) Failed without falling back to COW
+	 *    start         cur_offset             end
+	 *    |/////////////|                      |
+	 *
+	 *    For range [start, cur_offset) the folios are already unlocked (except
+	 *    @locked_folio), EXTENT_DELALLOC already removed.
+	 *    Only need to clear the dirty flag as they will never be submitted.
+	 *    Ordered extent and extent maps are handled by
+	 *    btrfs_mark_ordered_io_finished() inside run_delalloc_range().
+	 *
+	 * 2) Failed with error from fallback_to_cow()
+	 *    start         cur_offset  cow_end    end
+	 *    |/////////////|-----------|          |
+	 *
+	 *    For range [start, cur_offset) it's the same as case 1).
+	 *    But for range [cur_offset, cow_end), the folios have dirty flag
+	 *    cleared and unlocked, EXTENT_DELALLOC cleared by cow_file_range().
+	 *
+	 *    Thus we should not call extent_clear_unlock_delalloc() on range
+	 *    [cur_offset, cow_end), as the folios are already unlocked.
+	 *
+	 * So clear the folio dirty flags for [start, cur_offset) first.
+	 */
+	if (cur_offset > start)
+		cleanup_dirty_folios(inode, locked_folio, start, cur_offset - 1, ret);
+
+	/*
 	 * If an error happened while a COW region is outstanding, cur_offset
-	 * needs to be reset to cow_start to ensure the COW region is unlocked
-	 * as well.
+	 * needs to be reset to @cow_end + 1 to skip the COW range, as
+	 * cow_file_range() will do the proper cleanup at error.
 	 */
-	if (cow_start != (u64)-1)
-		cur_offset = cow_start;
+	if (cow_end)
+		cur_offset = cow_end + 1;
 
 	/*
 	 * We need to lock the extent here because we're clearing DELALLOC and
@@ -2255,6 +2292,10 @@ error:
 		btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
 	}
 	btrfs_free_path(path);
+	btrfs_err_rl(fs_info,
+		     "%s failed, root=%llu inode=%llu start=%llu len=%llu: %d",
+		     __func__, btrfs_root_id(inode->root),
+		     btrfs_ino(inode), start, end + 1 - start, ret);
 	return ret;
 }
 
@@ -2305,8 +2346,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_folio,
 
 out:
 	if (ret < 0)
-		btrfs_cleanup_ordered_extents(inode, locked_folio, start,
-					      end - start + 1);
+		btrfs_cleanup_ordered_extents(inode, start, end - start + 1);
 	return ret;
 }
 
@@ -2921,7 +2961,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 			btrfs_item_ptr_offset(leaf, path->slots[0]),
 			sizeof(struct btrfs_file_extent_item));
 
-	btrfs_mark_buffer_dirty(trans, leaf);
 	btrfs_release_path(path);
 
 	/*
@@ -4085,7 +4124,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
 				    struct btrfs_inode_item);
 
 	fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
-	btrfs_mark_buffer_dirty(trans, leaf);
 	btrfs_set_inode_last_trans(trans, inode);
 	ret = 0;
 failed:
@@ -6380,7 +6418,6 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
 	/*
 	 * We don't need the path anymore, plus inheriting properties, adding
	 * ACLs, security xattrs, orphan item or adding the link, will result in
@@ -7011,8 +7048,6 @@ static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
  * @orig_start:	(optional) Return the original file offset of the file extent
  * @orig_len:	(optional) Return the original on-disk length of the file extent
  * @ram_bytes:	(optional) Return the ram_bytes of the file extent
- * @strict:	if true, omit optimizations that might force us into unnecessary
- *		cow. e.g., don't trust generation number.
  *
  * Return:
  * >0	and update @len if we can do nocow write
@@ -7024,7 +7059,7 @@ static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
  */
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
 			      struct btrfs_file_extent *file_extent,
-			      bool nowait, bool strict)
+			      bool nowait)
 {
 	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
 	struct can_nocow_file_extent_args nocow_args = { 0 };
@@ -7077,7 +7112,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
 
 	nocow_args.start = offset;
 	nocow_args.end = offset + *len - 1;
-	nocow_args.strict = strict;
 	nocow_args.free_path = true;
 
 	ret = can_nocow_file_extent(path, &key, BTRFS_I(inode), &nocow_args);
@@ -8027,31 +8061,45 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	/* src is a subvolume */
 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
 		ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
 	} else { /* src is an inode */
 		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
 					   BTRFS_I(old_dentry->d_inode),
 					   old_name, &old_rename_ctx);
-		if (!ret)
-			ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
-	}
-	if (ret) {
-		btrfs_abort_transaction(trans, ret);
-		goto out_fail;
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
+		ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
 	}
 
 	/* dest is a subvolume */
 	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
 		ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
 	} else { /* dest is an inode */
 		ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
 					   BTRFS_I(new_dentry->d_inode),
 					   new_name, &new_rename_ctx);
-		if (!ret)
-			ret = btrfs_update_inode(trans, BTRFS_I(new_inode));
-	}
-	if (ret) {
-		btrfs_abort_transaction(trans, ret);
-		goto out_fail;
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
+		ret = btrfs_update_inode(trans, BTRFS_I(new_inode));
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
 	}
 
 	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
@@ -8287,16 +8335,23 @@ static int btrfs_rename(struct mnt_idmap *idmap,
 
 	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
 		ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
 	} else {
 		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
 					   BTRFS_I(d_inode(old_dentry)),
 					   &old_fname.disk_name, &rename_ctx);
-		if (!ret)
-			ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
-	}
-	if (ret) {
-		btrfs_abort_transaction(trans, ret);
-		goto out_fail;
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
+		ret = btrfs_update_inode(trans, BTRFS_I(old_inode));
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			goto out_fail;
+		}
 	}
 
 	if (new_inode) {
@@ -8304,18 +8359,27 @@ static int btrfs_rename(struct mnt_idmap *idmap,
 		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
 			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
 			ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
+			if (ret) {
+				btrfs_abort_transaction(trans, ret);
+				goto out_fail;
+			}
 			BUG_ON(new_inode->i_nlink == 0);
 		} else {
 			ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
 						 BTRFS_I(d_inode(new_dentry)),
 						 &new_fname.disk_name);
+			if (ret) {
+				btrfs_abort_transaction(trans, ret);
+				goto out_fail;
+			}
 		}
-		if (!ret && new_inode->i_nlink == 0)
+		if (new_inode->i_nlink == 0) {
 			ret = btrfs_orphan_add(trans,
 					BTRFS_I(d_inode(new_dentry)));
-		if (ret) {
-			btrfs_abort_transaction(trans, ret);
-			goto out_fail;
+			if (ret) {
+				btrfs_abort_transaction(trans, ret);
+				goto out_fail;
+			}
 		}
 	}
 
@@ -8655,7 +8719,6 @@ static int btrfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 
 	ptr = btrfs_file_extent_inline_start(ei);
 	write_extent_buffer(leaf, symname, ptr, name_len);
-	btrfs_mark_buffer_dirty(trans, leaf);
 	btrfs_free_path(path);
 
 	d_instantiate_new(dentry, inode);
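Side note on the rename hunks: they all apply one mechanical transformation. The chained "if (!ret) ret = next_step(); ... if (ret) { abort; }" funnel is split so that every step aborts the transaction at its own failure site; since btrfs_abort_transaction() records the calling function and line, the shared check obscured which step actually failed. Below is a minimal standalone C sketch of that control-flow change; all types and functions are hypothetical stand-ins for the kernel APIs (btrfs_unlink_subvol(), btrfs_update_inode(), btrfs_abort_transaction()), not kernel code.

/*
 * Hypothetical stand-ins, not kernel APIs: a standalone sketch of the
 * error-handling restructure in the btrfs_rename()/btrfs_rename_exchange()
 * hunks above.
 */
#include <stdio.h>

struct trans { int aborted; };

/* Simulated steps: the second one fails with -EIO (-5). */
static int unlink_step(struct trans *t)       { (void)t; return 0; }
static int update_inode_step(struct trans *t) { (void)t; return -5; }

/* Like the kernel macro, record the call site of the abort. */
#define abort_transaction(t, err) do {					\
	(t)->aborted = 1;						\
	fprintf(stderr, "abort at %s:%d: %d\n", __func__, __LINE__, err); \
} while (0)

/*
 * Before: both steps funnel into one shared check, so every failure
 * reports the same abort site and hides which step went wrong.
 */
static int rename_before(struct trans *t)
{
	int ret;

	ret = unlink_step(t);
	if (!ret)
		ret = update_inode_step(t);
	if (ret) {
		abort_transaction(t, ret);
		return ret;
	}
	return 0;
}

/* After: each step aborts at its own failure site. */
static int rename_after(struct trans *t)
{
	int ret;

	ret = unlink_step(t);
	if (ret) {
		abort_transaction(t, ret);
		return ret;
	}
	ret = update_inode_step(t);
	if (ret) {
		abort_transaction(t, ret);
		return ret;
	}
	return 0;
}

int main(void)
{
	struct trans t = { 0 };

	rename_before(&t);	/* abort line points at the shared check */
	t.aborted = 0;
	rename_after(&t);	/* abort line points at the failing step */
	return 0;
}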
