diff options
Diffstat (limited to 'fs/btrfs/inode.c')
| -rw-r--r-- | fs/btrfs/inode.c | 925 | 
1 files changed, 129 insertions, 796 deletions
| diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 56032c518b26..6d2bb58d277a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -44,7 +44,6 @@  #include "locking.h"  #include "free-space-cache.h"  #include "inode-map.h" -#include "backref.h"  #include "props.h"  #include "qgroup.h"  #include "delalloc-space.h" @@ -64,7 +63,6 @@ struct btrfs_dio_data {  static const struct inode_operations btrfs_dir_inode_operations;  static const struct inode_operations btrfs_symlink_inode_operations; -static const struct inode_operations btrfs_dir_ro_inode_operations;  static const struct inode_operations btrfs_special_inode_operations;  static const struct inode_operations btrfs_file_inode_operations;  static const struct address_space_operations btrfs_aops; @@ -1479,10 +1477,10 @@ next_slot:  			disk_num_bytes =  				btrfs_file_extent_disk_num_bytes(leaf, fi);  			/* -			 * If extent we got ends before our range starts, skip -			 * to next extent +			 * If the extent we got ends before our current offset, +			 * skip to the next extent.  			 
*/ -			if (extent_end <= start) { +			if (extent_end <= cur_offset) {  				path->slots[0]++;  				goto next_slot;  			} @@ -2128,7 +2126,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,  							   bio_flags);  			goto out;  		} else if (!skip_sum) { -			ret = btrfs_lookup_bio_sums(inode, bio, NULL); +			ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL);  			if (ret)  				goto out;  		} @@ -2394,649 +2392,6 @@ out:  	return ret;  } -/* snapshot-aware defrag */ -struct sa_defrag_extent_backref { -	struct rb_node node; -	struct old_sa_defrag_extent *old; -	u64 root_id; -	u64 inum; -	u64 file_pos; -	u64 extent_offset; -	u64 num_bytes; -	u64 generation; -}; - -struct old_sa_defrag_extent { -	struct list_head list; -	struct new_sa_defrag_extent *new; - -	u64 extent_offset; -	u64 bytenr; -	u64 offset; -	u64 len; -	int count; -}; - -struct new_sa_defrag_extent { -	struct rb_root root; -	struct list_head head; -	struct btrfs_path *path; -	struct inode *inode; -	u64 file_pos; -	u64 len; -	u64 bytenr; -	u64 disk_len; -	u8 compress_type; -}; - -static int backref_comp(struct sa_defrag_extent_backref *b1, -			struct sa_defrag_extent_backref *b2) -{ -	if (b1->root_id < b2->root_id) -		return -1; -	else if (b1->root_id > b2->root_id) -		return 1; - -	if (b1->inum < b2->inum) -		return -1; -	else if (b1->inum > b2->inum) -		return 1; - -	if (b1->file_pos < b2->file_pos) -		return -1; -	else if (b1->file_pos > b2->file_pos) -		return 1; - -	/* -	 * [------------------------------] ===> (a range of space) -	 *     |<--->|   |<---->| =============> (fs/file tree A) -	 * |<---------------------------->| ===> (fs/file tree B) -	 * -	 * A range of space can refer to two file extents in one tree while -	 * refer to only one file extent in another tree. -	 * -	 * So we may process a disk offset more than one time(two extents in A) -	 * and locate at the same extent(one extent in B), then insert two same -	 * backrefs(both refer to the extent in B). 
-	 */ -	return 0; -} - -static void backref_insert(struct rb_root *root, -			   struct sa_defrag_extent_backref *backref) -{ -	struct rb_node **p = &root->rb_node; -	struct rb_node *parent = NULL; -	struct sa_defrag_extent_backref *entry; -	int ret; - -	while (*p) { -		parent = *p; -		entry = rb_entry(parent, struct sa_defrag_extent_backref, node); - -		ret = backref_comp(backref, entry); -		if (ret < 0) -			p = &(*p)->rb_left; -		else -			p = &(*p)->rb_right; -	} - -	rb_link_node(&backref->node, parent, p); -	rb_insert_color(&backref->node, root); -} - -/* - * Note the backref might has changed, and in this case we just return 0. - */ -static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, -				       void *ctx) -{ -	struct btrfs_file_extent_item *extent; -	struct old_sa_defrag_extent *old = ctx; -	struct new_sa_defrag_extent *new = old->new; -	struct btrfs_path *path = new->path; -	struct btrfs_key key; -	struct btrfs_root *root; -	struct sa_defrag_extent_backref *backref; -	struct extent_buffer *leaf; -	struct inode *inode = new->inode; -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); -	int slot; -	int ret; -	u64 extent_offset; -	u64 num_bytes; - -	if (BTRFS_I(inode)->root->root_key.objectid == root_id && -	    inum == btrfs_ino(BTRFS_I(inode))) -		return 0; - -	key.objectid = root_id; -	key.type = BTRFS_ROOT_ITEM_KEY; -	key.offset = (u64)-1; - -	root = btrfs_read_fs_root_no_name(fs_info, &key); -	if (IS_ERR(root)) { -		if (PTR_ERR(root) == -ENOENT) -			return 0; -		WARN_ON(1); -		btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu", -			 inum, offset, root_id); -		return PTR_ERR(root); -	} - -	key.objectid = inum; -	key.type = BTRFS_EXTENT_DATA_KEY; -	if (offset > (u64)-1 << 32) -		key.offset = 0; -	else -		key.offset = offset; - -	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); -	if (WARN_ON(ret < 0)) -		return ret; -	ret = 0; - -	while (1) { -		cond_resched(); - -		leaf = path->nodes[0]; -		slot = path->slots[0]; - -		
if (slot >= btrfs_header_nritems(leaf)) { -			ret = btrfs_next_leaf(root, path); -			if (ret < 0) { -				goto out; -			} else if (ret > 0) { -				ret = 0; -				goto out; -			} -			continue; -		} - -		path->slots[0]++; - -		btrfs_item_key_to_cpu(leaf, &key, slot); - -		if (key.objectid > inum) -			goto out; - -		if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY) -			continue; - -		extent = btrfs_item_ptr(leaf, slot, -					struct btrfs_file_extent_item); - -		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) -			continue; - -		/* -		 * 'offset' refers to the exact key.offset, -		 * NOT the 'offset' field in btrfs_extent_data_ref, ie. -		 * (key.offset - extent_offset). -		 */ -		if (key.offset != offset) -			continue; - -		extent_offset = btrfs_file_extent_offset(leaf, extent); -		num_bytes = btrfs_file_extent_num_bytes(leaf, extent); - -		if (extent_offset >= old->extent_offset + old->offset + -		    old->len || extent_offset + num_bytes <= -		    old->extent_offset + old->offset) -			continue; -		break; -	} - -	backref = kmalloc(sizeof(*backref), GFP_NOFS); -	if (!backref) { -		ret = -ENOENT; -		goto out; -	} - -	backref->root_id = root_id; -	backref->inum = inum; -	backref->file_pos = offset; -	backref->num_bytes = num_bytes; -	backref->extent_offset = extent_offset; -	backref->generation = btrfs_file_extent_generation(leaf, extent); -	backref->old = old; -	backref_insert(&new->root, backref); -	old->count++; -out: -	btrfs_release_path(path); -	WARN_ON(ret); -	return ret; -} - -static noinline bool record_extent_backrefs(struct btrfs_path *path, -				   struct new_sa_defrag_extent *new) -{ -	struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); -	struct old_sa_defrag_extent *old, *tmp; -	int ret; - -	new->path = path; - -	list_for_each_entry_safe(old, tmp, &new->head, list) { -		ret = iterate_inodes_from_logical(old->bytenr + -						  old->extent_offset, fs_info, -						  path, record_one_backref, -						  old, false); -		if (ret < 0 
&& ret != -ENOENT) -			return false; - -		/* no backref to be processed for this extent */ -		if (!old->count) { -			list_del(&old->list); -			kfree(old); -		} -	} - -	if (list_empty(&new->head)) -		return false; - -	return true; -} - -static int relink_is_mergable(struct extent_buffer *leaf, -			      struct btrfs_file_extent_item *fi, -			      struct new_sa_defrag_extent *new) -{ -	if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr) -		return 0; - -	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) -		return 0; - -	if (btrfs_file_extent_compression(leaf, fi) != new->compress_type) -		return 0; - -	if (btrfs_file_extent_encryption(leaf, fi) || -	    btrfs_file_extent_other_encoding(leaf, fi)) -		return 0; - -	return 1; -} - -/* - * Note the backref might has changed, and in this case we just return 0. - */ -static noinline int relink_extent_backref(struct btrfs_path *path, -				 struct sa_defrag_extent_backref *prev, -				 struct sa_defrag_extent_backref *backref) -{ -	struct btrfs_file_extent_item *extent; -	struct btrfs_file_extent_item *item; -	struct btrfs_ordered_extent *ordered; -	struct btrfs_trans_handle *trans; -	struct btrfs_ref ref = { 0 }; -	struct btrfs_root *root; -	struct btrfs_key key; -	struct extent_buffer *leaf; -	struct old_sa_defrag_extent *old = backref->old; -	struct new_sa_defrag_extent *new = old->new; -	struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); -	struct inode *inode; -	struct extent_state *cached = NULL; -	int ret = 0; -	u64 start; -	u64 len; -	u64 lock_start; -	u64 lock_end; -	bool merge = false; -	int index; - -	if (prev && prev->root_id == backref->root_id && -	    prev->inum == backref->inum && -	    prev->file_pos + prev->num_bytes == backref->file_pos) -		merge = true; - -	/* step 1: get root */ -	key.objectid = backref->root_id; -	key.type = BTRFS_ROOT_ITEM_KEY; -	key.offset = (u64)-1; - -	index = srcu_read_lock(&fs_info->subvol_srcu); - -	root = btrfs_read_fs_root_no_name(fs_info, &key); -	
if (IS_ERR(root)) { -		srcu_read_unlock(&fs_info->subvol_srcu, index); -		if (PTR_ERR(root) == -ENOENT) -			return 0; -		return PTR_ERR(root); -	} - -	if (btrfs_root_readonly(root)) { -		srcu_read_unlock(&fs_info->subvol_srcu, index); -		return 0; -	} - -	/* step 2: get inode */ -	key.objectid = backref->inum; -	key.type = BTRFS_INODE_ITEM_KEY; -	key.offset = 0; - -	inode = btrfs_iget(fs_info->sb, &key, root); -	if (IS_ERR(inode)) { -		srcu_read_unlock(&fs_info->subvol_srcu, index); -		return 0; -	} - -	srcu_read_unlock(&fs_info->subvol_srcu, index); - -	/* step 3: relink backref */ -	lock_start = backref->file_pos; -	lock_end = backref->file_pos + backref->num_bytes - 1; -	lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end, -			 &cached); - -	ordered = btrfs_lookup_first_ordered_extent(inode, lock_end); -	if (ordered) { -		btrfs_put_ordered_extent(ordered); -		goto out_unlock; -	} - -	trans = btrfs_join_transaction(root); -	if (IS_ERR(trans)) { -		ret = PTR_ERR(trans); -		goto out_unlock; -	} - -	key.objectid = backref->inum; -	key.type = BTRFS_EXTENT_DATA_KEY; -	key.offset = backref->file_pos; - -	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); -	if (ret < 0) { -		goto out_free_path; -	} else if (ret > 0) { -		ret = 0; -		goto out_free_path; -	} - -	extent = btrfs_item_ptr(path->nodes[0], path->slots[0], -				struct btrfs_file_extent_item); - -	if (btrfs_file_extent_generation(path->nodes[0], extent) != -	    backref->generation) -		goto out_free_path; - -	btrfs_release_path(path); - -	start = backref->file_pos; -	if (backref->extent_offset < old->extent_offset + old->offset) -		start += old->extent_offset + old->offset - -			 backref->extent_offset; - -	len = min(backref->extent_offset + backref->num_bytes, -		  old->extent_offset + old->offset + old->len); -	len -= max(backref->extent_offset, old->extent_offset + old->offset); - -	ret = btrfs_drop_extents(trans, root, inode, start, -				 start + len, 1); -	if (ret) -		goto out_free_path; 
-again: -	key.objectid = btrfs_ino(BTRFS_I(inode)); -	key.type = BTRFS_EXTENT_DATA_KEY; -	key.offset = start; - -	path->leave_spinning = 1; -	if (merge) { -		struct btrfs_file_extent_item *fi; -		u64 extent_len; -		struct btrfs_key found_key; - -		ret = btrfs_search_slot(trans, root, &key, path, 0, 1); -		if (ret < 0) -			goto out_free_path; - -		path->slots[0]--; -		leaf = path->nodes[0]; -		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - -		fi = btrfs_item_ptr(leaf, path->slots[0], -				    struct btrfs_file_extent_item); -		extent_len = btrfs_file_extent_num_bytes(leaf, fi); - -		if (extent_len + found_key.offset == start && -		    relink_is_mergable(leaf, fi, new)) { -			btrfs_set_file_extent_num_bytes(leaf, fi, -							extent_len + len); -			btrfs_mark_buffer_dirty(leaf); -			inode_add_bytes(inode, len); - -			ret = 1; -			goto out_free_path; -		} else { -			merge = false; -			btrfs_release_path(path); -			goto again; -		} -	} - -	ret = btrfs_insert_empty_item(trans, root, path, &key, -					sizeof(*extent)); -	if (ret) { -		btrfs_abort_transaction(trans, ret); -		goto out_free_path; -	} - -	leaf = path->nodes[0]; -	item = btrfs_item_ptr(leaf, path->slots[0], -				struct btrfs_file_extent_item); -	btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr); -	btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len); -	btrfs_set_file_extent_offset(leaf, item, start - new->file_pos); -	btrfs_set_file_extent_num_bytes(leaf, item, len); -	btrfs_set_file_extent_ram_bytes(leaf, item, new->len); -	btrfs_set_file_extent_generation(leaf, item, trans->transid); -	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); -	btrfs_set_file_extent_compression(leaf, item, new->compress_type); -	btrfs_set_file_extent_encryption(leaf, item, 0); -	btrfs_set_file_extent_other_encoding(leaf, item, 0); - -	btrfs_mark_buffer_dirty(leaf); -	inode_add_bytes(inode, len); -	btrfs_release_path(path); - -	btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, 
new->bytenr, -			       new->disk_len, 0); -	btrfs_init_data_ref(&ref, backref->root_id, backref->inum, -			    new->file_pos);  /* start - extent_offset */ -	ret = btrfs_inc_extent_ref(trans, &ref); -	if (ret) { -		btrfs_abort_transaction(trans, ret); -		goto out_free_path; -	} - -	ret = 1; -out_free_path: -	btrfs_release_path(path); -	path->leave_spinning = 0; -	btrfs_end_transaction(trans); -out_unlock: -	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, -			     &cached); -	iput(inode); -	return ret; -} - -static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) -{ -	struct old_sa_defrag_extent *old, *tmp; - -	if (!new) -		return; - -	list_for_each_entry_safe(old, tmp, &new->head, list) { -		kfree(old); -	} -	kfree(new); -} - -static void relink_file_extents(struct new_sa_defrag_extent *new) -{ -	struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb); -	struct btrfs_path *path; -	struct sa_defrag_extent_backref *backref; -	struct sa_defrag_extent_backref *prev = NULL; -	struct rb_node *node; -	int ret; - -	path = btrfs_alloc_path(); -	if (!path) -		return; - -	if (!record_extent_backrefs(path, new)) { -		btrfs_free_path(path); -		goto out; -	} -	btrfs_release_path(path); - -	while (1) { -		node = rb_first(&new->root); -		if (!node) -			break; -		rb_erase(node, &new->root); - -		backref = rb_entry(node, struct sa_defrag_extent_backref, node); - -		ret = relink_extent_backref(path, prev, backref); -		WARN_ON(ret < 0); - -		kfree(prev); - -		if (ret == 1) -			prev = backref; -		else -			prev = NULL; -		cond_resched(); -	} -	kfree(prev); - -	btrfs_free_path(path); -out: -	free_sa_defrag_extent(new); - -	atomic_dec(&fs_info->defrag_running); -	wake_up(&fs_info->transaction_wait); -} - -static struct new_sa_defrag_extent * -record_old_file_extents(struct inode *inode, -			struct btrfs_ordered_extent *ordered) -{ -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); -	struct btrfs_root *root = BTRFS_I(inode)->root; -	struct 
btrfs_path *path; -	struct btrfs_key key; -	struct old_sa_defrag_extent *old; -	struct new_sa_defrag_extent *new; -	int ret; - -	new = kmalloc(sizeof(*new), GFP_NOFS); -	if (!new) -		return NULL; - -	new->inode = inode; -	new->file_pos = ordered->file_offset; -	new->len = ordered->len; -	new->bytenr = ordered->start; -	new->disk_len = ordered->disk_len; -	new->compress_type = ordered->compress_type; -	new->root = RB_ROOT; -	INIT_LIST_HEAD(&new->head); - -	path = btrfs_alloc_path(); -	if (!path) -		goto out_kfree; - -	key.objectid = btrfs_ino(BTRFS_I(inode)); -	key.type = BTRFS_EXTENT_DATA_KEY; -	key.offset = new->file_pos; - -	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); -	if (ret < 0) -		goto out_free_path; -	if (ret > 0 && path->slots[0] > 0) -		path->slots[0]--; - -	/* find out all the old extents for the file range */ -	while (1) { -		struct btrfs_file_extent_item *extent; -		struct extent_buffer *l; -		int slot; -		u64 num_bytes; -		u64 offset; -		u64 end; -		u64 disk_bytenr; -		u64 extent_offset; - -		l = path->nodes[0]; -		slot = path->slots[0]; - -		if (slot >= btrfs_header_nritems(l)) { -			ret = btrfs_next_leaf(root, path); -			if (ret < 0) -				goto out_free_path; -			else if (ret > 0) -				break; -			continue; -		} - -		btrfs_item_key_to_cpu(l, &key, slot); - -		if (key.objectid != btrfs_ino(BTRFS_I(inode))) -			break; -		if (key.type != BTRFS_EXTENT_DATA_KEY) -			break; -		if (key.offset >= new->file_pos + new->len) -			break; - -		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item); - -		num_bytes = btrfs_file_extent_num_bytes(l, extent); -		if (key.offset + num_bytes < new->file_pos) -			goto next; - -		disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent); -		if (!disk_bytenr) -			goto next; - -		extent_offset = btrfs_file_extent_offset(l, extent); - -		old = kmalloc(sizeof(*old), GFP_NOFS); -		if (!old) -			goto out_free_path; - -		offset = max(new->file_pos, key.offset); -		end = min(new->file_pos + new->len, key.offset + 
num_bytes); - -		old->bytenr = disk_bytenr; -		old->extent_offset = extent_offset; -		old->offset = offset - key.offset; -		old->len = end - offset; -		old->new = new; -		old->count = 0; -		list_add_tail(&old->list, &new->head); -next: -		path->slots[0]++; -		cond_resched(); -	} - -	btrfs_free_path(path); -	atomic_inc(&fs_info->defrag_running); - -	return new; - -out_free_path: -	btrfs_free_path(path); -out_kfree: -	free_sa_defrag_extent(new); -	return NULL; -} -  static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,  					 u64 start, u64 len)  { @@ -3064,15 +2419,19 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  	struct btrfs_trans_handle *trans = NULL;  	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;  	struct extent_state *cached_state = NULL; -	struct new_sa_defrag_extent *new = NULL; +	u64 start, end;  	int compress_type = 0;  	int ret = 0; -	u64 logical_len = ordered_extent->len; +	u64 logical_len = ordered_extent->num_bytes;  	bool freespace_inode;  	bool truncated = false;  	bool range_locked = false;  	bool clear_new_delalloc_bytes = false;  	bool clear_reserved_extent = true; +	unsigned int clear_bits; + +	start = ordered_extent->file_offset; +	end = start + ordered_extent->num_bytes - 1;  	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&  	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) && @@ -3086,10 +2445,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  		goto out;  	} -	btrfs_free_io_failure_record(BTRFS_I(inode), -			ordered_extent->file_offset, -			ordered_extent->file_offset + -			ordered_extent->len - 1); +	btrfs_free_io_failure_record(BTRFS_I(inode), start, end);  	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {  		truncated = true; @@ -3107,8 +2463,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  		 * space for NOCOW range.  		 
* As NOCOW won't cause a new delayed ref, just free the space  		 */ -		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, -				       ordered_extent->len); +		btrfs_qgroup_free_data(inode, NULL, start, +				       ordered_extent->num_bytes);  		btrfs_ordered_update_i_size(inode, 0, ordered_extent);  		if (freespace_inode)  			trans = btrfs_join_transaction_spacecache(root); @@ -3127,23 +2483,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  	}  	range_locked = true; -	lock_extent_bits(io_tree, ordered_extent->file_offset, -			 ordered_extent->file_offset + ordered_extent->len - 1, -			 &cached_state); - -	ret = test_range_bit(io_tree, ordered_extent->file_offset, -			ordered_extent->file_offset + ordered_extent->len - 1, -			EXTENT_DEFRAG, 0, cached_state); -	if (ret) { -		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); -		if (0 && last_snapshot >= BTRFS_I(inode)->generation) -			/* the inode is shared */ -			new = record_old_file_extents(inode, ordered_extent); - -		clear_extent_bit(io_tree, ordered_extent->file_offset, -			ordered_extent->file_offset + ordered_extent->len - 1, -			EXTENT_DEFRAG, 0, 0, &cached_state); -	} +	lock_extent_bits(io_tree, start, end, &cached_state);  	if (freespace_inode)  		trans = btrfs_join_transaction_spacecache(root); @@ -3161,31 +2501,30 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  		compress_type = ordered_extent->compress_type;  	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {  		BUG_ON(compress_type); -		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset, -				       ordered_extent->len); +		btrfs_qgroup_free_data(inode, NULL, start, +				       ordered_extent->num_bytes);  		ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),  						ordered_extent->file_offset,  						ordered_extent->file_offset +  						logical_len);  	} else {  		BUG_ON(root == fs_info->tree_root); -		ret = 
insert_reserved_file_extent(trans, inode, -						ordered_extent->file_offset, -						ordered_extent->start, -						ordered_extent->disk_len, +		ret = insert_reserved_file_extent(trans, inode, start, +						ordered_extent->disk_bytenr, +						ordered_extent->disk_num_bytes,  						logical_len, logical_len,  						compress_type, 0, 0,  						BTRFS_FILE_EXTENT_REG);  		if (!ret) {  			clear_reserved_extent = false;  			btrfs_release_delalloc_bytes(fs_info, -						     ordered_extent->start, -						     ordered_extent->disk_len); +						ordered_extent->disk_bytenr, +						ordered_extent->disk_num_bytes);  		}  	}  	unpin_extent_cache(&BTRFS_I(inode)->extent_tree, -			   ordered_extent->file_offset, ordered_extent->len, -			   trans->transid); +			   ordered_extent->file_offset, +			   ordered_extent->num_bytes, trans->transid);  	if (ret < 0) {  		btrfs_abort_transaction(trans, ret);  		goto out; @@ -3205,37 +2544,27 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  	}  	ret = 0;  out: -	if (range_locked || clear_new_delalloc_bytes) { -		unsigned int clear_bits = 0; - -		if (range_locked) -			clear_bits |= EXTENT_LOCKED; -		if (clear_new_delalloc_bytes) -			clear_bits |= EXTENT_DELALLOC_NEW; -		clear_extent_bit(&BTRFS_I(inode)->io_tree, -				 ordered_extent->file_offset, -				 ordered_extent->file_offset + -				 ordered_extent->len - 1, -				 clear_bits, -				 (clear_bits & EXTENT_LOCKED) ? 1 : 0, -				 0, &cached_state); -	} +	clear_bits = EXTENT_DEFRAG; +	if (range_locked) +		clear_bits |= EXTENT_LOCKED; +	if (clear_new_delalloc_bytes) +		clear_bits |= EXTENT_DELALLOC_NEW; +	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, +			 (clear_bits & EXTENT_LOCKED) ? 
1 : 0, 0, +			 &cached_state);  	if (trans)  		btrfs_end_transaction(trans);  	if (ret || truncated) { -		u64 start, end; +		u64 unwritten_start = start;  		if (truncated) -			start = ordered_extent->file_offset + logical_len; -		else -			start = ordered_extent->file_offset; -		end = ordered_extent->file_offset + ordered_extent->len - 1; -		clear_extent_uptodate(io_tree, start, end, NULL); +			unwritten_start += logical_len; +		clear_extent_uptodate(io_tree, unwritten_start, end, NULL);  		/* Drop the cache for the part of the extent we didn't write. */ -		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); +		btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0);  		/*  		 * If the ordered extent had an IOERR or something else went @@ -3250,29 +2579,28 @@ out:  		if ((ret || !logical_len) &&  		    clear_reserved_extent &&  		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && -		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) +		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { +			/* +			 * Discard the range before returning it back to the +			 * free space pool +			 */ +			if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC)) +				btrfs_discard_extent(fs_info, +						ordered_extent->disk_bytenr, +						ordered_extent->disk_num_bytes, +						NULL);  			btrfs_free_reserved_extent(fs_info, -						   ordered_extent->start, -						   ordered_extent->disk_len, 1); +					ordered_extent->disk_bytenr, +					ordered_extent->disk_num_bytes, 1); +		}  	} -  	/*  	 * This needs to be done to make sure anybody waiting knows we are done  	 * updating everything for this ordered extent.  	 
*/  	btrfs_remove_ordered_extent(inode, ordered_extent); -	/* for snapshot-aware defrag */ -	if (new) { -		if (ret) { -			free_sa_defrag_extent(new); -			atomic_dec(&fs_info->defrag_running); -		} else { -			relink_file_extents(new); -		} -	} -  	/* once for us */  	btrfs_put_ordered_extent(ordered_extent);  	/* once for the tree */ @@ -4238,18 +3566,30 @@ out:  }  static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, -			       struct inode *dir, u64 objectid, -			       const char *name, int name_len) +			       struct inode *dir, struct dentry *dentry)  {  	struct btrfs_root *root = BTRFS_I(dir)->root; +	struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));  	struct btrfs_path *path;  	struct extent_buffer *leaf;  	struct btrfs_dir_item *di;  	struct btrfs_key key; +	const char *name = dentry->d_name.name; +	int name_len = dentry->d_name.len;  	u64 index;  	int ret; +	u64 objectid;  	u64 dir_ino = btrfs_ino(BTRFS_I(dir)); +	if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) { +		objectid = inode->root->root_key.objectid; +	} else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) { +		objectid = inode->location.objectid; +	} else { +		WARN_ON(1); +		return -EINVAL; +	} +  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; @@ -4271,13 +3611,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,  	}  	btrfs_release_path(path); -	ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid, -				 dir_ino, &index, name, name_len); -	if (ret < 0) { -		if (ret != -ENOENT) { -			btrfs_abort_transaction(trans, ret); -			goto out; -		} +	/* +	 * This is a placeholder inode for a subvolume we didn't have a +	 * reference to at the time of the snapshot creation.  In the meantime +	 * we could have renamed the real subvol link into our snapshot, so +	 * depending on btrfs_del_root_ref to return -ENOENT here is incorrect. +	 * Instead simply lookup the dir_index_item for this entry so we can +	 * remove it.  
Otherwise we know we have a ref to the root and we can +	 * call btrfs_del_root_ref, and it _shouldn't_ fail. +	 */ +	if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {  		di = btrfs_search_dir_index_item(root, path, dir_ino,  						 name, name_len);  		if (IS_ERR_OR_NULL(di)) { @@ -4292,8 +3635,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,  		leaf = path->nodes[0];  		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);  		index = key.offset; +		btrfs_release_path(path); +	} else { +		ret = btrfs_del_root_ref(trans, objectid, +					 root->root_key.objectid, dir_ino, +					 &index, name, name_len); +		if (ret) { +			btrfs_abort_transaction(trans, ret); +			goto out; +		}  	} -	btrfs_release_path(path);  	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);  	if (ret) { @@ -4487,8 +3838,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)  	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); -	ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid, -				  dentry->d_name.name, dentry->d_name.len); +	ret = btrfs_unlink_subvol(trans, dir, dentry);  	if (ret) {  		err = ret;  		btrfs_abort_transaction(trans, ret); @@ -4583,10 +3933,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)  		return PTR_ERR(trans);  	if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { -		err = btrfs_unlink_subvol(trans, dir, -					  BTRFS_I(inode)->location.objectid, -					  dentry->d_name.name, -					  dentry->d_name.len); +		err = btrfs_unlink_subvol(trans, dir, dentry);  		goto out;  	} @@ -5157,7 +4504,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  	cur_offset = hole_start;  	while (1) {  		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, -				block_end - cur_offset, 0); +				      block_end - cur_offset);  		if (IS_ERR(em)) {  			err = PTR_ERR(em);  			em = NULL; @@ -5728,7 +5075,6 @@ static void inode_tree_add(struct inode *inode)  
static void inode_tree_del(struct inode *inode)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);  	struct btrfs_root *root = BTRFS_I(inode)->root;  	int empty = 0; @@ -5741,7 +5087,6 @@ static void inode_tree_del(struct inode *inode)  	spin_unlock(&root->inode_lock);  	if (empty && btrfs_root_refs(&root->root_item) == 0) { -		synchronize_srcu(&fs_info->subvol_srcu);  		spin_lock(&root->inode_lock);  		empty = RB_EMPTY_ROOT(&root->inode_tree);  		spin_unlock(&root->inode_lock); @@ -5843,7 +5188,11 @@ static struct inode *new_simple_dir(struct super_block *s,  	set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);  	inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; -	inode->i_op = &btrfs_dir_ro_inode_operations; +	/* +	 * We only need lookup, the rest is read-only and there's no inode +	 * associated with the dentry +	 */ +	inode->i_op = &simple_dir_inode_operations;  	inode->i_opflags &= ~IOP_XATTR;  	inode->i_fop = &simple_dir_operations;  	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; @@ -6934,18 +6283,27 @@ static noinline int uncompress_inline(struct btrfs_path *path,  	return ret;  } -/* - * a bit scary, this does extent mapping from logical file offset to the disk. - * the ugly parts come from merging extents from the disk with the in-ram - * representation.  This gets more complex because of the data=ordered code, - * where the in-ram extents might be locked pending data=ordered completion. +/** + * btrfs_get_extent - Lookup the first extent overlapping a range in a file. + * @inode:	file to search in + * @page:	page to read extent data into if the extent is inline + * @pg_offset:	offset into @page to copy to + * @start:	file offset + * @len:	length of range starting at @start   * - * This also copies inline extents directly into the page. + * This returns the first &struct extent_map which overlaps with the given + * range, reading it from the B-tree and caching it if necessary. 
Note that + * there may be more extents which overlap the given range after the returned + * extent_map. + * + * If @page is not NULL and the extent is inline, this also reads the extent + * data directly into the page and marks the extent up to date in the io_tree. + * + * Return: ERR_PTR on error, non-NULL extent_map on success.   */  struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, -				    struct page *page, -				    size_t pg_offset, u64 start, u64 len, -				    int create) +				    struct page *page, size_t pg_offset, +				    u64 start, u64 len)  {  	struct btrfs_fs_info *fs_info = inode->root->fs_info;  	int ret; @@ -6962,7 +6320,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,  	struct extent_map *em = NULL;  	struct extent_map_tree *em_tree = &inode->extent_tree;  	struct extent_io_tree *io_tree = &inode->io_tree; -	const bool new_inline = !page || create;  	read_lock(&em_tree->lock);  	em = lookup_extent_mapping(em_tree, start, len); @@ -7085,8 +6442,7 @@ next:  		goto insert;  	} -	btrfs_extent_item_to_extent_map(inode, path, item, -			new_inline, em); +	btrfs_extent_item_to_extent_map(inode, path, item, !page, em);  	if (extent_type == BTRFS_FILE_EXTENT_REG ||  	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) { @@ -7098,7 +6454,7 @@ next:  		size_t extent_offset;  		size_t copy_size; -		if (new_inline) +		if (!page)  			goto out;  		size = btrfs_file_extent_ram_bytes(leaf, item); @@ -7181,7 +6537,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,  	u64 delalloc_end;  	int err = 0; -	em = btrfs_get_extent(inode, NULL, 0, start, len, 0); +	em = btrfs_get_extent(inode, NULL, 0, start, len);  	if (IS_ERR(em))  		return em;  	/* @@ -7806,7 +7162,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,  		goto err;  	} -	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); +	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);  	if (IS_ERR(em)) {  		ret = 
PTR_ERR(em);  		goto unlock_err; @@ -8358,8 +7714,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,  	 * contention.  	 */  	if (dip->logical_offset == file_offset) { -		ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio, -						file_offset); +		ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, file_offset, +					    NULL);  		if (ret)  			return ret;  	} @@ -8872,7 +8228,8 @@ again:  	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,  					page_end - start + 1);  	if (ordered) { -		end = min(page_end, ordered->file_offset + ordered->len - 1); +		end = min(page_end, +			  ordered->file_offset + ordered->num_bytes - 1);  		/*  		 * IO on this page will never be started, so we need  		 * to account for any ordered extents now @@ -9073,7 +8430,6 @@ again:  		ret = VM_FAULT_SIGBUS;  		goto out_unlock;  	} -	ret2 = 0;  	/* page is wholly or partially inside EOF */  	if (page_start + PAGE_SIZE > size) @@ -9097,12 +8453,10 @@ again:  	unlock_extent_cached(io_tree, page_start, page_end, &cached_state); -	if (!ret2) { -		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); -		sb_end_pagefault(inode->i_sb); -		extent_changeset_free(data_reserved); -		return VM_FAULT_LOCKED; -	} +	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); +	sb_end_pagefault(inode->i_sb); +	extent_changeset_free(data_reserved); +	return VM_FAULT_LOCKED;  out_unlock:  	unlock_page(page); @@ -9400,7 +8754,7 @@ void btrfs_destroy_inode(struct inode *inode)  		else {  			btrfs_err(fs_info,  				  "found ordered extent %llu %llu on inode cleanup", -				  ordered->file_offset, ordered->len); +				  ordered->file_offset, ordered->num_bytes);  			btrfs_remove_ordered_extent(inode, ordered);  			btrfs_put_ordered_extent(ordered);  			btrfs_put_ordered_extent(ordered); @@ -9538,7 +8892,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,  	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));  	u64 old_idx = 0;  	u64 new_idx = 0; -	u64 root_objectid;  	
int ret;  	bool root_log_pinned = false;  	bool dest_log_pinned = false; @@ -9556,9 +8909,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,  	btrfs_init_log_ctx(&ctx_dest, new_inode);  	/* close the race window with snapshot create/destroy ioctl */ -	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) -		down_read(&fs_info->subvol_sem); -	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) +	if (old_ino == BTRFS_FIRST_FREE_OBJECTID || +	    new_ino == BTRFS_FIRST_FREE_OBJECTID)  		down_read(&fs_info->subvol_sem);  	/* @@ -9645,10 +8997,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,  	/* src is a subvolume */  	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { -		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; -		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, -					  old_dentry->d_name.name, -					  old_dentry->d_name.len); +		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);  	} else { /* src is an inode */  		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),  					   BTRFS_I(old_dentry->d_inode), @@ -9664,10 +9013,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,  	/* dest is a subvolume */  	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { -		root_objectid = BTRFS_I(new_inode)->root->root_key.objectid; -		ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, -					  new_dentry->d_name.name, -					  new_dentry->d_name.len); +		ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);  	} else { /* dest is an inode */  		ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),  					   BTRFS_I(new_dentry->d_inode), @@ -9792,9 +9138,8 @@ out_fail:  		ret = ret ? 
ret : ret2;  	}  out_notrans: -	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) -		up_read(&fs_info->subvol_sem); -	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) +	if (new_ino == BTRFS_FIRST_FREE_OBJECTID || +	    old_ino == BTRFS_FIRST_FREE_OBJECTID)  		up_read(&fs_info->subvol_sem);  	ASSERT(list_empty(&ctx_root.list)); @@ -9866,7 +9211,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	struct inode *new_inode = d_inode(new_dentry);  	struct inode *old_inode = d_inode(old_dentry);  	u64 index = 0; -	u64 root_objectid;  	int ret;  	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));  	bool log_pinned = false; @@ -9974,10 +9318,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  				BTRFS_I(old_inode), 1);  	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { -		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; -		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid, -					old_dentry->d_name.name, -					old_dentry->d_name.len); +		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);  	} else {  		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),  					BTRFS_I(d_inode(old_dentry)), @@ -9996,10 +9337,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  		new_inode->i_ctime = current_time(new_inode);  		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==  			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { -			root_objectid = BTRFS_I(new_inode)->location.objectid; -			ret = btrfs_unlink_subvol(trans, new_dir, root_objectid, -						new_dentry->d_name.name, -						new_dentry->d_name.len); +			ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);  			BUG_ON(new_inode->i_nlink == 0);  		} else {  			ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir), @@ -10835,7 +10173,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,  		struct btrfs_block_group *bg;  		u64 len = isize - start; -		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); +		em = 
btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);  		if (IS_ERR(em)) {  			ret = PTR_ERR(em);  			goto out; @@ -11003,11 +10341,6 @@ static const struct inode_operations btrfs_dir_inode_operations = {  	.update_time	= btrfs_update_time,  	.tmpfile        = btrfs_tmpfile,  }; -static const struct inode_operations btrfs_dir_ro_inode_operations = { -	.lookup		= btrfs_lookup, -	.permission	= btrfs_permission, -	.update_time	= btrfs_update_time, -};  static const struct file_operations btrfs_dir_file_operations = {  	.llseek		= generic_file_llseek, | 
