diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/ctree.c | 8 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 16 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 87 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 1 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/send.c | 171 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 3 | ||||
| -rw-r--r-- | fs/btrfs/tree-log.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 9 | ||||
| -rw-r--r-- | fs/btrfs/xattr.c | 8 | 
10 files changed, 249 insertions, 63 deletions
| diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 993642199326..6d67f32e648d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,  	parent_nritems = btrfs_header_nritems(parent);  	blocksize = root->nodesize; -	end_slot = parent_nritems; +	end_slot = parent_nritems - 1; -	if (parent_nritems == 1) +	if (parent_nritems <= 1)  		return 0;  	btrfs_set_lock_blocking(parent); -	for (i = start_slot; i < end_slot; i++) { +	for (i = start_slot; i <= end_slot; i++) {  		int close = 1;  		btrfs_node_key(parent, &disk_key, i); @@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,  			other = btrfs_node_blockptr(parent, i - 1);  			close = close_blocks(blocknr, other, blocksize);  		} -		if (!close && i < end_slot - 2) { +		if (!close && i < end_slot) {  			other = btrfs_node_blockptr(parent, i + 1);  			close = close_blocks(blocknr, other, blocksize);  		} diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 571f402d3fc4..6f080451fcb1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,  		return 0;  	} +	if (trans->aborted) +		return 0;  again:  	inode = lookup_free_space_inode(root, block_group, path);  	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { @@ -3243,6 +3245,20 @@ again:  	 */  	BTRFS_I(inode)->generation = 0;  	ret = btrfs_update_inode(trans, root, inode); +	if (ret) { +		/* +		 * So theoretically we could recover from this, simply set the +		 * super cache generation to 0 so we know to invalidate the +		 * cache, but then we'd have to keep track of the block groups +		 * that fail this way so we know we _have_ to reset this cache +		 * before the next commit or risk reading stale cache.  So to +		 * limit our exposure to horrible edge cases lets just abort the +		 * transaction, this only happens in really bad situations +		 * anyway. +		 */ +		btrfs_abort_transaction(trans, root, ret); +		goto out_put; +	}  	WARN_ON(ret);  	if (i_size_read(inode) > 0) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b78bbbac900d..30982bbd31c3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,  	mutex_unlock(&inode->i_mutex);  	/* -	 * we want to make sure fsync finds this change -	 * but we haven't joined a transaction running right now. -	 * -	 * Later on, someone is sure to update the inode and get the -	 * real transid recorded. -	 * -	 * We set last_trans now to the fs_info generation + 1, -	 * this will either be one more than the running transaction -	 * or the generation used for the next transaction if there isn't -	 * one running right now. -	 *  	 * We also have to set last_sub_trans to the current log transid,  	 * otherwise subsequent syncs to a file that's been synced in this  	 * transaction will appear to have already occured.  	 */ -	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;  	BTRFS_I(inode)->last_sub_trans = root->log_transid;  	if (num_written > 0) {  		err = generic_write_sync(file, pos, num_written); @@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  	atomic_inc(&root->log_batch);  	/* -	 * check the transaction that last modified this inode -	 * and see if its already been committed -	 */ -	if (!BTRFS_I(inode)->last_trans) { -		mutex_unlock(&inode->i_mutex); -		goto out; -	} - -	/* -	 * if the last transaction that changed this file was before -	 * the current transaction, we can bail out now without any -	 * syncing +	 * If the last transaction that changed this file was before the current +	 * transaction and we have the full sync flag set in our inode, we can +	 * bail out now without any syncing. +	 * +	 * Note that we can't bail out if the full sync flag isn't set. This is +	 * because when the full sync flag is set we start all ordered extents +	 * and wait for them to fully complete - when they complete they update +	 * the inode's last_trans field through: +	 * +	 *     btrfs_finish_ordered_io() -> +	 *         btrfs_update_inode_fallback() -> +	 *             btrfs_update_inode() -> +	 *                 btrfs_set_inode_last_trans() +	 * +	 * So we are sure that last_trans is up to date and can do this check to +	 * bail out safely. For the fast path, when the full sync flag is not +	 * set in our inode, we can not do it because we start only our ordered +	 * extents and don't wait for them to complete (that is when +	 * btrfs_finish_ordered_io runs), so here at this point their last_trans +	 * value might be less than or equals to fs_info->last_trans_committed, +	 * and setting a speculative last_trans for an inode when a buffered +	 * write is made (such as fs_info->generation + 1 for example) would not +	 * be reliable since after setting the value and before fsync is called +	 * any number of transactions can start and commit (transaction kthread +	 * commits the current transaction periodically), and a transaction +	 * commit does not start nor waits for ordered extents to complete.  	 */  	smp_mb();  	if (btrfs_inode_in_log(inode, root->fs_info->generation) || -	    BTRFS_I(inode)->last_trans <= -	    root->fs_info->last_trans_committed) { -		BTRFS_I(inode)->last_trans = 0; - +	    (full_sync && BTRFS_I(inode)->last_trans <= +	     root->fs_info->last_trans_committed)) {  		/*  		 * We'v had everything committed since the last time we were  		 * modified so clear this flag in case it was set for whatever @@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	bool same_page;  	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);  	u64 ino_size; +	bool truncated_page = false; +	bool updated_inode = false;  	ret = btrfs_wait_ordered_range(inode, offset, len);  	if (ret) @@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	 * entire page.  	 */  	if (same_page && len < PAGE_CACHE_SIZE) { -		if (offset < ino_size) +		if (offset < ino_size) { +			truncated_page = true;  			ret = btrfs_truncate_page(inode, offset, len, 0); +		} else { +			ret = 0; +		}  		goto out_only_mutex;  	}  	/* zero back part of the first page */  	if (offset < ino_size) { +		truncated_page = true;  		ret = btrfs_truncate_page(inode, offset, 0, 0);  		if (ret) {  			mutex_unlock(&inode->i_mutex); @@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  		if (!ret) {  			/* zero the front end of the last page */  			if (tail_start + tail_len < ino_size) { +				truncated_page = true;  				ret = btrfs_truncate_page(inode,  						tail_start + tail_len, 0, 1);  				if (ret) @@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)  	}  	if (lockend < lockstart) { -		mutex_unlock(&inode->i_mutex); -		return 0; +		ret = 0; +		goto out_only_mutex;  	}  	while (1) { @@ -2506,6 +2514,7 @@ out_trans:  	trans->block_rsv = &root->fs_info->trans_block_rsv;  	ret = btrfs_update_inode(trans, root, inode); +	updated_inode = true;  	btrfs_end_transaction(trans, root);  	btrfs_btree_balance_dirty(root);  out_free: @@ -2515,6 +2524,22 @@ out:  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,  			     &cached_state, GFP_NOFS);  out_only_mutex: +	if (!updated_inode && truncated_page && !ret && !err) { +		/* +		 * If we only end up zeroing part of a page, we still need to +		 * update the inode item, so that all the time fields are +		 * updated as well as the necessary btrfs inode in memory fields +		 * for detecting, at fsync time, if the inode isn't yet in the +		 * log tree or it's there but not up to date. +		 */ +		trans = btrfs_start_transaction(root, 1); +		if (IS_ERR(trans)) { +			err = PTR_ERR(trans); +		} else { +			err = btrfs_update_inode(trans, root, inode); +			ret = btrfs_end_transaction(trans, root); +		} +	}  	mutex_unlock(&inode->i_mutex);  	if (ret && !err)  		err = ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a85c23dfcddb..da828cf5e8f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7285,7 +7285,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,  	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&  	     em->block_start != EXTENT_MAP_HOLE)) {  		int type; -		int ret;  		u64 block_start, orig_start, orig_block_len, ram_bytes;  		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 534544e08f76..157cc54fc634 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,  			continue;  		if (entry_end(ordered) <= start)  			break; -		if (!list_empty(&ordered->log_list)) -			continue; -		if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) +		if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))  			continue;  		list_add(&ordered->log_list, logged_list);  		atomic_inc(&ordered->refs); @@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,  		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,  						   &ordered->flags)); -		if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) -			list_add_tail(&ordered->trans_list, &trans->ordered); +		list_add_tail(&ordered->trans_list, &trans->ordered);  		spin_lock_irq(&log->log_extents_lock[index]);  	}  	spin_unlock_irq(&log->log_extents_lock[index]); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fe5857223515..d6033f540cc7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -230,6 +230,7 @@ struct pending_dir_move {  	u64 parent_ino;  	u64 ino;  	u64 gen; +	bool is_orphan;  	struct list_head update_refs;  }; @@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,  				u64 ino_gen,  				u64 parent_ino,  				struct list_head *new_refs, -				struct list_head *deleted_refs) +				struct list_head *deleted_refs, +				const bool is_orphan)  {  	struct rb_node **p = &sctx->pending_dir_moves.rb_node;  	struct rb_node *parent = NULL; @@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,  	pm->parent_ino = parent_ino;  	pm->ino = ino;  	pm->gen = ino_gen; +	pm->is_orphan = is_orphan;  	INIT_LIST_HEAD(&pm->list);  	INIT_LIST_HEAD(&pm->update_refs);  	RB_CLEAR_NODE(&pm->node); @@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)  	rmdir_ino = dm->rmdir_ino;  	free_waiting_dir_move(sctx, dm); -	ret = get_first_ref(sctx->parent_root, pm->ino, -			    &parent_ino, &parent_gen, name); -	if (ret < 0) -		goto out; - -	ret = get_cur_path(sctx, parent_ino, parent_gen, -			   from_path); -	if (ret < 0) -		goto out; -	ret = fs_path_add_path(from_path, name); +	if (pm->is_orphan) { +		ret = gen_unique_name(sctx, pm->ino, +				      pm->gen, from_path); +	} else { +		ret = get_first_ref(sctx->parent_root, pm->ino, +				    &parent_ino, &parent_gen, name); +		if (ret < 0) +			goto out; +		ret = get_cur_path(sctx, parent_ino, parent_gen, +				   from_path); +		if (ret < 0) +			goto out; +		ret = fs_path_add_path(from_path, name); +	}  	if (ret < 0)  		goto out; @@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)  		LIST_HEAD(deleted_refs);  		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);  		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, -					   &pm->update_refs, &deleted_refs); +					   &pm->update_refs, &deleted_refs, +					   pm->is_orphan);  		if (ret < 0)  			goto out;  		if (rmdir_ino) { @@ -3283,6 +3291,127 @@ out:  	return ret;  } +/* + * We might need to delay a directory rename even when no ancestor directory + * (in the send root) with a higher inode number than ours (sctx->cur_ino) was + * renamed. This happens when we rename a directory to the old name (the name + * in the parent root) of some other unrelated directory that got its rename + * delayed due to some ancestor with higher number that got renamed. + * + * Example: + * + * Parent snapshot: + * .                                       (ino 256) + * |---- a/                                (ino 257) + * |     |---- file                        (ino 260) + * | + * |---- b/                                (ino 258) + * |---- c/                                (ino 259) + * + * Send snapshot: + * .                                       (ino 256) + * |---- a/                                (ino 258) + * |---- x/                                (ino 259) + *       |---- y/                          (ino 257) + *             |----- file                 (ino 260) + * + * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 + * from 'a' to 'x/y' happening first, which in turn depends on the rename of + * inode 259 from 'c' to 'x'. So the order of rename commands the send stream + * must issue is: + * + * 1 - rename 259 from 'c' to 'x' + * 2 - rename 257 from 'a' to 'x/y' + * 3 - rename 258 from 'b' to 'a' + * + * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can + * be done right away and < 0 on error. + */ +static int wait_for_dest_dir_move(struct send_ctx *sctx, +				  struct recorded_ref *parent_ref, +				  const bool is_orphan) +{ +	struct btrfs_path *path; +	struct btrfs_key key; +	struct btrfs_key di_key; +	struct btrfs_dir_item *di; +	u64 left_gen; +	u64 right_gen; +	int ret = 0; + +	if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) +		return 0; + +	path = alloc_path_for_send(); +	if (!path) +		return -ENOMEM; + +	key.objectid = parent_ref->dir; +	key.type = BTRFS_DIR_ITEM_KEY; +	key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); + +	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); +	if (ret < 0) { +		goto out; +	} else if (ret > 0) { +		ret = 0; +		goto out; +	} + +	di = btrfs_match_dir_item_name(sctx->parent_root, path, +				       parent_ref->name, parent_ref->name_len); +	if (!di) { +		ret = 0; +		goto out; +	} +	/* +	 * di_key.objectid has the number of the inode that has a dentry in the +	 * parent directory with the same name that sctx->cur_ino is being +	 * renamed to. We need to check if that inode is in the send root as +	 * well and if it is currently marked as an inode with a pending rename, +	 * if it is, we need to delay the rename of sctx->cur_ino as well, so +	 * that it happens after that other inode is renamed. +	 */ +	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); +	if (di_key.type != BTRFS_INODE_ITEM_KEY) { +		ret = 0; +		goto out; +	} + +	ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, +			     &left_gen, NULL, NULL, NULL, NULL); +	if (ret < 0) +		goto out; +	ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, +			     &right_gen, NULL, NULL, NULL, NULL); +	if (ret < 0) { +		if (ret == -ENOENT) +			ret = 0; +		goto out; +	} + +	/* Different inode, no need to delay the rename of sctx->cur_ino */ +	if (right_gen != left_gen) { +		ret = 0; +		goto out; +	} + +	if (is_waiting_for_move(sctx, di_key.objectid)) { +		ret = add_pending_dir_move(sctx, +					   sctx->cur_ino, +					   sctx->cur_inode_gen, +					   di_key.objectid, +					   &sctx->new_refs, +					   &sctx->deleted_refs, +					   is_orphan); +		if (!ret) +			ret = 1; +	} +out: +	btrfs_free_path(path); +	return ret; +} +  static int wait_for_parent_move(struct send_ctx *sctx,  				struct recorded_ref *parent_ref)  { @@ -3349,7 +3478,8 @@ out:  					   sctx->cur_inode_gen,  					   ino,  					   &sctx->new_refs, -					   &sctx->deleted_refs); +					   &sctx->deleted_refs, +					   false);  		if (!ret)  			ret = 1;  	} @@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)  	int did_overwrite = 0;  	int is_orphan = 0;  	u64 last_dir_ino_rm = 0; +	bool can_rename = true;  verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); @@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);  			}  		} +		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { +			ret = wait_for_dest_dir_move(sctx, cur, is_orphan); +			if (ret < 0) +				goto out; +			if (ret == 1) { +				can_rename = false; +				*pending_move = 1; +			} +		} +  		/*  		 * link/move the ref to the new place. If we have an orphan  		 * inode, move it and update valid_path. If not, link or move  		 * it depending on the inode mode.  		 */ -		if (is_orphan) { +		if (is_orphan && can_rename) {  			ret = send_rename(sctx, valid_path, cur->full_path);  			if (ret < 0)  				goto out; @@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);  			ret = fs_path_copy(valid_path, cur->full_path);  			if (ret < 0)  				goto out; -		} else { +		} else if (can_rename) {  			if (S_ISDIR(sctx->cur_inode_mode)) {  				/*  				 * Dirs can't be linked, so move it. For moved diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7e80f32550a6..88e51aded6bd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1052,9 +1052,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,  		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);  		if (ret)  			return ret; -		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); -		if (ret) -			return ret;  	}  	return 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9a37f8b39bae..c5b8ba37f88e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1012,7 +1012,7 @@ again:  		base = btrfs_item_ptr_offset(leaf, path->slots[0]);  		while (cur_offset < item_size) { -			extref = (struct btrfs_inode_extref *)base + cur_offset; +			extref = (struct btrfs_inode_extref *)(base + cur_offset);  			victim_name_len = btrfs_inode_extref_name_len(leaf, extref); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cd4d1315aaa9..8222f6f74147 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4903,10 +4903,17 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)  static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)  {  	struct btrfs_bio *bbio = kzalloc( +		 /* the size of the btrfs_bio */  		sizeof(struct btrfs_bio) + +		/* plus the variable array for the stripes */  		sizeof(struct btrfs_bio_stripe) * (total_stripes) + +		/* plus the variable array for the tgt dev */  		sizeof(int) * (real_stripes) + -		sizeof(u64) * (real_stripes), +		/* +		 * plus the raid_map, which includes both the tgt dev +		 * and the stripes +		 */ +		sizeof(u64) * (total_stripes),  		GFP_NOFS);  	if (!bbio)  		return NULL; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 47b19465f0dc..883b93623bc5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,  					name, name_len, -1);  		if (!di && (flags & XATTR_REPLACE))  			ret = -ENODATA; +		else if (IS_ERR(di)) +			ret = PTR_ERR(di);  		else if (di)  			ret = btrfs_delete_one_dir_name(trans, root, path, di);  		goto out; @@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,  		ASSERT(mutex_is_locked(&inode->i_mutex));  		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),  					name, name_len, 0); -		if (!di) { +		if (!di)  			ret = -ENODATA; +		else if (IS_ERR(di)) +			ret = PTR_ERR(di); +		if (ret)  			goto out; -		}  		btrfs_release_path(path);  		di = NULL;  	} | 
