summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/compression.c44
-rw-r--r--fs/btrfs/ctree.c159
-rw-r--r--fs/btrfs/ctree.h31
-rw-r--r--fs/btrfs/delayed-ref.c6
-rw-r--r--fs/btrfs/dir-item.c45
-rw-r--r--fs/btrfs/disk-io.c228
-rw-r--r--fs/btrfs/export.c10
-rw-r--r--fs/btrfs/extent-tree.c357
-rw-r--r--fs/btrfs/extent_io.c224
-rw-r--r--fs/btrfs/extent_io.h3
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file-item.c10
-rw-r--r--fs/btrfs/file.c497
-rw-r--r--fs/btrfs/free-space-cache.c660
-rw-r--r--fs/btrfs/free-space-cache.h2
-rw-r--r--fs/btrfs/inode-map.c3
-rw-r--r--fs/btrfs/inode.c549
-rw-r--r--fs/btrfs/ioctl.c138
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/ordered-data.c10
-rw-r--r--fs/btrfs/print-tree.c1
-rw-r--r--fs/btrfs/relocation.c51
-rw-r--r--fs/btrfs/root-tree.c6
-rw-r--r--fs/btrfs/super.c21
-rw-r--r--fs/btrfs/transaction.c19
-rw-r--r--fs/btrfs/tree-log.c92
-rw-r--r--fs/btrfs/volumes.c269
-rw-r--r--fs/btrfs/volumes.h12
-rw-r--r--fs/btrfs/xattr.c8
-rw-r--r--fs/btrfs/xattr.h3
-rw-r--r--fs/btrfs/zlib.c3
33 files changed, 2310 insertions, 1187 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 15b5ca2a2606..de34bfad9ec3 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
char *value = NULL;
struct posix_acl *acl;
+ if (!IS_POSIXACL(inode))
+ return NULL;
+
acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED)
return acl;
@@ -84,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
struct posix_acl *acl;
int ret = 0;
+ if (!IS_POSIXACL(dentry->d_inode))
+ return -EOPNOTSUPP;
+
acl = btrfs_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
@@ -164,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
int ret;
struct posix_acl *acl = NULL;
- if (!is_owner_or_cap(dentry->d_inode))
+ if (!inode_owner_or_capable(dentry->d_inode))
return -EPERM;
if (!IS_POSIXACL(dentry->d_inode))
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ccc991c542df..57c3bb2884ce 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -136,9 +136,8 @@ struct btrfs_inode {
* items we think we'll end up using, and reserved_extents is the number
* of extent items we've reserved metadata for.
*/
- spinlock_t accounting_lock;
atomic_t outstanding_extents;
- int reserved_extents;
+ atomic_t reserved_extents;
/*
* ordered_data_close is set by truncate when a file that used
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f745287fbf2e..41d1d7c70e29 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -340,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+ if (!cb)
+ return -ENOMEM;
atomic_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
@@ -354,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+ if(!bio) {
+ kfree(cb);
+ return -ENOMEM;
+ }
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
atomic_inc(&cb->pending_bios);
@@ -562,7 +568,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
- int ret;
+ int ret = -ENOMEM;
u32 *sums;
tree = &BTRFS_I(inode)->io_tree;
@@ -577,6 +583,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+ if (!cb)
+ goto out;
+
atomic_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
@@ -597,13 +606,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE;
- cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages,
+ cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
GFP_NOFS);
+ if (!cb->compressed_pages)
+ goto fail1;
+
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
for (page_index = 0; page_index < nr_pages; page_index++) {
cb->compressed_pages[page_index] = alloc_page(GFP_NOFS |
__GFP_HIGHMEM);
+ if (!cb->compressed_pages[page_index])
+ goto fail2;
}
cb->nr_pages = nr_pages;
@@ -614,6 +628,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = uncompressed_len;
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
+ if (!comp_bio)
+ goto fail2;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
atomic_inc(&cb->pending_bios);
@@ -647,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
atomic_inc(&cb->pending_bios);
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- btrfs_lookup_bio_sums(root, inode, comp_bio,
- sums);
+ ret = btrfs_lookup_bio_sums(root, inode,
+ comp_bio, sums);
+ BUG_ON(ret);
}
sums += (comp_bio->bi_size + root->sectorsize - 1) /
root->sectorsize;
@@ -673,14 +690,27 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
BUG_ON(ret);
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+ ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+ BUG_ON(ret);
+ }
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
BUG_ON(ret);
bio_put(comp_bio);
return 0;
+
+fail2:
+ for (page_index = 0; page_index < nr_pages; page_index++)
+ free_page((unsigned long)cb->compressed_pages[page_index]);
+
+ kfree(cb->compressed_pages);
+fail1:
+ kfree(cb);
+out:
+ free_extent_map(em);
+ return ret;
}
static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
@@ -900,7 +930,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
return ret;
}
-void __exit btrfs_exit_compress(void)
+void btrfs_exit_compress(void)
{
free_workspaces();
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b5baff0dccfe..84d7ca1fe0ba 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -147,10 +147,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
- spin_lock(&root->node_lock);
- eb = root->node;
+
+ rcu_read_lock();
+ eb = rcu_dereference(root->node);
extent_buffer_get(eb);
- spin_unlock(&root->node_lock);
+ rcu_read_unlock();
return eb;
}
@@ -165,14 +166,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
while (1) {
eb = btrfs_root_node(root);
btrfs_tree_lock(eb);
-
- spin_lock(&root->node_lock);
- if (eb == root->node) {
- spin_unlock(&root->node_lock);
+ if (eb == root->node)
break;
- }
- spin_unlock(&root->node_lock);
-
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
}
@@ -458,10 +453,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
else
parent_start = 0;
- spin_lock(&root->node_lock);
- root->node = cow;
extent_buffer_get(cow);
- spin_unlock(&root->node_lock);
+ rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
@@ -542,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
ret = __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0);
+
+ trace_btrfs_cow_block(root, buf, *cow_ret);
+
return ret;
}
@@ -686,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
if (!cur) {
cur = read_tree_block(root, blocknr,
blocksize, gen);
+ if (!cur)
+ return -EIO;
} else if (!uptodate) {
btrfs_read_buffer(cur, gen);
}
@@ -732,122 +730,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
return btrfs_item_offset_nr(leaf, nr - 1);
}
-/*
- * extra debugging checks to make sure all the items in a key are
- * well formed and in the proper order
- */
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
- int level)
-{
- struct extent_buffer *parent = NULL;
- struct extent_buffer *node = path->nodes[level];
- struct btrfs_disk_key parent_key;
- struct btrfs_disk_key node_key;
- int parent_slot;
- int slot;
- struct btrfs_key cpukey;
- u32 nritems = btrfs_header_nritems(node);
-
- if (path->nodes[level + 1])
- parent = path->nodes[level + 1];
-
- slot = path->slots[level];
- BUG_ON(nritems == 0);
- if (parent) {
- parent_slot = path->slots[level + 1];
- btrfs_node_key(parent, &parent_key, parent_slot);
- btrfs_node_key(node, &node_key, 0);
- BUG_ON(memcmp(&parent_key, &node_key,
- sizeof(struct btrfs_disk_key)));
- BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
- btrfs_header_bytenr(node));
- }
- BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
- if (slot != 0) {
- btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
- btrfs_node_key(node, &node_key, slot);
- BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
- }
- if (slot < nritems - 1) {
- btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
- btrfs_node_key(node, &node_key, slot);
- BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
- }
- return 0;
-}
-
-/*
- * extra checking to make sure all the items in a leaf are
- * well formed and in the proper order
- */
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
- int level)
-{
- struct extent_buffer *leaf = path->nodes[level];
- struct extent_buffer *parent = NULL;
- int parent_slot;
- struct btrfs_key cpukey;
- struct btrfs_disk_key parent_key;
- struct btrfs_disk_key leaf_key;
- int slot = path->slots[0];
-
- u32 nritems = btrfs_header_nritems(leaf);
-
- if (path->nodes[level + 1])
- parent = path->nodes[level + 1];
-
- if (nritems == 0)
- return 0;
-
- if (parent) {
- parent_slot = path->slots[level + 1];
- btrfs_node_key(parent, &parent_key, parent_slot);
- btrfs_item_key(leaf, &leaf_key, 0);
-
- BUG_ON(memcmp(&parent_key, &leaf_key,
- sizeof(struct btrfs_disk_key)));
- BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
- btrfs_header_bytenr(leaf));
- }
- if (slot != 0 && slot < nritems - 1) {
- btrfs_item_key(leaf, &leaf_key, slot);
- btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
- if (comp_keys(&leaf_key, &cpukey) <= 0) {
- btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d offset bad key\n", slot);
- BUG_ON(1);
- }
- if (btrfs_item_offset_nr(leaf, slot - 1) !=
- btrfs_item_end_nr(leaf, slot)) {
- btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d offset bad\n", slot);
- BUG_ON(1);
- }
- }
- if (slot < nritems - 1) {
- btrfs_item_key(leaf, &leaf_key, slot);
- btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
- BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
- if (btrfs_item_offset_nr(leaf, slot) !=
- btrfs_item_end_nr(leaf, slot + 1)) {
- btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d offset bad\n", slot);
- BUG_ON(1);
- }
- }
- BUG_ON(btrfs_item_offset_nr(leaf, 0) +
- btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
- return 0;
-}
-
-static noinline int check_block(struct btrfs_root *root,
- struct btrfs_path *path, int level)
-{
- return 0;
- if (level == 0)
- return check_leaf(root, path, level);
- return check_node(root, path, level);
-}
/*
* search for key in the extent_buffer. The items start at offset p,
@@ -1046,9 +928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
- spin_lock(&root->node_lock);
- root->node = child;
- spin_unlock(&root->node_lock);
+ rcu_assign_pointer(root->node, child);
add_root_to_dirty_list(root);
btrfs_tree_unlock(child);
@@ -1188,7 +1068,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
}
}
/* double check we haven't messed things up */
- check_block(root, path, level);
if (orig_ptr !=
btrfs_node_blockptr(path->nodes[level], path->slots[level]))
BUG();
@@ -1798,12 +1677,6 @@ cow_done:
if (!cow)
btrfs_unlock_up_safe(p, level + 1);
- ret = check_block(root, p, level);
- if (ret) {
- ret = -1;
- goto done;
- }
-
ret = bin_search(b, key, level, &slot);
if (level != 0) {
@@ -2130,10 +2003,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
- spin_lock(&root->node_lock);
old = root->node;
- root->node = c;
- spin_unlock(&root->node_lock);
+ rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
free_extent_buffer(old);
@@ -3840,7 +3711,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
unsigned long ptr;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
if (!ret) {
leaf = path->nodes[0];
@@ -4217,6 +4089,7 @@ find_next_key:
}
btrfs_set_path_blocking(path);
cur = read_node_slot(root, cur, slot);
+ BUG_ON(!cur);
btrfs_tree_lock(cur);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..d47ce8307854 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
#include <linux/wait.h>
#include <linux/slab.h>
#include <linux/kobject.h>
+#include <trace/events/btrfs.h>
#include <asm/kmap_types.h>
#include "extent_io.h"
#include "extent_map.h"
@@ -40,6 +41,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
+extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
#define BTRFS_MAGIC "_BHRfS_M"
@@ -729,6 +731,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ /*
+ * we bump reservation progress every time we decrement
+ * bytes_reserved. This way people waiting for reservations
+ * know something good has happened and they can check
+ * for progress. The number here isn't to be trusted, it
+ * just shows reclaim activity
+ */
+ unsigned long reservation_progress;
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
@@ -773,9 +784,6 @@ struct btrfs_free_cluster {
/* first extent starting offset */
u64 window_start;
- /* if this cluster simply points at a bitmap in the block group */
- bool points_to_bitmap;
-
struct btrfs_block_group_cache *block_group;
/*
* when a cluster is allocated from a block group, we put the
@@ -1254,6 +1262,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1273,6 +1282,7 @@ struct btrfs_root {
#define BTRFS_INODE_NODUMP (1 << 8)
#define BTRFS_INODE_NOATIME (1 << 9)
#define BTRFS_INODE_DIRSYNC (1 << 10)
+#define BTRFS_INODE_COMPRESS (1 << 11)
/* some macros to generate set/get funcs for the struct fields. This
* assumes there is a lefoo_to_cpu for every type, so lets make a simple
@@ -2147,6 +2157,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 root_objectid, u64 owner, u64 offset);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve, int sinfo);
int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2217,8 +2229,12 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes);
+ u64 num_bytes, u64 *actual_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type);
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
+int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2380,6 +2396,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 dir,
const char *name, u16 name_len,
int mod);
+int verify_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_dir_item *dir_item);
/* orphan.c */
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -2516,7 +2535,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct inode *inode);
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
-void btrfs_orphan_cleanup(struct btrfs_root *root);
+int btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
@@ -2524,7 +2543,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e807b143b857..bce28f653899 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
INIT_LIST_HEAD(&head_ref->cluster);
mutex_init(&head_ref->mutex);
+ trace_btrfs_delayed_ref_head(ref, head_ref, action);
+
existing = tree_insert(&delayed_refs->root, &ref->rb_node);
if (existing) {
@@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
}
full_ref->level = level;
+ trace_btrfs_delayed_tree_ref(ref, full_ref, action);
+
existing = tree_insert(&delayed_refs->root, &ref->rb_node);
if (existing) {
@@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
full_ref->objectid = owner;
full_ref->offset = offset;
+ trace_btrfs_delayed_data_ref(ref, full_ref, action);
+
existing = tree_insert(&delayed_refs->root, &ref->rb_node);
if (existing) {
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f0cad5ae5be7..c62f02f6ae69 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -151,7 +151,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
ret = PTR_ERR(dir_item);
if (ret == -EEXIST)
goto second_insert;
- goto out;
+ goto out_free;
}
leaf = path->nodes[0];
@@ -170,7 +170,7 @@ second_insert:
/* FIXME, use some real flag for selecting the extra index */
if (root == root->fs_info->tree_root) {
ret = 0;
- goto out;
+ goto out_free;
}
btrfs_release_path(root, path);
@@ -180,7 +180,7 @@ second_insert:
name, name_len);
if (IS_ERR(dir_item)) {
ret2 = PTR_ERR(dir_item);
- goto out;
+ goto out_free;
}
leaf = path->nodes[0];
btrfs_cpu_key_to_disk(&disk_key, location);
@@ -192,7 +192,9 @@ second_insert:
name_ptr = (unsigned long)(dir_item + 1);
write_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_mark_buffer_dirty(leaf);
-out:
+
+out_free:
+
btrfs_free_path(path);
if (ret)
return ret;
@@ -377,6 +379,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
leaf = path->nodes[0];
dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+ if (verify_dir_item(root, leaf, dir_item))
+ return NULL;
+
total_len = btrfs_item_size_nr(leaf, path->slots[0]);
while (cur < total_len) {
this_len = sizeof(*dir_item) +
@@ -429,3 +434,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
}
return ret;
}
+
+int verify_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_dir_item *dir_item)
+{
+ u16 namelen = BTRFS_NAME_LEN;
+ u8 type = btrfs_dir_type(leaf, dir_item);
+
+ if (type >= BTRFS_FT_MAX) {
+ printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
+ (int)type);
+ return 1;
+ }
+
+ if (type == BTRFS_FT_XATTR)
+ namelen = XATTR_NAME_MAX;
+
+ if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+ printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n",
+ (unsigned)btrfs_dir_data_len(leaf, dir_item));
+ return 1;
+ }
+
+ /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+ if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
+ printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
+ (unsigned)btrfs_dir_data_len(leaf, dir_item));
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b531c36455d8..d7a7315bd031 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -29,6 +29,7 @@
#include <linux/crc32c.h>
#include <linux/slab.h>
#include <linux/migrate.h>
+#include <asm/unaligned.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
void btrfs_csum_final(u32 crc, char *result)
{
- *(__le32 *)result = ~cpu_to_le32(crc);
+ put_unaligned_le32(~crc, result);
}
/*
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
int num_copies = 0;
int mirror_num = 0;
+ clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
while (1) {
ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
!verify_parent_transid(io_tree, eb, parent_transid))
return ret;
+ /*
+ * This buffer's crc is fine, but its contents are corrupted, so
+ * there is no reason to read the other copies, they won't be
+ * any less wrong.
+ */
+ if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
+ return ret;
+
num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
eb->start, eb->len);
if (num_copies == 1)
@@ -359,10 +369,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE)
+ if (page->private == EXTENT_PAGE_PRIVATE) {
+ WARN_ON(1);
goto out;
- if (!page->private)
+ }
+ if (!page->private) {
+ WARN_ON(1);
goto out;
+ }
len = page->private >> 2;
WARN_ON(len == 0);
@@ -415,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
return ret;
}
+#define CORRUPT(reason, eb, root, slot) \
+ printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
+ "root=%llu, slot=%d\n", reason, \
+ (unsigned long long)btrfs_header_bytenr(eb), \
+ (unsigned long long)root->objectid, slot)
+
+static noinline int check_leaf(struct btrfs_root *root,
+ struct extent_buffer *leaf)
+{
+ struct btrfs_key key;
+ struct btrfs_key leaf_key;
+ u32 nritems = btrfs_header_nritems(leaf);
+ int slot;
+
+ if (nritems == 0)
+ return 0;
+
+ /* Check the 0 item */
+ if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ CORRUPT("invalid item offset size pair", leaf, root, 0);
+ return -EIO;
+ }
+
+ /*
+ * Check to make sure each items keys are in the correct order and their
+ * offsets make sense. We only have to loop through nritems-1 because
+ * we check the current slot against the next slot, which verifies the
+ * next slot's offset+size makes sense and that the current's slot
+ * offset is correct.
+ */
+ for (slot = 0; slot < nritems - 1; slot++) {
+ btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+ btrfs_item_key_to_cpu(leaf, &key, slot + 1);
+
+ /* Make sure the keys are in the right order */
+ if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
+ CORRUPT("bad key order", leaf, root, slot);
+ return -EIO;
+ }
+
+ /*
+ * Make sure the offset and ends are right, remember that the
+ * item data starts at the end of the leaf and grows towards the
+ * front.
+ */
+ if (btrfs_item_offset_nr(leaf, slot) !=
+ btrfs_item_end_nr(leaf, slot + 1)) {
+ CORRUPT("slot offset bad", leaf, root, slot);
+ return -EIO;
+ }
+
+ /*
+ * Check to make sure that we don't point outside of the leaf,
+ * just incase all the items are consistent to eachother, but
+ * all point outside of the leaf.
+ */
+ if (btrfs_item_end_nr(leaf, slot) >
+ BTRFS_LEAF_DATA_SIZE(root)) {
+ CORRUPT("slot end outside of leaf", leaf, root, slot);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
{
@@ -481,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
btrfs_set_buffer_lockdep_class(eb, found_level);
ret = csum_tree_block(root, eb, 1);
- if (ret)
+ if (ret) {
+ ret = -EIO;
+ goto err;
+ }
+
+ /*
+ * If this is a leaf block and it is corrupt, set the corrupt bit so
+ * that we don't try and read the other copies of this block, just
+ * return -EIO.
+ */
+ if (found_level == 0 && check_leaf(root, eb)) {
+ set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
+ }
end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
end = eb->start + end - 1;
@@ -843,7 +936,6 @@ static const struct address_space_operations btree_aops = {
.writepages = btree_writepages,
.releasepage = btree_releasepage,
.invalidatepage = btree_invalidatepage,
- .sync_page = block_sync_page,
#ifdef CONFIG_MIGRATION
.migratepage = btree_migratepage,
#endif
@@ -1156,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
root, fs_info, location->objectid);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ kfree(root);
+ return ERR_PTR(-ENOMEM);
+ }
ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
if (ret == 0) {
l = path->nodes[0];
@@ -1327,82 +1422,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
}
/*
- * this unplugs every device on the box, and it is only used when page
- * is null
- */
-static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
- struct btrfs_device *device;
- struct btrfs_fs_info *info;
-
- info = (struct btrfs_fs_info *)bdi->unplug_io_data;
- list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
- if (!device->bdev)
- continue;
-
- bdi = blk_get_backing_dev_info(device->bdev);
- if (bdi->unplug_io_fn)
- bdi->unplug_io_fn(bdi, page);
- }
-}
-
-static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
- struct inode *inode;
- struct extent_map_tree *em_tree;
- struct extent_map *em;
- struct address_space *mapping;
- u64 offset;
-
- /* the generic O_DIRECT read code does this */
- if (1 || !page) {
- __unplug_io_fn(bdi, page);
- return;
- }
-
- /*
- * page->mapping may change at any time. Get a consistent copy
- * and use that for everything below
- */
- smp_mb();
- mapping = page->mapping;
- if (!mapping)
- return;
-
- inode = mapping->host;
-
- /*
- * don't do the expensive searching for a small number of
- * devices
- */
- if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) {
- __unplug_io_fn(bdi, page);
- return;
- }
-
- offset = page_offset(page);
-
- em_tree = &BTRFS_I(inode)->extent_tree;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
- read_unlock(&em_tree->lock);
- if (!em) {
- __unplug_io_fn(bdi, page);
- return;
- }
-
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- free_extent_map(em);
- __unplug_io_fn(bdi, page);
- return;
- }
- offset = offset - em->start;
- btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree,
- em->block_start + offset, page);
- free_extent_map(em);
-}
-
-/*
* If this fails, caller must call bdi_destroy() to get rid of the
* bdi again.
*/
@@ -1416,8 +1435,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
return err;
bdi->ra_pages = default_backing_dev_info.ra_pages;
- bdi->unplug_io_fn = btrfs_unplug_io_fn;
- bdi->unplug_io_data = info;
bdi->congested_fn = btrfs_congested_fn;
bdi->congested_data = info;
return 0;
@@ -1550,6 +1567,7 @@ static int transaction_kthread(void *arg)
spin_unlock(&root->fs_info->new_trans_lock);
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
if (transid == trans->transid) {
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
@@ -1627,6 +1645,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
goto fail_bdi;
}
+ fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -1757,6 +1777,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+ /*
+ * In the long term, we'll store the compression type in the super
+ * block, and it'll be used for per file compression control.
+ */
+ fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+
ret = btrfs_parse_options(tree_root, options);
if (ret) {
err = ret;
@@ -1962,6 +1988,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
+ ret = btrfs_init_space_info(fs_info);
+ if (ret) {
+ printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+ goto fail_block_groups;
+ }
+
ret = btrfs_read_block_groups(extent_root);
if (ret) {
printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@ -2053,9 +2085,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
- btrfs_orphan_cleanup(fs_info->fs_root);
- btrfs_orphan_cleanup(fs_info->tree_root);
+ err = btrfs_orphan_cleanup(fs_info->fs_root);
+ if (!err)
+ err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
+ if (err) {
+ close_ctree(tree_root);
+ return ERR_PTR(err);
+ }
}
return tree_root;
@@ -2430,8 +2467,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
root_objectid = gang[ret - 1]->root_key.objectid + 1;
for (i = 0; i < ret; i++) {
+ int err;
+
root_objectid = gang[i]->root_key.objectid;
- btrfs_orphan_cleanup(gang[i]);
+ err = btrfs_orphan_cleanup(gang[i]);
+ if (err)
+ return err;
}
root_objectid++;
}
@@ -2453,10 +2494,14 @@ int btrfs_commit_super(struct btrfs_root *root)
up_write(&root->fs_info->cleanup_work_sem);
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
/* run commit again to drop the original snapshot */
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
btrfs_commit_transaction(trans, root);
ret = btrfs_write_and_wait_transaction(NULL, root);
BUG_ON(ret);
@@ -2484,7 +2529,7 @@ int close_ctree(struct btrfs_root *root)
* ERROR state on disk.
*
* 2. when btrfs flips readonly just in btrfs_commit_super,
- * and in such case, btrfs cannnot write sb via btrfs_commit_super,
+ * and in such case, btrfs cannot write sb via btrfs_commit_super,
* and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
* btrfs will cleanup all FS resources first and write sb then.
*/
@@ -2554,6 +2599,8 @@ int close_ctree(struct btrfs_root *root)
kfree(fs_info->chunk_root);
kfree(fs_info->dev_root);
kfree(fs_info->csum_root);
+ kfree(fs_info);
+
return 0;
}
@@ -2936,7 +2983,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
break;
/* opt_discard */
- ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+ if (btrfs_test_opt(root, DISCARD))
+ ret = btrfs_error_discard_extent(root, start,
+ end + 1 - start,
+ NULL);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
btrfs_error_unpin_extent_range(root, start, end);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9786963b07e5..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
int len = *max_len;
int type;
- if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
- (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+ if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE;
return 255;
+ } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
+ *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+ return 255;
+ }
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
@@ -171,6 +175,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
int ret;
path = btrfs_alloc_path();
+ if (!path)
+ return ERR_PTR(-ENOMEM);
if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = root->root_key.objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b55269340cec..f619c3cb13b7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -36,8 +36,6 @@
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc);
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve, int sinfo);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
@@ -320,11 +318,6 @@ static int caching_kthread(void *data)
if (!path)
return -ENOMEM;
- exclude_super_stripes(extent_root, block_group);
- spin_lock(&block_group->space_info->lock);
- block_group->space_info->bytes_readonly += block_group->bytes_super;
- spin_unlock(&block_group->space_info->lock);
-
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
/*
@@ -447,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
* allocate blocks for the tree root we can't do the fast caching since
* we likely hold important locks.
*/
- if (!trans->transaction->in_commit &&
+ if (trans && (!trans->transaction->in_commit) &&
(root && root != root->fs_info->tree_root)) {
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
@@ -467,14 +460,16 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
cache->cached = BTRFS_CACHE_NO;
}
spin_unlock(&cache->lock);
- if (ret == 1)
+ if (ret == 1) {
+ free_excluded_extents(fs_info->extent_root, cache);
return 0;
+ }
}
if (load_cache_only)
return 0;
- caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+ caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
BUG_ON(!caching_ctl);
INIT_LIST_HEAD(&caching_ctl->list);
@@ -1743,39 +1738,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
return ret;
}
-static void btrfs_issue_discard(struct block_device *bdev,
+static int btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
{
- blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
+ return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
}
static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes)
+ u64 num_bytes, u64 *actual_bytes)
{
int ret;
- u64 map_length = num_bytes;
+ u64 discarded_bytes = 0;
struct btrfs_multi_bio *multi = NULL;
- if (!btrfs_test_opt(root, DISCARD))
- return 0;
/* Tell the block device(s) that the sectors can be discarded */
- ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
- bytenr, &map_length, &multi, 0);
+ ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
+ bytenr, &num_bytes, &multi, 0);
if (!ret) {
struct btrfs_bio_stripe *stripe = multi->stripes;
int i;
- if (map_length > num_bytes)
- map_length = num_bytes;
for (i = 0; i < multi->num_stripes; i++, stripe++) {
- btrfs_issue_discard(stripe->dev->bdev,
- stripe->physical,
- map_length);
+ ret = btrfs_issue_discard(stripe->dev->bdev,
+ stripe->physical,
+ stripe->length);
+ if (!ret)
+ discarded_bytes += stripe->length;
+ else if (ret != -EOPNOTSUPP)
+ break;
}
kfree(multi);
}
+ if (discarded_bytes && ret == -EOPNOTSUPP)
+ ret = 0;
+
+ if (actual_bytes)
+ *actual_bytes = discarded_bytes;
+
return ret;
}
@@ -3344,21 +3345,24 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
- int pause = 1;
+ long time_left;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+ int loops = 0;
+ unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
+ progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
max_reclaim = min(reserved, to_reclaim);
- while (1) {
+ while (loops < 1024) {
/* have the flusher threads jump in and do some IO */
smp_mb();
nr_pages = min_t(unsigned long, nr_pages,
@@ -3371,17 +3375,31 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
+ loops++;
+
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(pause);
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
+ time_left = schedule_timeout_interruptible(1);
+
+ /* We were interrupted, exit */
+ if (time_left)
+ break;
+
+ /* we've kicked the IO a few times, if anything has been freed,
+ * exit. There is no sense in looping here for a long time
+ * when we really need to commit the transaction, or there are
+ * just too many writers without enough free space
+ */
+
+ if (loops > 3) {
+ smp_mb();
+ if (progress != space_info->reservation_progress)
+ break;
+ }
}
return reclaimed >= to_reclaim;
@@ -3588,10 +3606,23 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes > 0) {
if (dest) {
- block_rsv_add_bytes(dest, num_bytes, 0);
- } else {
+ spin_lock(&dest->lock);
+ if (!dest->full) {
+ u64 bytes_to_add;
+
+ bytes_to_add = dest->size - dest->reserved;
+ bytes_to_add = min(num_bytes, bytes_to_add);
+ dest->reserved += bytes_to_add;
+ if (dest->reserved >= dest->size)
+ dest->full = 1;
+ num_bytes -= bytes_to_add;
+ }
+ spin_unlock(&dest->lock);
+ }
+ if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -3824,6 +3855,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
+ sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -3968,6 +4000,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve;
int nr_extents;
+ int reserved_extents;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
@@ -3975,26 +4008,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, root->sectorsize);
- spin_lock(&BTRFS_I(inode)->accounting_lock);
nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
- if (nr_extents > BTRFS_I(inode)->reserved_extents) {
- nr_extents -= BTRFS_I(inode)->reserved_extents;
+ reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+
+ if (nr_extents > reserved_extents) {
+ nr_extents -= reserved_extents;
to_reserve = calc_trans_metadata_size(root, nr_extents);
} else {
nr_extents = 0;
to_reserve = 0;
}
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret)
return ret;
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- BTRFS_I(inode)->reserved_extents += nr_extents;
+ atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
@@ -4009,19 +4040,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 to_free;
int nr_extents;
+ int reserved_extents;
num_bytes = ALIGN(num_bytes, root->sectorsize);
atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+ WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
- if (nr_extents < BTRFS_I(inode)->reserved_extents) {
- nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
- BTRFS_I(inode)->reserved_extents -= nr_extents;
- } else {
- nr_extents = 0;
- }
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
+ reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+ do {
+ int old, new;
+
+ nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
+ if (nr_extents >= reserved_extents) {
+ nr_extents = 0;
+ break;
+ }
+ old = reserved_extents;
+ nr_extents = reserved_extents - nr_extents;
+ new = reserved_extents - nr_extents;
+ old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
+ reserved_extents, new);
+ if (likely(old == reserved_extents))
+ break;
+ reserved_extents = old;
+ } while (1);
to_free = calc_csum_metadata_size(inode, num_bytes);
if (nr_extents > 0)
@@ -4112,6 +4154,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
@@ -4163,6 +4206,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -4193,8 +4237,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
* update size of reserved extents. this function may return -EAGAIN
* if 'reserve' is true or 'sinfo' is false.
*/
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve, int sinfo)
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve, int sinfo)
{
int ret = 0;
if (sinfo) {
@@ -4213,6 +4257,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -4332,7 +4377,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
if (ret)
break;
- ret = btrfs_discard_extent(root, start, end + 1 - start);
+ if (btrfs_test_opt(root, DISCARD))
+ ret = btrfs_discard_extent(root, start,
+ end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
unpin_extent_range(root, start, end);
@@ -4673,10 +4720,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
btrfs_add_free_space(cache, buf->start, buf->len);
- ret = update_reserved_bytes(cache, buf->len, 0, 0);
+ ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
if (ret == -EAGAIN) {
/* block group became read-only */
- update_reserved_bytes(cache, buf->len, 0, 1);
+ btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
goto out;
}
@@ -4691,6 +4738,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
+ cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
@@ -4712,6 +4760,11 @@ pin:
}
}
out:
+ /*
+ * Deleting the buffer, clear the corrupt flag since it doesn't matter
+ * anymore.
+ */
+ clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
btrfs_put_block_group(cache);
}
@@ -5159,7 +5212,7 @@ checks:
search_start - offset);
BUG_ON(offset > search_start);
- ret = update_reserved_bytes(block_group, num_bytes, 1,
+ ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
(data & BTRFS_BLOCK_GROUP_DATA));
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
@@ -5355,7 +5408,7 @@ again:
num_bytes, data, 1);
goto again;
}
- if (ret == -ENOSPC) {
+ if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
@@ -5365,6 +5418,8 @@ again:
dump_space_info(sinfo, num_bytes, 1);
}
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
+
return ret;
}
@@ -5380,12 +5435,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
return -ENOSPC;
}
- ret = btrfs_discard_extent(root, start, len);
+ if (btrfs_test_opt(root, DISCARD))
+ ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
- update_reserved_bytes(cache, len, 0, 1);
+ btrfs_update_reserved_bytes(cache, len, 0, 1);
btrfs_put_block_group(cache);
+ trace_btrfs_reserved_extent_free(root, start, len);
+
return ret;
}
@@ -5412,7 +5470,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5582,7 +5641,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
put_caching_control(caching_ctl);
}
- ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
+ ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
BUG_ON(ret);
btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -5633,6 +5692,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize)
{
struct btrfs_block_rsv *block_rsv;
+ struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
int ret;
block_rsv = get_block_rsv(trans, root);
@@ -5640,14 +5700,39 @@ use_block_rsv(struct btrfs_trans_handle *trans,
if (block_rsv->size == 0) {
ret = reserve_metadata_bytes(trans, root, block_rsv,
blocksize, 0);
- if (ret)
+ /*
+ * If we couldn't reserve metadata bytes try and use some from
+ * the global reserve.
+ */
+ if (ret && block_rsv != global_rsv) {
+ ret = block_rsv_use_bytes(global_rsv, blocksize);
+ if (!ret)
+ return global_rsv;
return ERR_PTR(ret);
+ } else if (ret) {
+ return ERR_PTR(ret);
+ }
return block_rsv;
}
ret = block_rsv_use_bytes(block_rsv, blocksize);
if (!ret)
return block_rsv;
+ if (ret) {
+ WARN_ON(1);
+ ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
+ 0);
+ if (!ret) {
+ spin_lock(&block_rsv->lock);
+ block_rsv->size += blocksize;
+ spin_unlock(&block_rsv->lock);
+ return block_rsv;
+ } else if (ret && block_rsv != global_rsv) {
+ ret = block_rsv_use_bytes(global_rsv, blocksize);
+ if (!ret)
+ return global_rsv;
+ }
+ }
return ERR_PTR(-ENOSPC);
}
@@ -5989,6 +6074,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
next = read_tree_block(root, bytenr, blocksize, generation);
+ if (!next)
+ return -EIO;
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
@@ -6221,6 +6308,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
BUG_ON(!wc);
trans = btrfs_start_transaction(tree_root, 0);
+ BUG_ON(IS_ERR(trans));
+
if (block_rsv)
trans->block_rsv = block_rsv;
@@ -6318,6 +6407,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
btrfs_end_transaction_throttle(trans, tree_root);
trans = btrfs_start_transaction(tree_root, 0);
+ BUG_ON(IS_ERR(trans));
if (block_rsv)
trans->block_rsv = block_rsv;
}
@@ -6377,10 +6467,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
wc = kzalloc(sizeof(*wc), GFP_NOFS);
- BUG_ON(!wc);
+ if (!wc) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
btrfs_assert_tree_locked(parent);
parent_level = btrfs_header_level(parent);
@@ -6446,6 +6540,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start,
int ret = 0;
ra = kzalloc(sizeof(*ra), GFP_NOFS);
+ if (!ra)
+ return -ENOMEM;
mutex_lock(&inode->i_mutex);
first_index = start >> PAGE_CACHE_SHIFT;
@@ -6531,7 +6627,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
u64 end = start + extent_key->offset - 1;
em = alloc_extent_map(GFP_NOFS);
- BUG_ON(!em || IS_ERR(em));
+ BUG_ON(!em);
em->start = start;
em->len = extent_key->offset;
@@ -6836,7 +6932,11 @@ static noinline int get_new_locations(struct inode *reloc_inode,
}
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ if (exts != *extents)
+ kfree(exts);
+ return -ENOMEM;
+ }
cur_pos = extent_key->objectid - offset;
last_byte = extent_key->objectid + extent_key->offset;
@@ -6878,6 +6978,10 @@ static noinline int get_new_locations(struct inode *reloc_inode,
struct disk_extent *old = exts;
max *= 2;
exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
+ if (!exts) {
+ ret = -ENOMEM;
+ goto out;
+ }
memcpy(exts, old, sizeof(*exts) * nr);
if (old != *extents)
kfree(old);
@@ -7360,7 +7464,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
int ret;
new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
- BUG_ON(!new_extent);
+ if (!new_extent)
+ return -ENOMEM;
ref = btrfs_lookup_leaf_ref(root, leaf->start);
BUG_ON(!ref);
@@ -7477,7 +7582,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
BUG_ON(reloc_root->commit_root != NULL);
while (1) {
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
mutex_lock(&root->fs_info->drop_mutex);
ret = btrfs_drop_snapshot(trans, reloc_root);
@@ -7535,7 +7640,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
if (found) {
trans = btrfs_start_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
}
@@ -7546,7 +7651,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
BUG_ON(!reloc_root);
- btrfs_orphan_cleanup(reloc_root);
+ ret = btrfs_orphan_cleanup(reloc_root);
+ BUG_ON(ret);
return 0;
}
@@ -7564,7 +7670,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
return 0;
root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
- BUG_ON(!root_item);
+ if (!root_item)
+ return -ENOMEM;
ret = btrfs_copy_root(trans, root, root->commit_root,
&eb, BTRFS_TREE_RELOC_OBJECTID);
@@ -7590,7 +7697,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
&root_key);
- BUG_ON(!reloc_root);
+ BUG_ON(IS_ERR(reloc_root));
reloc_root->last_trans = trans->transid;
reloc_root->commit_root = NULL;
reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
@@ -7779,7 +7886,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
trans = btrfs_start_transaction(extent_root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
if (extent_key->objectid == 0) {
ret = del_extent_zero(trans, extent_root, path, extent_key);
@@ -7843,6 +7950,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
eb = read_tree_block(found_root, block_start,
block_size, 0);
+ if (!eb) {
+ ret = -EIO;
+ goto out;
+ }
btrfs_tree_lock(eb);
BUG_ON(level != btrfs_header_level(eb));
@@ -8013,6 +8124,13 @@ out:
return ret;
}
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type)
+{
+ u64 alloc_flags = get_alloc_profile(root, type);
+ return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+}
+
/*
* helper to account the unused space of all the readonly block group in the
* list. takes mirrors into account.
@@ -8270,6 +8388,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
if (block_group->cached == BTRFS_CACHE_STARTED)
wait_block_group_cache_done(block_group);
+ /*
+ * We haven't cached this block group, which means we could
+ * possibly have excluded extents on this block group.
+ */
+ if (block_group->cached == BTRFS_CACHE_NO)
+ free_excluded_extents(info->extent_root, block_group);
+
btrfs_remove_free_space_cache(block_group);
btrfs_put_block_group(block_group);
@@ -8385,6 +8510,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
cache->sectorsize = root->sectorsize;
/*
+ * We need to exclude the super stripes now so that the space
+ * info has super bytes accounted for, otherwise we'll think
+ * we have more space than we actually do.
+ */
+ exclude_super_stripes(root, cache);
+
+ /*
* check for two cases, either we are full, and therefore
* don't need to bother with the caching work since we won't
* find any space, or we are empty, and we can just add all
@@ -8392,12 +8524,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* time, particularly in the full case.
*/
if (found_key.offset == btrfs_block_group_used(&cache->item)) {
- exclude_super_stripes(root, cache);
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
free_excluded_extents(root, cache);
} else if (btrfs_block_group_used(&cache->item) == 0) {
- exclude_super_stripes(root, cache);
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, root->fs_info,
@@ -8539,6 +8669,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
BUG_ON(!block_group);
BUG_ON(!block_group->ro);
+ /*
+ * Free the reserved super bytes from this block group before
+ * remove it.
+ */
+ free_excluded_extents(root, block_group);
+
memcpy(&key, &block_group->key, sizeof(key));
if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
@@ -8642,13 +8778,84 @@ out:
return ret;
}
+int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_space_info *space_info;
+ int ret;
+
+ ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0,
+ &space_info);
+ if (ret)
+ return ret;
+
+ ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0,
+ &space_info);
+ if (ret)
+ return ret;
+
+ ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0,
+ &space_info);
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
return unpin_extent_range(root, start, end);
}
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes)
+ u64 num_bytes, u64 *actual_bytes)
{
- return btrfs_discard_extent(root, bytenr, num_bytes);
+ return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+}
+
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache = NULL;
+ u64 group_trimmed;
+ u64 start;
+ u64 end;
+ u64 trimmed = 0;
+ int ret = 0;
+
+ cache = btrfs_lookup_block_group(fs_info, range->start);
+
+ while (cache) {
+ if (cache->key.objectid >= (range->start + range->len)) {
+ btrfs_put_block_group(cache);
+ break;
+ }
+
+ start = max(range->start, cache->key.objectid);
+ end = min(range->start + range->len,
+ cache->key.objectid + cache->key.offset);
+
+ if (end - start >= range->minlen) {
+ if (!block_group_cache_done(cache)) {
+ ret = cache_block_group(cache, NULL, root, 0);
+ if (!ret)
+ wait_block_group_cache_done(cache);
+ }
+ ret = btrfs_trim_block_group(cache,
+ &group_trimmed,
+ start,
+ end,
+ range->minlen);
+
+ trimmed += group_trimmed;
+ if (ret) {
+ btrfs_put_block_group(cache);
+ break;
+ }
+ }
+
+ cache = next_block_group(fs_info->tree_root, cache);
+ }
+
+ range->len = trimmed;
+ return ret;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e993cf1766e..20ddb28602a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
- unsigned long bits)
+ unsigned long bits, int contig)
{
struct rb_node *node;
struct extent_state *state;
u64 cur_start = *start;
u64 total_bytes = 0;
+ u64 last = 0;
int found = 0;
if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
state = rb_entry(node, struct extent_state, rb_node);
if (state->start > search_end)
break;
- if (state->end >= cur_start && (state->state & bits)) {
+ if (contig && found && state->start > last + 1)
+ break;
+ if (state->end >= cur_start && (state->state & bits) == bits) {
total_bytes += min(search_end, state->end) + 1 -
max(cur_start, state->start);
if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
*start = state->start;
found = 1;
}
+ last = state->end;
+ } else if (contig && found) {
+ break;
}
node = rb_next(node);
if (!node)
@@ -1865,7 +1871,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
bio_get(bio);
if (tree->ops && tree->ops->submit_bio_hook)
- tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+ ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
mirror_num, bio_flags, start);
else
submit_bio(rw, bio);
@@ -1920,6 +1926,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
nr = bio_get_nr_vecs(bdev);
bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+ if (!bio)
+ return -ENOMEM;
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
@@ -1944,6 +1952,7 @@ void set_page_extent_mapped(struct page *page)
static void set_page_extent_head(struct page *page, unsigned long len)
{
+ WARN_ON(!PagePrivate(page));
set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
}
@@ -2126,7 +2135,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
ret = __extent_read_full_page(tree, page, get_extent, &bio, 0,
&bio_flags);
if (bio)
- submit_one_bio(READ, bio, 0, bio_flags);
+ ret = submit_one_bio(READ, bio, 0, bio_flags);
return ret;
}
@@ -2179,10 +2188,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
unsigned long nr_written = 0;
if (wbc->sync_mode == WB_SYNC_ALL)
- write_flags = WRITE_SYNC_PLUG;
+ write_flags = WRITE_SYNC;
else
write_flags = WRITE;
+ trace___extent_writepage(page, inode, wbc);
+
WARN_ON(!PageLocked(page));
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
@@ -2819,9 +2830,17 @@ int try_release_extent_state(struct extent_map_tree *map,
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
*/
- clear_extent_bit(tree, start, end,
+ ret = clear_extent_bit(tree, start, end,
~(EXTENT_LOCKED | EXTENT_NODATASUM),
0, 0, NULL, mask);
+
+ /* if clear_extent_bit failed for enomem reasons,
+ * we can't allow the release to continue.
+ */
+ if (ret < 0)
+ ret = 0;
+ else
+ ret = 1;
}
return ret;
}
@@ -2901,6 +2920,46 @@ out:
return sector;
}
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+ u64 offset,
+ u64 last,
+ get_extent_t *get_extent)
+{
+ u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+ struct extent_map *em;
+ u64 len;
+
+ if (offset >= last)
+ return NULL;
+
+ while(1) {
+ len = last - offset;
+ if (len == 0)
+ break;
+ len = (len + sectorsize - 1) & ~(sectorsize - 1);
+ em = get_extent(inode, NULL, 0, offset, len, 0);
+ if (!em || IS_ERR(em))
+ return em;
+
+ /* if this isn't a hole return it */
+ if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+ em->block_start != EXTENT_MAP_HOLE) {
+ return em;
+ }
+
+ /* this is a hole, advance to the next extent */
+ offset = extent_map_end(em);
+ free_extent_map(em);
+ if (offset >= last)
+ break;
+ }
+ return NULL;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -2910,16 +2969,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u32 flags = 0;
u32 found_type;
u64 last;
+ u64 last_for_get_extent = 0;
u64 disko = 0;
+ u64 isize = i_size_read(inode);
struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_file_extent_item *item;
int end = 0;
- u64 em_start = 0, em_len = 0;
+ u64 em_start = 0;
+ u64 em_len = 0;
+ u64 em_end = 0;
unsigned long emflags;
- int hole = 0;
if (len == 0)
return -EINVAL;
@@ -2929,6 +2991,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
+ /*
+ * lookup the last file extent. We're not using i_size here
+ * because there might be preallocation past i_size
+ */
ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
path, inode->i_ino, -1, 0);
if (ret < 0) {
@@ -2942,18 +3008,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
- /* No extents, just return */
+ /* No extents, but there might be delalloc bits */
if (found_key.objectid != inode->i_ino ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- btrfs_free_path(path);
- return 0;
+ /* have to trust i_size as the end */
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ } else {
+ /*
+ * remember the start of the last extent. There are a
+ * bunch of different factors that go into the length of the
+ * extent, so its much less complex to remember where it started
+ */
+ last = found_key.offset;
+ last_for_get_extent = last + 1;
}
- last = found_key.offset;
btrfs_free_path(path);
+ /*
+ * we might have some extents allocated but more delalloc past those
+ * extents. so, we trust isize unless the start of the last extent is
+ * beyond isize
+ */
+ if (last < isize) {
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ }
+
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
- em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@@ -2962,22 +3048,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- hole = 0;
- off = em->start + em->len;
- if (off >= max)
- end = 1;
+ u64 offset_in_extent;
- if (em->block_start == EXTENT_MAP_HOLE) {
- hole = 1;
- goto next;
- }
+ /* break if the extent we found is outside the range */
+ if (em->start >= max || extent_map_end(em) < off)
+ break;
- em_start = em->start;
- em_len = em->len;
+ /*
+ * get_extent may return an extent that starts before our
+ * requested range. We have to make sure the ranges
+ * we return to fiemap always move forward and don't
+ * overlap, so adjust the offsets here
+ */
+ em_start = max(em->start, off);
+ /*
+ * record the offset from the start of the extent
+ * for adjusting the disk offset below
+ */
+ offset_in_extent = em_start - em->start;
+ em_end = extent_map_end(em);
+ em_len = em_end - em_start;
+ emflags = em->flags;
disko = 0;
flags = 0;
+ /*
+ * bump off for our next call to get_extent
+ */
+ off = extent_map_end(em);
+ if (off >= max)
+ end = 1;
+
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
@@ -2988,42 +3090,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- disko = em->block_start;
+ disko = em->block_start + offset_in_extent;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
-next:
- emflags = em->flags;
free_extent_map(em);
em = NULL;
- if (!end) {
- em = get_extent(inode, NULL, 0, off, max - off, 0);
- if (!em)
- goto out;
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- emflags = em->flags;
- }
-
- if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+ if ((em_start >= last) || em_len == (u64)-1 ||
+ (last == (u64)-1 && isize <= em_end)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- if (em_start == last) {
+ /* now scan forward to see if this is really the last extent. */
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ if (!em) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
-
- if (!hole) {
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
- }
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
}
out_free:
free_extent_map(em);
@@ -3192,7 +3286,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
}
if (!PageUptodate(p))
uptodate = 0;
- unlock_page(p);
+
+ /*
+ * see below about how we avoid a nasty race with release page
+ * and why we unlock later
+ */
+ if (i != 0)
+ unlock_page(p);
}
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3216,9 +3316,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
+
+ /*
+ * there is a race where release page may have
+ * tried to find this extent buffer in the radix
+ * but failed. It will tell the VM it is safe to
+ * reclaim the, and it will clear the page private bit.
+ * We must make sure to set the page private bit properly
+ * after the extent buffer is in the radix tree so
+ * it doesn't get lost
+ */
+ set_page_extent_mapped(eb->first_page);
+ set_page_extent_head(eb->first_page, eb->len);
+ if (!page0)
+ unlock_page(eb->first_page);
return eb;
free_eb:
+ if (eb->first_page && !page0)
+ unlock_page(eb->first_page);
+
if (!atomic_dec_and_test(&eb->refs))
return exists;
btrfs_release_extent_buffer(eb);
@@ -3269,10 +3386,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
continue;
lock_page(page);
+ WARN_ON(!PagePrivate(page));
+
+ set_page_extent_mapped(page);
if (i == 0)
set_page_extent_head(page, eb->len);
- else
- set_page_private(page, EXTENT_PAGE_PRIVATE);
clear_page_dirty_for_io(page);
spin_lock_irq(&page->mapping->tree_lock);
@@ -3462,6 +3580,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
+
+ WARN_ON(!PagePrivate(page));
+
+ set_page_extent_mapped(page);
+ if (i == 0)
+ set_page_extent_head(page, eb->len);
+
if (inc_all_pages)
page_cache_get(page);
if (!PageUptodate(page)) {
@@ -3567,6 +3692,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
"wanted %lu %lu\n", (unsigned long long)eb->start,
eb->len, start, min_len);
WARN_ON(1);
+ return -EINVAL;
}
p = extent_buffer_page(eb, i);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..f62c5442835d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -31,6 +31,7 @@
#define EXTENT_BUFFER_UPTODATE 0
#define EXTENT_BUFFER_BLOCKING 1
#define EXTENT_BUFFER_DIRTY 2
+#define EXTENT_BUFFER_CORRUPT 3
/* these are flags for extent_clear_unlock_delalloc */
#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@@ -191,7 +192,7 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
- u64 max_bytes, unsigned long bits);
+ u64 max_bytes, unsigned long bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce12530..2b6c12e983b3 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
{
struct extent_map *em;
em = kmem_cache_alloc(extent_map_cache, mask);
- if (!em || IS_ERR(em))
- return em;
+ if (!em)
+ return NULL;
em->in_tree = 0;
em->flags = 0;
em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a562a250ae77..a6a9d4e8b491 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
file_key.objectid = objectid;
file_key.offset = pos;
btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
if (bio->bi_size > PAGE_CACHE_SIZE * 8)
path->reada = 2;
@@ -536,6 +539,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
root = root->fs_info->csum_root;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
while (1) {
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -548,7 +553,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
if (path->slots[0] == 0)
goto out;
path->slots[0]--;
+ } else if (ret < 0) {
+ goto out;
}
+
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c800d58f3013..656bc0a892b1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -45,14 +45,14 @@
* and be replaced with calls into generic code.
*/
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
- int write_bytes,
+ size_t write_bytes,
struct page **prepared_pages,
struct iov_iter *i)
{
size_t copied = 0;
+ size_t total_copied = 0;
int pg = 0;
int offset = pos & (PAGE_CACHE_SIZE - 1);
- int total_copied = 0;
while (write_bytes > 0) {
size_t count = min_t(size_t,
@@ -70,14 +70,26 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */
flush_dcache_page(page);
+
+ /*
+ * if we get a partial write, we can end up with
+ * partially up to date pages. These add
+ * a lot of complexity, so make sure they don't
+ * happen by forcing this copy to be retried.
+ *
+ * The rest of the btrfs_file_write code will fall
+ * back to page at a time copies after we return 0.
+ */
+ if (!PageUptodate(page) && copied < count)
+ copied = 0;
+
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
/* Return to btrfs_file_aio_write to fault page */
- if (unlikely(copied == 0)) {
+ if (unlikely(copied == 0))
break;
- }
if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
offset += copied;
@@ -96,8 +108,6 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
size_t i;
for (i = 0; i < num_pages; i++) {
- if (!pages[i])
- break;
/* page checked is some magic around finding pages that
* have been modified without going through btrfs_set_page_dirty
* clear it here
@@ -117,13 +127,12 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
* this also makes the decision about creating an inline extent vs
* doing real data extents, marking pages dirty and delalloc as required.
*/
-static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct file *file,
- struct page **pages,
- size_t num_pages,
- loff_t pos,
- size_t write_bytes)
+static noinline int dirty_and_release_pages(struct btrfs_root *root,
+ struct file *file,
+ struct page **pages,
+ size_t num_pages,
+ loff_t pos,
+ size_t write_bytes)
{
int err = 0;
int i;
@@ -141,7 +150,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
NULL);
- BUG_ON(err);
+ if (err)
+ return err;
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
@@ -149,13 +159,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
ClearPageChecked(p);
set_page_dirty(p);
}
- if (end_pos > isize) {
+
+ /*
+ * we've only changed i_size in ram, and we haven't updated
+ * the disk i_size. There is no need to log the inode
+ * at this time.
+ */
+ if (end_pos > isize)
i_size_write(inode, end_pos);
- /* we've only changed i_size in ram, and we haven't updated
- * the disk i_size. There is no need to log the inode
- * at this time.
- */
- }
return 0;
}
@@ -186,6 +197,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split = alloc_extent_map(GFP_NOFS);
if (!split2)
split2 = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!split || !split2);
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
@@ -596,6 +608,8 @@ again:
key.offset = split;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
if (ret > 0 && path->slots[0] > 0)
path->slots[0]--;
@@ -762,6 +776,27 @@ out:
}
/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+ int ret = 0;
+
+ if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ret;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
* this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be
* modified.
@@ -776,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
+ int faili = 0;
u64 start_pos;
u64 last_pos;
@@ -783,21 +819,33 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
if (start_pos > inode->i_size) {
- err = btrfs_cont_expand(inode, start_pos);
+ err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
if (err)
return err;
}
- memset(pages, 0, num_pages * sizeof(struct page *));
again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
+ faili = i - 1;
err = -ENOMEM;
- BUG_ON(1);
+ goto fail;
+ }
+
+ if (i == 0)
+ err = prepare_uptodate_page(pages[i], pos);
+ if (i == num_pages - 1)
+ err = prepare_uptodate_page(pages[i],
+ pos + write_bytes);
+ if (err) {
+ page_cache_release(pages[i]);
+ faili = i - 1;
+ goto fail;
}
wait_on_page_writeback(pages[i]);
}
+ err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -837,187 +885,103 @@ again:
WARN_ON(!PageLocked(pages[i]));
}
return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
}
-static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static noinline ssize_t __btrfs_buffered_write(struct file *file,
+ struct iov_iter *i,
+ loff_t pos)
{
- struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pinned[2];
struct page **pages = NULL;
- struct iov_iter i;
- loff_t *ppos = &iocb->ki_pos;
- loff_t start_pos;
- ssize_t num_written = 0;
- ssize_t err = 0;
- size_t count;
- size_t ocount;
- int ret = 0;
- int nrptrs;
unsigned long first_index;
unsigned long last_index;
- int will_write;
- int buffered = 0;
- int copied = 0;
- int dirty_pages = 0;
-
- will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
- (file->f_flags & O_DIRECT));
-
- pinned[0] = NULL;
- pinned[1] = NULL;
-
- start_pos = pos;
-
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
- mutex_lock(&inode->i_mutex);
-
- err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
- if (err)
- goto out;
- count = ocount;
-
- current->backing_dev_info = inode->i_mapping->backing_dev_info;
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err)
- goto out;
-
- if (count == 0)
- goto out;
-
- err = file_remove_suid(file);
- if (err)
- goto out;
-
- /*
- * If BTRFS flips readonly due to some impossible error
- * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
- * although we have opened a file as writable, we have
- * to stop this write operation to ensure FS consistency.
- */
- if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- err = -EROFS;
- goto out;
- }
-
- file_update_time(file);
- BTRFS_I(inode)->sequence++;
-
- if (unlikely(file->f_flags & O_DIRECT)) {
- num_written = generic_file_direct_write(iocb, iov, &nr_segs,
- pos, ppos, count,
- ocount);
- /*
- * the generic O_DIRECT will update in-memory i_size after the
- * DIOs are done. But our endio handlers that update the on
- * disk i_size never update past the in memory i_size. So we
- * need one more update here to catch any additions to the
- * file
- */
- if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
- btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
- mark_inode_dirty(inode);
- }
-
- if (num_written < 0) {
- ret = num_written;
- num_written = 0;
- goto out;
- } else if (num_written == count) {
- /* pick up pos changes done by the generic code */
- pos = *ppos;
- goto out;
- }
- /*
- * We are going to do buffered for the rest of the range, so we
- * need to make sure to invalidate the buffered pages when we're
- * done.
- */
- buffered = 1;
- pos += num_written;
- }
+ size_t num_written = 0;
+ int nrptrs;
+ int ret;
- iov_iter_init(&i, iov, nr_segs, count, num_written);
- nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
+ nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
(sizeof(struct page *)));
pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
-
- /* generic_write_checks can change our pos */
- start_pos = pos;
+ if (!pages)
+ return -ENOMEM;
first_index = pos >> PAGE_CACHE_SHIFT;
- last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
-
- /*
- * there are lots of better ways to do this, but this code
- * makes sure the first and last page in the file range are
- * up to date and ready for cow
- */
- if ((pos & (PAGE_CACHE_SIZE - 1))) {
- pinned[0] = grab_cache_page(inode->i_mapping, first_index);
- if (!PageUptodate(pinned[0])) {
- ret = btrfs_readpage(NULL, pinned[0]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[0]);
- } else {
- unlock_page(pinned[0]);
- }
- }
- if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
- pinned[1] = grab_cache_page(inode->i_mapping, last_index);
- if (!PageUptodate(pinned[1])) {
- ret = btrfs_readpage(NULL, pinned[1]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[1]);
- } else {
- unlock_page(pinned[1]);
- }
- }
+ last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
- while (iov_iter_count(&i) > 0) {
+ while (iov_iter_count(i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
- size_t write_bytes = min(iov_iter_count(&i),
+ size_t write_bytes = min(iov_iter_count(i),
nrptrs * (size_t)PAGE_CACHE_SIZE -
offset);
- size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ size_t num_pages = (write_bytes + offset +
+ PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ size_t dirty_pages;
+ size_t copied;
WARN_ON(num_pages > nrptrs);
- memset(pages, 0, sizeof(struct page *) * nrptrs);
/*
* Fault pages before locking them in prepare_pages
* to avoid recursive lock
*/
- if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+ if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
ret = -EFAULT;
- goto out;
+ break;
}
ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
if (ret)
- goto out;
+ break;
+ /*
+ * This is going to setup the pages array with the number of
+ * pages we want, so we don't really need to worry about the
+ * contents of pages from loop to loop
+ */
ret = prepare_pages(root, file, pages, num_pages,
pos, first_index, last_index,
write_bytes);
if (ret) {
btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
- goto out;
+ break;
}
copied = btrfs_copy_from_user(pos, num_pages,
- write_bytes, pages, &i);
- dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ write_bytes, pages, i);
+
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+ */
+ if (copied < write_bytes)
+ nrptrs = 1;
+
+ if (copied == 0)
+ dirty_pages = 0;
+ else
+ dirty_pages = (copied + offset +
+ PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
+ /*
+ * If we had a short copy we need to release the excess delaloc
+ * bytes we reserved. We need to increment outstanding_extents
+ * because btrfs_delalloc_release_space will decrement it, but
+ * we still have an outstanding extent for the chunk we actually
+ * managed to copy.
+ */
if (num_pages > dirty_pages) {
if (copied > 0)
atomic_inc(
@@ -1028,43 +992,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
}
if (copied > 0) {
- dirty_and_release_pages(NULL, root, file, pages,
- dirty_pages, pos, copied);
+ ret = dirty_and_release_pages(root, file, pages,
+ dirty_pages, pos,
+ copied);
+ if (ret) {
+ btrfs_delalloc_release_space(inode,
+ dirty_pages << PAGE_CACHE_SHIFT);
+ btrfs_drop_pages(pages, num_pages);
+ break;
+ }
}
btrfs_drop_pages(pages, num_pages);
- if (copied > 0) {
- if (will_write) {
- filemap_fdatawrite_range(inode->i_mapping, pos,
- pos + copied - 1);
- } else {
- balance_dirty_pages_ratelimited_nr(
- inode->i_mapping,
- dirty_pages);
- if (dirty_pages <
- (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
- btrfs_btree_balance_dirty(root, 1);
- btrfs_throttle(root);
- }
- }
+ cond_resched();
+
+ balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+ dirty_pages);
+ if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+ btrfs_btree_balance_dirty(root, 1);
+ btrfs_throttle(root);
pos += copied;
num_written += copied;
+ }
- cond_resched();
+ kfree(pages);
+
+ return num_written ? num_written : ret;
+}
+
+static ssize_t __btrfs_direct_write(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos,
+ loff_t *ppos, size_t count, size_t ocount)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = fdentry(file)->d_inode;
+ struct iov_iter i;
+ ssize_t written;
+ ssize_t written_buffered;
+ loff_t endbyte;
+ int err;
+
+ written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+ count, ocount);
+
+ /*
+ * the generic O_DIRECT will update in-memory i_size after the
+ * DIOs are done. But our endio handlers that update the on
+ * disk i_size never update past the in memory i_size. So we
+ * need one more update here to catch any additions to the
+ * file
+ */
+ if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
+ btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+ mark_inode_dirty(inode);
}
+
+ if (written < 0 || written == count)
+ return written;
+
+ pos += written;
+ count -= written;
+ iov_iter_init(&i, iov, nr_segs, count, written);
+ written_buffered = __btrfs_buffered_write(file, &i, pos);
+ if (written_buffered < 0) {
+ err = written_buffered;
+ goto out;
+ }
+ endbyte = pos + written_buffered - 1;
+ err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
+ if (err)
+ goto out;
+ written += written_buffered;
+ *ppos = pos + written_buffered;
+ invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
+ endbyte >> PAGE_CACHE_SHIFT);
out:
- mutex_unlock(&inode->i_mutex);
- if (ret)
- err = ret;
+ return written ? written : err;
+}
- kfree(pages);
- if (pinned[0])
- page_cache_release(pinned[0]);
- if (pinned[1])
- page_cache_release(pinned[1]);
- *ppos = pos;
+static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ loff_t *ppos = &iocb->ki_pos;
+ ssize_t num_written = 0;
+ ssize_t err = 0;
+ size_t count, ocount;
+
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+ mutex_lock(&inode->i_mutex);
+
+ err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
+ if (err) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
+ count = ocount;
+
+ current->backing_dev_info = inode->i_mapping->backing_dev_info;
+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+ if (err) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
+
+ if (count == 0) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
+
+ err = file_remove_suid(file);
+ if (err) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
+
+ /*
+ * If BTRFS flips readonly due to some impossible error
+ * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+ * although we have opened a file as writable, we have
+ * to stop this write operation to ensure FS consistency.
+ */
+ if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ mutex_unlock(&inode->i_mutex);
+ err = -EROFS;
+ goto out;
+ }
+
+ file_update_time(file);
+ BTRFS_I(inode)->sequence++;
+
+ if (unlikely(file->f_flags & O_DIRECT)) {
+ num_written = __btrfs_direct_write(iocb, iov, nr_segs,
+ pos, ppos, count, ocount);
+ } else {
+ struct iov_iter i;
+
+ iov_iter_init(&i, iov, nr_segs, count, num_written);
+
+ num_written = __btrfs_buffered_write(file, &i, pos);
+ if (num_written > 0)
+ *ppos = pos + num_written;
+ }
+
+ mutex_unlock(&inode->i_mutex);
/*
* we want to make sure fsync finds this change
@@ -1079,43 +1157,12 @@ out:
* one running right now.
*/
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
-
- if (num_written > 0 && will_write) {
- struct btrfs_trans_handle *trans;
-
- err = btrfs_wait_ordered_range(inode, start_pos, num_written);
- if (err)
+ if (num_written > 0 || num_written == -EIOCBQUEUED) {
+ err = generic_write_sync(file, pos, num_written);
+ if (err < 0 && num_written > 0)
num_written = err;
-
- if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- num_written = PTR_ERR(trans);
- goto done;
- }
- mutex_lock(&inode->i_mutex);
- ret = btrfs_log_dentry_safe(trans, root,
- file->f_dentry);
- mutex_unlock(&inode->i_mutex);
- if (ret == 0) {
- ret = btrfs_sync_log(trans, root);
- if (ret == 0)
- btrfs_end_transaction(trans, root);
- else
- btrfs_commit_transaction(trans, root);
- } else if (ret != BTRFS_NO_LOG_SYNC) {
- btrfs_commit_transaction(trans, root);
- } else {
- btrfs_end_transaction(trans, root);
- }
- }
- if (file->f_flags & O_DIRECT && buffered) {
- invalidate_mapping_pages(inode->i_mapping,
- start_pos >> PAGE_CACHE_SHIFT,
- (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
- }
}
-done:
+out:
current->backing_dev_info = NULL;
return num_written ? num_written : err;
}
@@ -1158,6 +1205,7 @@ int btrfs_sync_file(struct file *file, int datasync)
int ret = 0;
struct btrfs_trans_handle *trans;
+ trace_btrfs_sync_file(file, datasync);
/* we wait first, since the writeback may change the inode */
root->log_batch++;
@@ -1285,7 +1333,8 @@ static long btrfs_fallocate(struct file *file, int mode,
goto out;
if (alloc_start > inode->i_size) {
- ret = btrfs_cont_expand(inode, alloc_start);
+ ret = btrfs_cont_expand(inode, i_size_read(inode),
+ alloc_start);
if (ret)
goto out;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 60d684266959..0037427d8a9d 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -393,7 +393,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
break;
need_loop = 1;
- e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+ e = kmem_cache_zalloc(btrfs_free_space_cachep,
+ GFP_NOFS);
if (!e) {
kunmap(page);
unlock_page(page);
@@ -405,7 +406,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
e->bytes = le64_to_cpu(entry->bytes);
if (!e->bytes) {
kunmap(page);
- kfree(e);
+ kmem_cache_free(btrfs_free_space_cachep, e);
unlock_page(page);
page_cache_release(page);
goto free_cache;
@@ -420,7 +421,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
kunmap(page);
- kfree(e);
+ kmem_cache_free(
+ btrfs_free_space_cachep, e);
unlock_page(page);
page_cache_release(page);
goto free_cache;
@@ -987,11 +989,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group,
return entry;
}
-static void unlink_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_space *info)
+static inline void
+__unlink_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info)
{
rb_erase(&info->offset_index, &block_group->free_space_offset);
block_group->free_extents--;
+}
+
+static void unlink_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info)
+{
+ __unlink_free_space(block_group, info);
block_group->free_space -= info->bytes;
}
@@ -1016,14 +1025,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
u64 max_bytes;
u64 bitmap_bytes;
u64 extent_bytes;
+ u64 size = block_group->key.offset;
/*
* The goal is to keep the total amount of memory used per 1gb of space
* at or below 32k, so we need to adjust how much memory we allow to be
* used by extent based free space tracking
*/
- max_bytes = MAX_CACHE_BYTES_PER_GIG *
- (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
+ if (size < 1024 * 1024 * 1024)
+ max_bytes = MAX_CACHE_BYTES_PER_GIG;
+ else
+ max_bytes = MAX_CACHE_BYTES_PER_GIG *
+ div64_u64(size, 1024 * 1024 * 1024);
/*
* we want to account for 1 more bitmap than what we have so we can make
@@ -1171,6 +1184,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
recalculate_thresholds(block_group);
}
+static void free_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *bitmap_info)
+{
+ unlink_free_space(block_group, bitmap_info);
+ kfree(bitmap_info->bitmap);
+ kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
+ block_group->total_bitmaps--;
+ recalculate_thresholds(block_group);
+}
+
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *bitmap_info,
u64 *offset, u64 *bytes)
@@ -1195,6 +1218,7 @@ again:
*/
search_start = *offset;
search_bytes = *bytes;
+ search_bytes = min(search_bytes, end - search_start + 1);
ret = search_bitmap(block_group, bitmap_info, &search_start,
&search_bytes);
BUG_ON(ret < 0 || search_start != *offset);
@@ -1211,13 +1235,8 @@ again:
if (*bytes) {
struct rb_node *next = rb_next(&bitmap_info->offset_index);
- if (!bitmap_info->bytes) {
- unlink_free_space(block_group, bitmap_info);
- kfree(bitmap_info->bitmap);
- kfree(bitmap_info);
- block_group->total_bitmaps--;
- recalculate_thresholds(block_group);
- }
+ if (!bitmap_info->bytes)
+ free_bitmap(block_group, bitmap_info);
/*
* no entry after this bitmap, but we still have bytes to
@@ -1250,13 +1269,8 @@ again:
return -EAGAIN;
goto again;
- } else if (!bitmap_info->bytes) {
- unlink_free_space(block_group, bitmap_info);
- kfree(bitmap_info->bitmap);
- kfree(bitmap_info);
- block_group->total_bitmaps--;
- recalculate_thresholds(block_group);
- }
+ } else if (!bitmap_info->bytes)
+ free_bitmap(block_group, bitmap_info);
return 0;
}
@@ -1273,9 +1287,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
* If we are below the extents threshold then we can add this as an
* extent, and don't have to deal with the bitmap
*/
- if (block_group->free_extents < block_group->extents_thresh &&
- info->bytes > block_group->sectorsize * 4)
- return 0;
+ if (block_group->free_extents < block_group->extents_thresh) {
+ /*
+ * If this block group has some small extents we don't want to
+ * use up all of our free slots in the cache with them, we want
+ * to reserve them to larger extents, however if we have plent
+ * of cache left then go ahead an dadd them, no sense in adding
+ * the overhead of a bitmap if we don't have to.
+ */
+ if (info->bytes <= block_group->sectorsize * 4) {
+ if (block_group->free_extents * 2 <=
+ block_group->extents_thresh)
+ return 0;
+ } else {
+ return 0;
+ }
+ }
/*
* some block groups are so tiny they can't be enveloped by a bitmap, so
@@ -1330,8 +1357,8 @@ new_bitmap:
/* no pre-allocated info, allocate a new one */
if (!info) {
- info = kzalloc(sizeof(struct btrfs_free_space),
- GFP_NOFS);
+ info = kmem_cache_zalloc(btrfs_free_space_cachep,
+ GFP_NOFS);
if (!info) {
spin_lock(&block_group->tree_lock);
ret = -ENOMEM;
@@ -1353,28 +1380,20 @@ out:
if (info) {
if (info->bitmap)
kfree(info->bitmap);
- kfree(info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
}
return ret;
}
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes)
+bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_space *info, bool update_stat)
{
- struct btrfs_free_space *right_info = NULL;
- struct btrfs_free_space *left_info = NULL;
- struct btrfs_free_space *info = NULL;
- int ret = 0;
-
- info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
- if (!info)
- return -ENOMEM;
-
- info->offset = offset;
- info->bytes = bytes;
-
- spin_lock(&block_group->tree_lock);
+ struct btrfs_free_space *left_info;
+ struct btrfs_free_space *right_info;
+ bool merged = false;
+ u64 offset = info->offset;
+ u64 bytes = info->bytes;
/*
* first we want to see if there is free space adjacent to the range we
@@ -1388,40 +1407,65 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
else
left_info = tree_search_offset(block_group, offset - 1, 0, 0);
- /*
- * If there was no extent directly to the left or right of this new
- * extent then we know we're going to have to allocate a new extent, so
- * before we do that see if we need to drop this into a bitmap
- */
- if ((!left_info || left_info->bitmap) &&
- (!right_info || right_info->bitmap)) {
- ret = insert_into_bitmap(block_group, info);
-
- if (ret < 0) {
- goto out;
- } else if (ret) {
- ret = 0;
- goto out;
- }
- }
-
if (right_info && !right_info->bitmap) {
- unlink_free_space(block_group, right_info);
+ if (update_stat)
+ unlink_free_space(block_group, right_info);
+ else
+ __unlink_free_space(block_group, right_info);
info->bytes += right_info->bytes;
- kfree(right_info);
+ kmem_cache_free(btrfs_free_space_cachep, right_info);
+ merged = true;
}
if (left_info && !left_info->bitmap &&
left_info->offset + left_info->bytes == offset) {
- unlink_free_space(block_group, left_info);
+ if (update_stat)
+ unlink_free_space(block_group, left_info);
+ else
+ __unlink_free_space(block_group, left_info);
info->offset = left_info->offset;
info->bytes += left_info->bytes;
- kfree(left_info);
+ kmem_cache_free(btrfs_free_space_cachep, left_info);
+ merged = true;
}
+ return merged;
+}
+
+int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+ u64 offset, u64 bytes)
+{
+ struct btrfs_free_space *info;
+ int ret = 0;
+
+ info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
+ if (!info)
+ return -ENOMEM;
+
+ info->offset = offset;
+ info->bytes = bytes;
+
+ spin_lock(&block_group->tree_lock);
+
+ if (try_merge_free_space(block_group, info, true))
+ goto link;
+
+ /*
+ * There was no extent directly to the left or right of this new
+ * extent then we know we're going to have to allocate a new extent, so
+ * before we do that see if we need to drop this into a bitmap
+ */
+ ret = insert_into_bitmap(block_group, info);
+ if (ret < 0) {
+ goto out;
+ } else if (ret) {
+ ret = 0;
+ goto out;
+ }
+link:
ret = link_free_space(block_group, info);
if (ret)
- kfree(info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
out:
spin_unlock(&block_group->tree_lock);
@@ -1491,7 +1535,7 @@ again:
kfree(info->bitmap);
block_group->total_bitmaps--;
}
- kfree(info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
goto out_lock;
}
@@ -1527,7 +1571,7 @@ again:
/* the hole we're creating ends at the end
* of the info struct, just free the info
*/
- kfree(info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
}
spin_unlock(&block_group->tree_lock);
@@ -1600,29 +1644,28 @@ __btrfs_return_cluster_to_free_space(
{
struct btrfs_free_space *entry;
struct rb_node *node;
- bool bitmap;
spin_lock(&cluster->lock);
if (cluster->block_group != block_group)
goto out;
- bitmap = cluster->points_to_bitmap;
cluster->block_group = NULL;
cluster->window_start = 0;
list_del_init(&cluster->block_group_list);
- cluster->points_to_bitmap = false;
-
- if (bitmap)
- goto out;
node = rb_first(&cluster->root);
while (node) {
+ bool bitmap;
+
entry = rb_entry(node, struct btrfs_free_space, offset_index);
node = rb_next(&entry->offset_index);
rb_erase(&entry->offset_index, &cluster->root);
- BUG_ON(entry->bitmap);
+
+ bitmap = (entry->bitmap != NULL);
+ if (!bitmap)
+ try_merge_free_space(block_group, entry, false);
tree_insert_offset(&block_group->free_space_offset,
- entry->offset, &entry->offset_index, 0);
+ entry->offset, &entry->offset_index, bitmap);
}
cluster->root = RB_ROOT;
@@ -1659,7 +1702,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
unlink_free_space(block_group, info);
if (info->bitmap)
kfree(info->bitmap);
- kfree(info);
+ kmem_cache_free(btrfs_free_space_cachep, info);
if (need_resched()) {
spin_unlock(&block_group->tree_lock);
cond_resched();
@@ -1685,19 +1728,14 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
ret = offset;
if (entry->bitmap) {
bitmap_clear_bits(block_group, entry, offset, bytes);
- if (!entry->bytes) {
- unlink_free_space(block_group, entry);
- kfree(entry->bitmap);
- kfree(entry);
- block_group->total_bitmaps--;
- recalculate_thresholds(block_group);
- }
+ if (!entry->bytes)
+ free_bitmap(block_group, entry);
} else {
unlink_free_space(block_group, entry);
entry->offset += bytes;
entry->bytes -= bytes;
if (!entry->bytes)
- kfree(entry);
+ kmem_cache_free(btrfs_free_space_cachep, entry);
else
link_free_space(block_group, entry);
}
@@ -1750,48 +1788,24 @@ int btrfs_return_cluster_to_free_space(
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
+ struct btrfs_free_space *entry,
u64 bytes, u64 min_start)
{
- struct btrfs_free_space *entry;
int err;
u64 search_start = cluster->window_start;
u64 search_bytes = bytes;
u64 ret = 0;
- spin_lock(&block_group->tree_lock);
- spin_lock(&cluster->lock);
-
- if (!cluster->points_to_bitmap)
- goto out;
-
- if (cluster->block_group != block_group)
- goto out;
-
- /*
- * search_start is the beginning of the bitmap, but at some point it may
- * be a good idea to point to the actual start of the free area in the
- * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
- * to 1 to make sure we get the bitmap entry
- */
- entry = tree_search_offset(block_group,
- offset_to_bitmap(block_group, search_start),
- 1, 0);
- if (!entry || !entry->bitmap)
- goto out;
-
search_start = min_start;
search_bytes = bytes;
err = search_bitmap(block_group, entry, &search_start,
&search_bytes);
if (err)
- goto out;
+ return 0;
ret = search_start;
bitmap_clear_bits(block_group, entry, ret, bytes);
-out:
- spin_unlock(&cluster->lock);
- spin_unlock(&block_group->tree_lock);
return ret;
}
@@ -1809,10 +1823,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct rb_node *node;
u64 ret = 0;
- if (cluster->points_to_bitmap)
- return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
- min_start);
-
spin_lock(&cluster->lock);
if (bytes > cluster->max_size)
goto out;
@@ -1825,9 +1835,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
goto out;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
-
while(1) {
- if (entry->bytes < bytes || entry->offset < min_start) {
+ if (entry->bytes < bytes ||
+ (!entry->bitmap && entry->offset < min_start)) {
struct rb_node *node;
node = rb_next(&entry->offset_index);
@@ -1837,20 +1847,53 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
offset_index);
continue;
}
- ret = entry->offset;
- entry->offset += bytes;
- entry->bytes -= bytes;
+ if (entry->bitmap) {
+ ret = btrfs_alloc_from_bitmap(block_group,
+ cluster, entry, bytes,
+ min_start);
+ if (ret == 0) {
+ struct rb_node *node;
+ node = rb_next(&entry->offset_index);
+ if (!node)
+ break;
+ entry = rb_entry(node, struct btrfs_free_space,
+ offset_index);
+ continue;
+ }
+ } else {
- if (entry->bytes == 0) {
- rb_erase(&entry->offset_index, &cluster->root);
- kfree(entry);
+ ret = entry->offset;
+
+ entry->offset += bytes;
+ entry->bytes -= bytes;
}
+
+ if (entry->bytes == 0)
+ rb_erase(&entry->offset_index, &cluster->root);
break;
}
out:
spin_unlock(&cluster->lock);
+ if (!ret)
+ return 0;
+
+ spin_lock(&block_group->tree_lock);
+
+ block_group->free_space -= bytes;
+ if (entry->bytes == 0) {
+ block_group->free_extents--;
+ if (entry->bitmap) {
+ kfree(entry->bitmap);
+ block_group->total_bitmaps--;
+ recalculate_thresholds(block_group);
+ }
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ }
+
+ spin_unlock(&block_group->tree_lock);
+
return ret;
}
@@ -1866,12 +1909,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
unsigned long found_bits;
unsigned long start = 0;
unsigned long total_found = 0;
+ int ret;
bool found = false;
i = offset_to_bit(entry->offset, block_group->sectorsize,
max_t(u64, offset, entry->offset));
- search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
- total_bits = bytes_to_bits(bytes, block_group->sectorsize);
+ search_bits = bytes_to_bits(bytes, block_group->sectorsize);
+ total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
again:
found_bits = 0;
@@ -1888,7 +1932,7 @@ again:
}
if (!found_bits)
- return -1;
+ return -ENOSPC;
if (!found) {
start = i;
@@ -1912,189 +1956,208 @@ again:
cluster->window_start = start * block_group->sectorsize +
entry->offset;
- cluster->points_to_bitmap = true;
+ rb_erase(&entry->offset_index, &block_group->free_space_offset);
+ ret = tree_insert_offset(&cluster->root, entry->offset,
+ &entry->offset_index, 1);
+ BUG_ON(ret);
return 0;
}
/*
- * here we try to find a cluster of blocks in a block group. The goal
- * is to find at least bytes free and up to empty_size + bytes free.
- * We might not find them all in one contiguous area.
- *
- * returns zero and sets up cluster if things worked out, otherwise
- * it returns -enospc
+ * This searches the block group for just extents to fill the cluster with.
*/
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 empty_size)
+static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ u64 offset, u64 bytes, u64 min_bytes)
{
+ struct btrfs_free_space *first = NULL;
struct btrfs_free_space *entry = NULL;
+ struct btrfs_free_space *prev = NULL;
+ struct btrfs_free_space *last;
struct rb_node *node;
- struct btrfs_free_space *next;
- struct btrfs_free_space *last = NULL;
- u64 min_bytes;
u64 window_start;
u64 window_free;
- u64 max_extent = 0;
- bool found_bitmap = false;
- int ret;
+ u64 max_extent;
+ u64 max_gap = 128 * 1024;
- /* for metadata, allow allocates with more holes */
- if (btrfs_test_opt(root, SSD_SPREAD)) {
- min_bytes = bytes + empty_size;
- } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
- /*
- * we want to do larger allocations when we are
- * flushing out the delayed refs, it helps prevent
- * making more work as we go along.
- */
- if (trans->transaction->delayed_refs.flushing)
- min_bytes = max(bytes, (bytes + empty_size) >> 1);
- else
- min_bytes = max(bytes, (bytes + empty_size) >> 4);
- } else
- min_bytes = max(bytes, (bytes + empty_size) >> 2);
-
- spin_lock(&block_group->tree_lock);
- spin_lock(&cluster->lock);
-
- /* someone already found a cluster, hooray */
- if (cluster->block_group) {
- ret = 0;
- goto out;
- }
-again:
- entry = tree_search_offset(block_group, offset, found_bitmap, 1);
- if (!entry) {
- ret = -ENOSPC;
- goto out;
- }
+ entry = tree_search_offset(block_group, offset, 0, 1);
+ if (!entry)
+ return -ENOSPC;
/*
- * If found_bitmap is true, we exhausted our search for extent entries,
- * and we just want to search all of the bitmaps that we can find, and
- * ignore any extent entries we find.
+ * We don't want bitmaps, so just move along until we find a normal
+ * extent entry.
*/
- while (entry->bitmap || found_bitmap ||
- (!entry->bitmap && entry->bytes < min_bytes)) {
- struct rb_node *node = rb_next(&entry->offset_index);
-
- if (entry->bitmap && entry->bytes > bytes + empty_size) {
- ret = btrfs_bitmap_cluster(block_group, entry, cluster,
- offset, bytes + empty_size,
- min_bytes);
- if (!ret)
- goto got_it;
- }
-
- if (!node) {
- ret = -ENOSPC;
- goto out;
- }
+ while (entry->bitmap) {
+ node = rb_next(&entry->offset_index);
+ if (!node)
+ return -ENOSPC;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
}
- /*
- * We already searched all the extent entries from the passed in offset
- * to the end and didn't find enough space for the cluster, and we also
- * didn't find any bitmaps that met our criteria, just go ahead and exit
- */
- if (found_bitmap) {
- ret = -ENOSPC;
- goto out;
- }
-
- cluster->points_to_bitmap = false;
window_start = entry->offset;
window_free = entry->bytes;
- last = entry;
max_extent = entry->bytes;
+ first = entry;
+ last = entry;
+ prev = entry;
- while (1) {
- /* out window is just right, lets fill it */
- if (window_free >= bytes + empty_size)
- break;
-
- node = rb_next(&last->offset_index);
- if (!node) {
- if (found_bitmap)
- goto again;
- ret = -ENOSPC;
- goto out;
- }
- next = rb_entry(node, struct btrfs_free_space, offset_index);
+ while (window_free <= min_bytes) {
+ node = rb_next(&entry->offset_index);
+ if (!node)
+ return -ENOSPC;
+ entry = rb_entry(node, struct btrfs_free_space, offset_index);
- /*
- * we found a bitmap, so if this search doesn't result in a
- * cluster, we know to go and search again for the bitmaps and
- * start looking for space there
- */
- if (next->bitmap) {
- if (!found_bitmap)
- offset = next->offset;
- found_bitmap = true;
- last = next;
+ if (entry->bitmap)
continue;
- }
-
/*
* we haven't filled the empty size and the window is
* very large. reset and try again
*/
- if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
- next->offset - window_start > (bytes + empty_size) * 2) {
- entry = next;
+ if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
+ entry->offset - window_start > (min_bytes * 2)) {
+ first = entry;
window_start = entry->offset;
window_free = entry->bytes;
last = entry;
max_extent = entry->bytes;
} else {
- last = next;
- window_free += next->bytes;
+ last = entry;
+ window_free += entry->bytes;
if (entry->bytes > max_extent)
max_extent = entry->bytes;
}
+ prev = entry;
}
- cluster->window_start = entry->offset;
+ cluster->window_start = first->offset;
+
+ node = &first->offset_index;
/*
* now we've found our entries, pull them out of the free space
* cache and put them into the cluster rbtree
- *
- * The cluster includes an rbtree, but only uses the offset index
- * of each free space cache entry.
*/
- while (1) {
+ do {
+ int ret;
+
+ entry = rb_entry(node, struct btrfs_free_space, offset_index);
node = rb_next(&entry->offset_index);
- if (entry->bitmap && node) {
- entry = rb_entry(node, struct btrfs_free_space,
- offset_index);
+ if (entry->bitmap)
continue;
- } else if (entry->bitmap && !node) {
- break;
- }
rb_erase(&entry->offset_index, &block_group->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 0);
BUG_ON(ret);
+ } while (node && entry != last);
- if (!node || entry == last)
- break;
+ cluster->max_size = max_extent;
+
+ return 0;
+}
+
+/*
+ * This specifically looks for bitmaps that may work in the cluster, we assume
+ * that we have already failed to find extents that will work.
+ */
+static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ u64 offset, u64 bytes, u64 min_bytes)
+{
+ struct btrfs_free_space *entry;
+ struct rb_node *node;
+ int ret = -ENOSPC;
+
+ if (block_group->total_bitmaps == 0)
+ return -ENOSPC;
+
+ entry = tree_search_offset(block_group,
+ offset_to_bitmap(block_group, offset),
+ 0, 1);
+ if (!entry)
+ return -ENOSPC;
+ node = &entry->offset_index;
+ do {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
+ node = rb_next(&entry->offset_index);
+ if (!entry->bitmap)
+ continue;
+ if (entry->bytes < min_bytes)
+ continue;
+ ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
+ bytes, min_bytes);
+ } while (ret && node);
+
+ return ret;
+}
+
+/*
+ * here we try to find a cluster of blocks in a block group. The goal
+ * is to find at least bytes free and up to empty_size + bytes free.
+ * We might not find them all in one contiguous area.
+ *
+ * returns zero and sets up cluster if things worked out, otherwise
+ * it returns -enospc
+ */
+int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_free_cluster *cluster,
+ u64 offset, u64 bytes, u64 empty_size)
+{
+ u64 min_bytes;
+ int ret;
+
+ /* for metadata, allow allocates with more holes */
+ if (btrfs_test_opt(root, SSD_SPREAD)) {
+ min_bytes = bytes + empty_size;
+ } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ /*
+ * we want to do larger allocations when we are
+ * flushing out the delayed refs, it helps prevent
+ * making more work as we go along.
+ */
+ if (trans->transaction->delayed_refs.flushing)
+ min_bytes = max(bytes, (bytes + empty_size) >> 1);
+ else
+ min_bytes = max(bytes, (bytes + empty_size) >> 4);
+ } else
+ min_bytes = max(bytes, (bytes + empty_size) >> 2);
+
+ spin_lock(&block_group->tree_lock);
+
+ /*
+ * If we know we don't have enough space to make a cluster don't even
+ * bother doing all the work to try and find one.
+ */
+ if (block_group->free_space < min_bytes) {
+ spin_unlock(&block_group->tree_lock);
+ return -ENOSPC;
}
- cluster->max_size = max_extent;
-got_it:
- ret = 0;
- atomic_inc(&block_group->count);
- list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
- cluster->block_group = block_group;
+ spin_lock(&cluster->lock);
+
+ /* someone already found a cluster, hooray */
+ if (cluster->block_group) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
+ min_bytes);
+ if (ret)
+ ret = setup_cluster_bitmap(block_group, cluster, offset,
+ bytes, min_bytes);
+
+ if (!ret) {
+ atomic_inc(&block_group->count);
+ list_add_tail(&cluster->block_group_list,
+ &block_group->cluster_list);
+ cluster->block_group = block_group;
+ }
out:
spin_unlock(&cluster->lock);
spin_unlock(&block_group->tree_lock);
@@ -2111,8 +2174,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
spin_lock_init(&cluster->refill_lock);
cluster->root = RB_ROOT;
cluster->max_size = 0;
- cluster->points_to_bitmap = false;
INIT_LIST_HEAD(&cluster->block_group_list);
cluster->block_group = NULL;
}
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen)
+{
+ struct btrfs_free_space *entry = NULL;
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+ u64 bytes = 0;
+ u64 actually_trimmed;
+ int ret = 0;
+
+ *trimmed = 0;
+
+ while (start < end) {
+ spin_lock(&block_group->tree_lock);
+
+ if (block_group->free_space < minlen) {
+ spin_unlock(&block_group->tree_lock);
+ break;
+ }
+
+ entry = tree_search_offset(block_group, start, 0, 1);
+ if (!entry)
+ entry = tree_search_offset(block_group,
+ offset_to_bitmap(block_group,
+ start),
+ 1, 1);
+
+ if (!entry || entry->offset >= end) {
+ spin_unlock(&block_group->tree_lock);
+ break;
+ }
+
+ if (entry->bitmap) {
+ ret = search_bitmap(block_group, entry, &start, &bytes);
+ if (!ret) {
+ if (start >= end) {
+ spin_unlock(&block_group->tree_lock);
+ break;
+ }
+ bytes = min(bytes, end - start);
+ bitmap_clear_bits(block_group, entry,
+ start, bytes);
+ if (entry->bytes == 0)
+ free_bitmap(block_group, entry);
+ } else {
+ start = entry->offset + BITS_PER_BITMAP *
+ block_group->sectorsize;
+ spin_unlock(&block_group->tree_lock);
+ ret = 0;
+ continue;
+ }
+ } else {
+ start = entry->offset;
+ bytes = min(entry->bytes, end - start);
+ unlink_free_space(block_group, entry);
+ kfree(entry);
+ }
+
+ spin_unlock(&block_group->tree_lock);
+
+ if (bytes >= minlen) {
+ int update_ret;
+ update_ret = btrfs_update_reserved_bytes(block_group,
+ bytes, 1, 1);
+
+ ret = btrfs_error_discard_extent(fs_info->extent_root,
+ start,
+ bytes,
+ &actually_trimmed);
+
+ btrfs_add_free_space(block_group,
+ start, bytes);
+ if (!update_ret)
+ btrfs_update_reserved_bytes(block_group,
+ bytes, 0, 1);
+
+ if (ret)
+ break;
+ *trimmed += actually_trimmed;
+ }
+ start += bytes;
+ bytes = 0;
+
+ if (fatal_signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ cond_resched();
+ }
+
+ return ret;
+}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index e49ca5c321b5..65c3b935289f 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
int btrfs_return_cluster_to_free_space(
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster);
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+ u64 *trimmed, u64 start, u64 end, u64 minlen);
#endif
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index c56eb5909172..c05a08f4c411 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
int slot;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
search_key.type = -1;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 160b55b3e132..93c28a1d6bdc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -50,6 +50,7 @@
#include "tree-log.h"
#include "compression.h"
#include "locking.h"
+#include "free-space-cache.h"
struct btrfs_iget_args {
u64 ino;
@@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
+struct kmem_cache *btrfs_free_space_cachep;
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -82,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
};
-static void btrfs_truncate(struct inode *inode);
+static int btrfs_setsize(struct inode *inode, loff_t newsize);
+static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
@@ -90,13 +93,14 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long *nr_written, int unlock);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
err = btrfs_init_acl(trans, inode, dir);
if (!err)
- err = btrfs_xattr_security_init(trans, inode, dir);
+ err = btrfs_xattr_security_init(trans, inode, dir, qstr);
return err;
}
@@ -287,6 +291,7 @@ static noinline int add_async_extent(struct async_cow *cow,
struct async_extent *async_extent;
async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
+ BUG_ON(!async_extent);
async_extent->start = start;
async_extent->ram_size = ram_size;
async_extent->compressed_size = compressed_size;
@@ -381,9 +386,11 @@ again:
*/
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
(btrfs_test_opt(root, COMPRESS) ||
- (BTRFS_I(inode)->force_compress))) {
+ (BTRFS_I(inode)->force_compress) ||
+ (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
WARN_ON(pages);
pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+ BUG_ON(!pages);
if (BTRFS_I(inode)->force_compress)
compress_type = BTRFS_I(inode)->force_compress;
@@ -416,7 +423,7 @@ again:
}
if (start == 0) {
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -612,6 +619,7 @@ retry:
GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_reserve_extent(trans, root,
async_extent->compressed_size,
async_extent->compressed_size,
@@ -643,6 +651,7 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
@@ -771,7 +780,7 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(root == root->fs_info->tree_root);
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -819,6 +828,7 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(ret);
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = start;
em->orig_start = em->start;
ram_size = ins.offset;
@@ -1049,7 +1059,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
} else {
trans = btrfs_join_transaction(root, 1);
}
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
cow_start = (u64)-1;
cur_offset = start;
@@ -1168,6 +1178,7 @@ out_check:
struct extent_map_tree *em_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = cur_offset;
em->orig_start = em->start;
em->len = num_bytes;
@@ -1249,7 +1260,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
else if (!btrfs_test_opt(root, COMPRESS) &&
- !(BTRFS_I(inode)->force_compress))
+ !(BTRFS_I(inode)->force_compress) &&
+ !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
ret = cow_file_range(inode, locked_page, start, end,
page_started, nr_written, 1);
else
@@ -1456,8 +1468,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
if (bio_flags & EXTENT_BIO_COMPRESSED) {
return btrfs_submit_compressed_read(inode, bio,
mirror_num, bio_flags);
- } else if (!skip_sum)
- btrfs_lookup_bio_sums(root, inode, bio, NULL);
+ } else if (!skip_sum) {
+ ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
+ if (ret)
+ return ret;
+ }
goto mapit;
} else if (!skip_sum) {
/* csum items have already been cloned */
@@ -1557,6 +1572,7 @@ out:
out_page:
unlock_page(page);
page_cache_release(page);
+ kfree(fixup);
}
/*
@@ -1703,7 +1719,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@@ -1720,6 +1736,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1778,6 +1795,8 @@ out:
static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate)
{
+ trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+
ClearPagePrivate2(page);
return btrfs_finish_ordered_io(page->mapping->host, start, end);
}
@@ -1888,10 +1907,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
else
rw = READ;
- BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
+ ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
failrec->last_mirror,
failrec->bio_flags, 0);
- return 0;
+ return ret;
}
/*
@@ -1907,7 +1926,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
private = 0;
if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY)) {
+ (u64)-1, 1, EXTENT_DIRTY, 0)) {
ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
start, &private_failure);
if (ret == 0) {
@@ -2275,7 +2294,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
* this cleans up any orphans that may be left on the list from the last use
* of this root.
*/
-void btrfs_orphan_cleanup(struct btrfs_root *root)
+int btrfs_orphan_cleanup(struct btrfs_root *root)
{
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -2285,10 +2304,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
int ret = 0, nr_unlink = 0, nr_truncate = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
- return;
+ return 0;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
path->reada = -1;
key.objectid = BTRFS_ORPHAN_OBJECTID;
@@ -2297,11 +2319,8 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- printk(KERN_ERR "Error searching slot for orphan: %d"
- "\n", ret);
- break;
- }
+ if (ret < 0)
+ goto out;
/*
* if ret == 0 means we found what we were searching for, which
@@ -2309,6 +2328,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
* find the key and see if we have stuff that matches
*/
if (ret > 0) {
+ ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
@@ -2336,7 +2356,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
- BUG_ON(IS_ERR(inode));
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ goto out;
+ }
/*
* add this inode to the orphan list so btrfs_orphan_del does
@@ -2354,6 +2377,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
*/
if (is_bad_inode(inode)) {
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
btrfs_orphan_del(trans, inode);
btrfs_end_transaction(trans, root);
iput(inode);
@@ -2362,17 +2389,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON(1);
+ iput(inode);
+ continue;
+ }
nr_truncate++;
- btrfs_truncate(inode);
+ ret = btrfs_truncate(inode);
} else {
nr_unlink++;
}
/* this will do delete_inode and everything for us */
iput(inode);
+ if (ret)
+ goto out;
}
- btrfs_free_path(path);
-
root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
if (root->orphan_block_rsv)
@@ -2381,13 +2413,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
if (root->orphan_block_rsv || root->orphan_item_inserted) {
trans = btrfs_join_transaction(root, 1);
- btrfs_end_transaction(trans, root);
+ if (!IS_ERR(trans))
+ btrfs_end_transaction(trans, root);
}
if (nr_unlink)
printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
if (nr_truncate)
printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
+
+out:
+ if (ret)
+ printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
+ btrfs_free_path(path);
+ return ret;
}
/*
@@ -2498,6 +2537,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
+ if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
+ inode->i_mapping->flags &= ~__GFP_FS;
/*
* try to precache a NULL acl entry for files that don't have
@@ -2626,10 +2667,10 @@ failed:
* recovery code. It remove a link in a directory with a given name, and
* also drops the back refs in the inode to the directory
*/
-int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
- const char *name, int name_len)
+static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *dir, struct inode *inode,
+ const char *name, int name_len)
{
struct btrfs_path *path;
int ret = 0;
@@ -2641,7 +2682,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto err;
+ goto out;
}
path->leave_spinning = 1;
@@ -2701,12 +2742,25 @@ err:
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
btrfs_update_inode(trans, root, dir);
- btrfs_drop_nlink(inode);
- ret = btrfs_update_inode(trans, root, inode);
out:
return ret;
}
+int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *dir, struct inode *inode,
+ const char *name, int name_len)
+{
+ int ret;
+ ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
+ if (!ret) {
+ btrfs_drop_nlink(inode);
+ ret = btrfs_update_inode(trans, root, inode);
+ }
+ return ret;
+}
+
+
/* helper to check if there is any shared block in the path */
static int check_path_shared(struct btrfs_root *root,
struct btrfs_path *path)
@@ -2714,9 +2768,10 @@ static int check_path_shared(struct btrfs_root *root,
struct extent_buffer *eb;
int level;
u64 refs = 1;
- int uninitialized_var(ret);
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ int ret;
+
if (!path->nodes[level])
break;
eb = path->nodes[level];
@@ -2727,7 +2782,7 @@ static int check_path_shared(struct btrfs_root *root,
if (refs > 1)
return 1;
}
- return ret; /* XXX callers? */
+ return 0;
}
/*
@@ -3527,7 +3582,13 @@ out:
return ret;
}
-int btrfs_cont_expand(struct inode *inode, loff_t size)
+/*
+ * This function puts in dummy file extents for the area we're creating a hole
+ * for. So if we are truncating this file to a larger size we need to insert
+ * these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for
+ * the range between oldsize and size
+ */
+int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3535,7 +3596,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
u64 mask = root->sectorsize - 1;
- u64 hole_start = (inode->i_size + mask) & ~mask;
+ u64 hole_start = (oldsize + mask) & ~mask;
u64 block_end = (size + mask) & ~mask;
u64 last_byte;
u64 cur_offset;
@@ -3580,13 +3641,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
err = btrfs_drop_extents(trans, inode, cur_offset,
cur_offset + hole_size,
&hint_byte, 1);
- BUG_ON(err);
+ if (err)
+ break;
err = btrfs_insert_file_extent(trans, root,
inode->i_ino, cur_offset, 0,
0, hole_size, 0, hole_size,
0, 0, 0);
- BUG_ON(err);
+ if (err)
+ break;
btrfs_drop_extent_cache(inode, hole_start,
last_byte - 1, 0);
@@ -3606,81 +3669,41 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
return err;
}
-static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
+static int btrfs_setsize(struct inode *inode, loff_t newsize)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- unsigned long nr;
+ loff_t oldsize = i_size_read(inode);
int ret;
- if (attr->ia_size == inode->i_size)
+ if (newsize == oldsize)
return 0;
- if (attr->ia_size > inode->i_size) {
- unsigned long limit;
- limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
- if (attr->ia_size > inode->i_sb->s_maxbytes)
- return -EFBIG;
- if (limit != RLIM_INFINITY && attr->ia_size > limit) {
- send_sig(SIGXFSZ, current, 0);
- return -EFBIG;
- }
- }
-
- trans = btrfs_start_transaction(root, 5);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- btrfs_set_trans_block_group(trans, inode);
-
- ret = btrfs_orphan_add(trans, inode);
- BUG_ON(ret);
-
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
-
- if (attr->ia_size > inode->i_size) {
- ret = btrfs_cont_expand(inode, attr->ia_size);
+ if (newsize > oldsize) {
+ i_size_write(inode, newsize);
+ btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+ truncate_pagecache(inode, oldsize, newsize);
+ ret = btrfs_cont_expand(inode, oldsize, newsize);
if (ret) {
- btrfs_truncate(inode);
+ btrfs_setsize(inode, oldsize);
return ret;
}
- i_size_write(inode, attr->ia_size);
- btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+ mark_inode_dirty(inode);
+ } else {
- trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
- btrfs_set_trans_block_group(trans, inode);
- trans->block_rsv = root->orphan_block_rsv;
- BUG_ON(!trans->block_rsv);
+ /*
+ * We're truncating a file that used to have good data down to
+ * zero. Make sure it gets into the ordered flush list so that
+ * any new writes get down to disk quickly.
+ */
+ if (newsize == 0)
+ BTRFS_I(inode)->ordered_data_close = 1;
- ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
- if (inode->i_nlink > 0) {
- ret = btrfs_orphan_del(trans, inode);
- BUG_ON(ret);
- }
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
- return 0;
+ /* we don't support swapfiles, so vmtruncate shouldn't fail */
+ truncate_setsize(inode, newsize);
+ ret = btrfs_truncate(inode);
}
- /*
- * We're truncating a file that used to have good data down to
- * zero. Make sure it gets into the ordered flush list so that
- * any new writes get down to disk quickly.
- */
- if (attr->ia_size == 0)
- BTRFS_I(inode)->ordered_data_close = 1;
-
- /* we don't support swapfiles, so vmtruncate shouldn't fail */
- ret = vmtruncate(inode, attr->ia_size);
- BUG_ON(ret);
-
- return 0;
+ return ret;
}
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -3697,7 +3720,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
- err = btrfs_setattr_size(inode, attr);
+ err = btrfs_setsize(inode, attr->ia_size);
if (err)
return err;
}
@@ -3720,6 +3743,8 @@ void btrfs_evict_inode(struct inode *inode)
unsigned long nr;
int ret;
+ trace_btrfs_inode_evict(inode);
+
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
root == root->fs_info->tree_root))
@@ -4062,7 +4087,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
BTRFS_I(inode)->root = root;
memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
btrfs_read_locked_inode(inode);
-
inode_tree_add(inode);
unlock_new_inode(inode);
if (new)
@@ -4134,11 +4158,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
}
srcu_read_unlock(&root->fs_info->subvol_srcu, index);
- if (root != sub_root) {
+ if (!IS_ERR(inode) && root != sub_root) {
down_read(&root->fs_info->cleanup_work_sem);
if (!(inode->i_sb->s_flags & MS_RDONLY))
- btrfs_orphan_cleanup(sub_root);
+ ret = btrfs_orphan_cleanup(sub_root);
up_read(&root->fs_info->cleanup_work_sem);
+ if (ret)
+ inode = ERR_PTR(ret);
}
return inode;
@@ -4272,6 +4298,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
while (di_cur < di_total) {
struct btrfs_key location;
+ if (verify_dir_item(root, leaf, di))
+ break;
+
name_len = btrfs_dir_name_len(leaf, di);
if (name_len <= sizeof(tmp_name)) {
name_ptr = tmp_name;
@@ -4347,6 +4376,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
trans = btrfs_join_transaction_nolock(root, 1);
else
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
btrfs_set_trans_block_group(trans, inode);
if (nolock)
ret = btrfs_end_transaction_nolock(trans, root);
@@ -4372,6 +4403,7 @@ void btrfs_dirty_inode(struct inode *inode)
return;
trans = btrfs_join_transaction(root, 1);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_update_inode(trans, root, inode);
@@ -4504,6 +4536,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
return ERR_PTR(-ENOMEM);
if (dir) {
+ trace_btrfs_inode_request(dir);
+
ret = btrfs_set_inode_index(dir, index);
if (ret) {
iput(inode);
@@ -4572,12 +4606,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if ((mode & S_IFREG)) {
if (btrfs_test_opt(root, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
- if (btrfs_test_opt(root, NODATACOW))
+ if (btrfs_test_opt(root, NODATACOW) ||
+ (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
}
insert_inode_hash(inode);
inode_tree_add(inode);
+
+ trace_btrfs_inode_new(inode);
+
return inode;
fail:
if (dir)
@@ -4692,7 +4730,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4753,7 +4791,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4794,12 +4832,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
int err;
int drop_inode = 0;
- if (inode->i_nlink == 0)
- return -ENOENT;
-
/* do not allow sys_link's with other subvols of the same device */
if (root->objectid != BTRFS_I(inode)->root->objectid)
- return -EPERM;
+ return -EXDEV;
+
+ if (inode->i_nlink == ~0U)
+ return -EMLINK;
btrfs_inc_nlink(inode);
inode->i_ctime = CURRENT_TIME;
@@ -4809,10 +4847,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
/*
- * 1 item for inode ref
+ * 2 items for inode and inode ref
* 2 items for dir items
+ * 1 item for parent inode
*/
- trans = btrfs_start_transaction(root, 3);
+ trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto fail;
@@ -4881,7 +4920,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
drop_on_err = 1;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
goto out_fail;
@@ -5176,6 +5215,8 @@ again:
em = NULL;
btrfs_release_path(root, path);
trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return ERR_CAST(trans);
goto again;
}
map = kmap(page);
@@ -5252,6 +5293,9 @@ insert:
}
write_unlock(&em_tree->lock);
out:
+
+ trace_btrfs_get_extent(root, em);
+
if (path)
btrfs_free_path(path);
if (trans) {
@@ -5266,6 +5310,128 @@ out:
return em;
}
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
+{
+ struct extent_map *em;
+ struct extent_map *hole_em = NULL;
+ u64 range_start = start;
+ u64 end;
+ u64 found;
+ u64 found_end;
+ int err = 0;
+
+ em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+ if (IS_ERR(em))
+ return em;
+ if (em) {
+ /*
+ * if our em maps to a hole, there might
+ * actually be delalloc bytes behind it
+ */
+ if (em->block_start != EXTENT_MAP_HOLE)
+ return em;
+ else
+ hole_em = em;
+ }
+
+ /* check to see if we've wrapped (len == -1 or similar) */
+ end = start + len;
+ if (end < start)
+ end = (u64)-1;
+ else
+ end -= 1;
+
+ em = NULL;
+
+ /* ok, we didn't find anything, lets look for delalloc */
+ found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+ end, len, EXTENT_DELALLOC, 1);
+ found_end = range_start + found;
+ if (found_end < range_start)
+ found_end = (u64)-1;
+
+ /*
+ * we didn't find anything useful, return
+ * the original results from get_extent()
+ */
+ if (range_start > end || found_end <= start) {
+ em = hole_em;
+ hole_em = NULL;
+ goto out;
+ }
+
+ /* adjust the range_start to make sure it doesn't
+ * go backwards from the start they passed in
+ */
+ range_start = max(start,range_start);
+ found = found_end - range_start;
+
+ if (found > 0) {
+ u64 hole_start = start;
+ u64 hole_len = len;
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /*
+ * when btrfs_get_extent can't find anything it
+ * returns one huge hole
+ *
+ * make sure what it found really fits our range, and
+ * adjust to make sure it is based on the start from
+ * the caller
+ */
+ if (hole_em) {
+ u64 calc_end = extent_map_end(hole_em);
+
+ if (calc_end <= start || (hole_em->start > end)) {
+ free_extent_map(hole_em);
+ hole_em = NULL;
+ } else {
+ hole_start = max(hole_em->start, start);
+ hole_len = calc_end - hole_start;
+ }
+ }
+ em->bdev = NULL;
+ if (hole_em && range_start > hole_start) {
+ /* our hole starts before our delalloc, so we
+ * have to return just the parts of the hole
+ * that go until the delalloc starts
+ */
+ em->len = min(hole_len,
+ range_start - hole_start);
+ em->start = hole_start;
+ em->orig_start = hole_start;
+ /*
+ * don't adjust block start at all,
+ * it is fixed at EXTENT_MAP_HOLE
+ */
+ em->block_start = hole_em->block_start;
+ em->block_len = hole_len;
+ } else {
+ em->start = range_start;
+ em->len = found;
+ em->orig_start = range_start;
+ em->block_start = EXTENT_MAP_DELALLOC;
+ em->block_len = found;
+ }
+ } else if (hole_em) {
+ return hole_em;
+ }
+out:
+
+ free_extent_map(hole_em);
+ if (err) {
+ free_extent_map(em);
+ return ERR_PTR(err);
+ }
+ return em;
+}
+
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
@@ -5280,8 +5446,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
trans = btrfs_join_transaction(root, 0);
- if (!trans)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(trans))
+ return ERR_CAST(trans);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -5505,7 +5671,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
* while we look for nocow cross refs
*/
trans = btrfs_join_transaction(root, 0);
- if (!trans)
+ if (IS_ERR(trans))
goto must_cow;
if (can_nocow_odirect(trans, inode, start, len) == 1) {
@@ -5613,6 +5779,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
kfree(dip->csums);
kfree(dip);
+
+ /* If we had a csum failure make sure to clear the uptodate flag */
+ if (err)
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
dio_end_io(bio, err);
}
@@ -5640,7 +5810,7 @@ again:
BUG_ON(!ordered);
trans = btrfs_join_transaction(root, 1);
- if (!trans) {
+ if (IS_ERR(trans)) {
err = -ENOMEM;
goto out;
}
@@ -5714,6 +5884,10 @@ out_done:
kfree(dip->csums);
kfree(dip);
+
+ /* If we had an error make sure to clear the uptodate flag */
+ if (err)
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
dio_end_io(bio, err);
}
@@ -5787,9 +5961,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
__btrfs_submit_bio_start_direct_io,
__btrfs_submit_bio_done);
goto err;
- } else if (!skip_sum)
- btrfs_lookup_bio_sums_dio(root, inode, bio,
+ } else if (!skip_sum) {
+ ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
file_offset, csums);
+ if (ret)
+ goto err;
+ }
ret = btrfs_map_bio(root, rw, bio, 0, 1);
err:
@@ -5813,6 +5990,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int nr_pages = 0;
u32 *csums = dip->csums;
int ret = 0;
+ int write = rw & REQ_WRITE;
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
if (!bio)
@@ -5849,7 +6027,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
goto out_err;
}
- if (!skip_sum)
+ /* Write's use the ordered csums */
+ if (!write && !skip_sum)
csums = csums + nr_pages;
start_sector += submit_len >> 9;
file_offset += submit_len;
@@ -5917,9 +6096,11 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
}
dip->csums = NULL;
- if (!skip_sum) {
+ /* Write's use the ordered csum stuff, so we don't need dip->csums */
+ if (!write && !skip_sum) {
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
if (!dip->csums) {
+ kfree(dip);
ret = -ENOMEM;
goto free_ordered;
}
@@ -6088,7 +6269,7 @@ out:
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
- return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+ return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
int btrfs_readpage(struct file *file, struct page *page)
@@ -6338,28 +6519,42 @@ out:
return ret;
}
-static void btrfs_truncate(struct inode *inode)
+static int btrfs_truncate(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
+ int err = 0;
struct btrfs_trans_handle *trans;
unsigned long nr;
u64 mask = root->sectorsize - 1;
- if (!S_ISREG(inode->i_mode)) {
- WARN_ON(1);
- return;
- }
-
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (ret)
- return;
+ return ret;
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+ trans = btrfs_start_transaction(root, 5);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ btrfs_set_trans_block_group(trans, inode);
+
+ ret = btrfs_orphan_add(trans, inode);
+ if (ret) {
+ btrfs_end_transaction(trans, root);
+ return ret;
+ }
+
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+
+ /* Now start a transaction for the truncate */
trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = root->orphan_block_rsv;
@@ -6386,29 +6581,38 @@ static void btrfs_truncate(struct inode *inode)
while (1) {
if (!trans) {
trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = root->orphan_block_rsv;
}
ret = btrfs_block_rsv_check(trans, root,
root->orphan_block_rsv, 0, 5);
- if (ret) {
- BUG_ON(ret != -EAGAIN);
+ if (ret == -EAGAIN) {
ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
+ if (ret)
+ return ret;
trans = NULL;
continue;
+ } else if (ret) {
+ err = ret;
+ break;
}
ret = btrfs_truncate_inode_items(trans, root, inode,
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
- if (ret != -EAGAIN)
+ if (ret != -EAGAIN) {
+ err = ret;
break;
+ }
ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
+ if (ret) {
+ err = ret;
+ break;
+ }
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
@@ -6418,16 +6622,27 @@ static void btrfs_truncate(struct inode *inode)
if (ret == 0 && inode->i_nlink > 0) {
ret = btrfs_orphan_del(trans, inode);
- BUG_ON(ret);
+ if (ret)
+ err = ret;
+ } else if (ret && inode->i_nlink > 0) {
+ /*
+ * Failed to do the truncate, remove us from the in memory
+ * orphan list.
+ */
+ ret = btrfs_orphan_del(NULL, inode);
}
ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
+ if (ret && !err)
+ err = ret;
nr = trans->blocks_used;
ret = btrfs_end_transaction_throttle(trans, root);
- BUG_ON(ret);
+ if (ret && !err)
+ err = ret;
btrfs_btree_balance_dirty(root, nr);
+
+ return err;
}
/*
@@ -6494,9 +6709,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
- spin_lock_init(&ei->accounting_lock);
atomic_set(&ei->outstanding_extents, 0);
- ei->reserved_extents = 0;
+ atomic_set(&ei->reserved_extents, 0);
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
@@ -6532,7 +6746,7 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
- WARN_ON(BTRFS_I(inode)->reserved_extents);
+ WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
/*
* This can happen where we create an inode, but somebody else also
@@ -6624,6 +6838,8 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_transaction_cachep);
if (btrfs_path_cachep)
kmem_cache_destroy(btrfs_path_cachep);
+ if (btrfs_free_space_cachep)
+ kmem_cache_destroy(btrfs_free_space_cachep);
}
int btrfs_init_cachep(void)
@@ -6652,6 +6868,12 @@ int btrfs_init_cachep(void)
if (!btrfs_path_cachep)
goto fail;
+ btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
+ sizeof(struct btrfs_free_space), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ if (!btrfs_free_space_cachep)
+ goto fail;
+
return 0;
fail:
btrfs_destroy_cachep();
@@ -6670,6 +6892,26 @@ static int btrfs_getattr(struct vfsmount *mnt,
return 0;
}
+/*
+ * If a file is moved, it will inherit the cow and compression flags of the new
+ * directory.
+ */
+static void fixup_inode_flags(struct inode *dir, struct inode *inode)
+{
+ struct btrfs_inode *b_dir = BTRFS_I(dir);
+ struct btrfs_inode *b_inode = BTRFS_I(inode);
+
+ if (b_dir->flags & BTRFS_INODE_NODATACOW)
+ b_inode->flags |= BTRFS_INODE_NODATACOW;
+ else
+ b_inode->flags &= ~BTRFS_INODE_NODATACOW;
+
+ if (b_dir->flags & BTRFS_INODE_COMPRESS)
+ b_inode->flags |= BTRFS_INODE_COMPRESS;
+ else
+ b_inode->flags &= ~BTRFS_INODE_COMPRESS;
+}
+
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
@@ -6772,11 +7014,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry->d_name.name,
old_dentry->d_name.len);
} else {
- btrfs_inc_nlink(old_dentry->d_inode);
- ret = btrfs_unlink_inode(trans, root, old_dir,
- old_dentry->d_inode,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
+ ret = __btrfs_unlink_inode(trans, root, old_dir,
+ old_dentry->d_inode,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+ if (!ret)
+ ret = btrfs_update_inode(trans, root, old_inode);
}
BUG_ON(ret);
@@ -6803,6 +7046,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
+ fixup_inode_flags(new_dir, old_inode);
+
ret = btrfs_add_link(trans, new_dir, old_inode,
new_dentry->d_name.name,
new_dentry->d_name.len, 0, index);
@@ -6968,7 +7213,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -7204,7 +7449,6 @@ static const struct address_space_operations btrfs_aops = {
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
.readpages = btrfs_readpages,
- .sync_page = block_sync_page,
.direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
@@ -7220,7 +7464,6 @@ static const struct address_space_operations btrfs_symlink_aops = {
};
static const struct inode_operations btrfs_file_inode_operations = {
- .truncate = btrfs_truncate,
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
.setxattr = btrfs_setxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a506a22b522a..7c07fe26b7cf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -40,6 +40,7 @@
#include <linux/xattr.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/blkdev.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -138,6 +139,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
return 0;
}
+static int check_flags(unsigned int flags)
+{
+ if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+ FS_NOATIME_FL | FS_NODUMP_FL | \
+ FS_SYNC_FL | FS_DIRSYNC_FL | \
+ FS_NOCOMP_FL | FS_COMPR_FL | \
+ FS_NOCOW_FL | FS_COW_FL))
+ return -EOPNOTSUPP;
+
+ if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
+ return -EINVAL;
+
+ if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
+ return -EINVAL;
+
+ return 0;
+}
+
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
struct inode *inode = file->f_path.dentry->d_inode;
@@ -153,12 +172,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
- if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
- FS_NOATIME_FL | FS_NODUMP_FL | \
- FS_SYNC_FL | FS_DIRSYNC_FL))
- return -EOPNOTSUPP;
+ ret = check_flags(flags);
+ if (ret)
+ return ret;
- if (!is_owner_or_cap(inode))
+ if (!inode_owner_or_capable(inode))
return -EACCES;
mutex_lock(&inode->i_mutex);
@@ -201,9 +219,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
else
ip->flags &= ~BTRFS_INODE_DIRSYNC;
+ /*
+ * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
+ * flag may be changed automatically if compression code won't make
+ * things smaller.
+ */
+ if (flags & FS_NOCOMP_FL) {
+ ip->flags &= ~BTRFS_INODE_COMPRESS;
+ ip->flags |= BTRFS_INODE_NOCOMPRESS;
+ } else if (flags & FS_COMPR_FL) {
+ ip->flags |= BTRFS_INODE_COMPRESS;
+ ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ }
+ if (flags & FS_NOCOW_FL)
+ ip->flags |= BTRFS_INODE_NODATACOW;
+ else if (flags & FS_COW_FL)
+ ip->flags &= ~BTRFS_INODE_NODATACOW;
trans = btrfs_join_transaction(root, 1);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
@@ -213,9 +247,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
btrfs_end_transaction(trans, root);
mnt_drop_write(file->f_path.mnt);
+
+ ret = 0;
out_unlock:
mutex_unlock(&inode->i_mutex);
- return 0;
+ return ret;
}
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
@@ -225,6 +261,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
return put_user(inode->i_generation, arg);
}
+static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
+{
+ struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_device *device;
+ struct request_queue *q;
+ struct fstrim_range range;
+ u64 minlen = ULLONG_MAX;
+ u64 num_devices = 0;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ mutex_lock(&fs_info->fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+ if (!device->bdev)
+ continue;
+ q = bdev_get_queue(device->bdev);
+ if (blk_queue_discard(q)) {
+ num_devices++;
+ minlen = min((u64)q->limits.discard_granularity,
+ minlen);
+ }
+ }
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ if (!num_devices)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&range, arg, sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max(range.minlen, minlen);
+ ret = btrfs_trim_fs(root, &range);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(arg, &range, sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+}
+
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
char *name, int namelen,
@@ -409,7 +488,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
if (ret)
goto fail;
- btrfs_orphan_cleanup(pending_snapshot->snap);
+ ret = btrfs_orphan_cleanup(pending_snapshot->snap);
+ if (ret)
+ goto fail;
parent = dget_parent(dentry);
inode = btrfs_lookup_dentry(parent->d_inode, dentry);
@@ -907,6 +988,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
} else {
@@ -1067,12 +1152,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
- if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+ if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
return -EINVAL;
if (flags & ~BTRFS_SUBVOL_RDONLY)
return -EOPNOTSUPP;
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
@@ -1093,7 +1181,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_reset;
}
- ret = btrfs_update_root(trans, root,
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
btrfs_commit_transaction(trans, root);
@@ -1898,7 +1986,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
memcpy(&new_key, &key, sizeof(new_key));
new_key.objectid = inode->i_ino;
- new_key.offset = key.offset + destoff - off;
+ if (off <= key.offset)
+ new_key.offset = key.offset + destoff - off;
+ else
+ new_key.offset = destoff;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
@@ -2082,7 +2173,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
ret = -ENOMEM;
trans = btrfs_start_ioctl_transaction(root, 0);
- if (!trans)
+ if (IS_ERR(trans))
goto out_drop;
file->private_data = trans;
@@ -2138,9 +2229,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
path->leave_spinning = 1;
trans = btrfs_start_transaction(root, 1);
- if (!trans) {
+ if (IS_ERR(trans)) {
btrfs_free_path(path);
- return -ENOMEM;
+ return PTR_ERR(trans);
}
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
@@ -2201,7 +2292,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
int num_types = 4;
int alloc_size;
int ret = 0;
- int slot_count = 0;
+ u64 slot_count = 0;
int i, c;
if (copy_from_user(&space_args,
@@ -2240,7 +2331,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
goto out;
}
- slot_count = min_t(int, space_args.space_slots, slot_count);
+ slot_count = min_t(u64, space_args.space_slots, slot_count);
alloc_size = sizeof(*dest) * slot_count;
@@ -2260,6 +2351,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
for (i = 0; i < num_types; i++) {
struct btrfs_space_info *tmp;
+ if (!slot_count)
+ break;
+
info = NULL;
rcu_read_lock();
list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2281,7 +2375,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
memcpy(dest, &space, sizeof(space));
dest++;
space_args.total_spaces++;
+ slot_count--;
}
+ if (!slot_count)
+ break;
}
up_read(&info->groups_sem);
}
@@ -2332,10 +2429,15 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
struct btrfs_trans_handle *trans;
u64 transid;
+ int ret;
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
transid = trans->transid;
- btrfs_commit_transaction_async(trans, root, 0);
+ ret = btrfs_commit_transaction_async(trans, root, 0);
+ if (ret)
+ return ret;
if (argp)
if (copy_to_user(argp, &transid, sizeof(transid)))
@@ -2370,6 +2472,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_setflags(file, argp);
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
+ case FITRIM:
+ return btrfs_ioctl_fitrim(file, argp);
case BTRFS_IOC_SNAP_CREATE:
return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SNAP_CREATE_V2:
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
unsigned long tot_out;
unsigned long tot_len;
char *buf;
+ bool may_late_unmap, need_unmap;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
tot_in += in_len;
working_bytes = in_len;
+ may_late_unmap = need_unmap = false;
/* fast path: avoid using the working buffer */
if (in_page_bytes_left >= in_len) {
buf = data_in + in_offset;
bytes = in_len;
+ may_late_unmap = true;
goto cont;
}
@@ -329,14 +332,17 @@ cont:
if (working_bytes == 0 && tot_in >= tot_len)
break;
- kunmap(pages_in[page_in_index]);
- page_in_index++;
- if (page_in_index >= total_pages_in) {
+ if (page_in_index + 1 >= total_pages_in) {
ret = -1;
- data_in = NULL;
goto done;
}
- data_in = kmap(pages_in[page_in_index]);
+
+ if (may_late_unmap)
+ need_unmap = true;
+ else
+ kunmap(pages_in[page_in_index]);
+
+ data_in = kmap(pages_in[++page_in_index]);
in_page_bytes_left = PAGE_CACHE_SIZE;
in_offset = 0;
@@ -346,6 +352,8 @@ cont:
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
+ if (need_unmap)
+ kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "btrfs decompress failed\n");
ret = -1;
@@ -363,8 +371,7 @@ cont:
break;
}
done:
- if (data_in)
- kunmap(pages_in[page_in_index]);
+ kunmap(pages_in[page_in_index]);
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2b61e1ddcd99..a1c940425307 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
u64 file_offset)
{
struct rb_root *root = &tree->tree;
- struct rb_node *prev;
+ struct rb_node *prev = NULL;
struct rb_node *ret;
struct btrfs_ordered_extent *entry;
@@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
INIT_LIST_HEAD(&entry->list);
INIT_LIST_HEAD(&entry->root_extent_list);
+ trace_btrfs_ordered_extent_add(inode, entry);
+
spin_lock(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
&entry->rb_node);
@@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
struct list_head *cur;
struct btrfs_ordered_sum *sum;
+ trace_btrfs_ordered_extent_put(entry->inode, entry);
+
if (atomic_dec_and_test(&entry->refs)) {
while (!list_empty(&entry->list)) {
cur = entry->list.next;
@@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
+ trace_btrfs_ordered_extent_remove(inode, entry);
+
/*
* we have no more ordered extents for this inode and
* no dirty pages. We can safely remove it from the
@@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
u64 start = entry->file_offset;
u64 end = start + entry->len - 1;
+ trace_btrfs_ordered_extent_start(inode, entry);
+
/*
* pages in the range can be dirty, clean or writeback. We
* start IO on any dirty ones so the wait doesn't stall waiting
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 0d126be22b63..fb2605d998e9 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
#else
BUG();
#endif
+ break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 045c9c2b2d7e..58250e09eb05 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
new_node->bytenr = dest->node->start;
new_node->level = node->level;
new_node->lowest = node->lowest;
+ new_node->checked = 1;
new_node->root = dest;
if (!node->lowest) {
@@ -1723,6 +1724,7 @@ again:
eb = read_tree_block(dest, old_bytenr, blocksize,
old_ptr_gen);
+ BUG_ON(!eb);
btrfs_tree_lock(eb);
if (cow) {
ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -2028,6 +2030,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
while (1) {
trans = btrfs_start_transaction(root, 0);
+ BUG_ON(IS_ERR(trans));
trans->block_rsv = rc->block_rsv;
ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
@@ -2147,6 +2150,12 @@ again:
}
trans = btrfs_join_transaction(rc->extent_root, 1);
+ if (IS_ERR(trans)) {
+ if (!err)
+ btrfs_block_rsv_release(rc->extent_root,
+ rc->block_rsv, num_bytes);
+ return PTR_ERR(trans);
+ }
if (!err) {
if (num_bytes != rc->merging_rsv_size) {
@@ -2505,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
blocksize = btrfs_level_size(root, node->level);
generation = btrfs_node_ptr_generation(upper->eb, slot);
eb = read_tree_block(root, bytenr, blocksize, generation);
+ if (!eb) {
+ err = -EIO;
+ goto next;
+ }
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
@@ -2662,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc,
BUG_ON(block->key_ready);
eb = read_tree_block(rc->extent_root, block->bytenr,
block->key.objectid, block->key.offset);
+ BUG_ON(!eb);
WARN_ON(btrfs_header_level(eb) != block->level);
if (block->level == 0)
btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -3222,6 +3236,7 @@ truncate:
trans = btrfs_join_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_free_path(path);
+ ret = PTR_ERR(trans);
goto out;
}
@@ -3628,6 +3643,7 @@ int prepare_to_relocate(struct reloc_control *rc)
set_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root, 1);
+ BUG_ON(IS_ERR(trans));
btrfs_commit_transaction(trans, rc->extent_root);
return 0;
}
@@ -3644,6 +3660,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
u32 item_size;
int ret;
int err = 0;
+ int progress = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3656,8 +3673,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
while (1) {
+ progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
-
+ BUG_ON(IS_ERR(trans));
+restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
continue;
@@ -3770,6 +3789,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
}
}
+ if (trans && progress && err == -ENOSPC) {
+ ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+ rc->block_group->flags);
+ if (ret == 0) {
+ err = 0;
+ progress = 0;
+ goto restart;
+ }
+ }
btrfs_release_path(rc->extent_root, path);
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
@@ -3804,7 +3832,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
/* get rid of pinned extents */
trans = btrfs_join_transaction(rc->extent_root, 1);
- btrfs_commit_transaction(trans, rc->extent_root);
+ if (IS_ERR(trans))
+ err = PTR_ERR(trans);
+ else
+ btrfs_commit_transaction(trans, rc->extent_root);
out_free:
btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
btrfs_free_path(path);
@@ -4022,6 +4053,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
int ret;
trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
+ BUG_ON(IS_ERR(trans));
memset(&root->root_item.drop_progress, 0,
sizeof(root->root_item.drop_progress));
@@ -4125,6 +4157,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)
set_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root, 1);
+ if (IS_ERR(trans)) {
+ unset_reloc_control(rc);
+ err = PTR_ERR(trans);
+ goto out_free;
+ }
rc->merge_reloc_tree = 1;
@@ -4154,9 +4191,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)
unset_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root, 1);
- btrfs_commit_transaction(trans, rc->extent_root);
-out:
+ if (IS_ERR(trans))
+ err = PTR_ERR(trans);
+ else
+ btrfs_commit_transaction(trans, rc->extent_root);
+out_free:
kfree(rc);
+out:
while (!list_empty(&reloc_roots)) {
reloc_root = list_entry(reloc_roots.next,
struct btrfs_root, root_list);
@@ -4174,7 +4215,7 @@ out:
if (IS_ERR(fs_root))
err = PTR_ERR(fs_root);
else
- btrfs_orphan_cleanup(fs_root);
+ err = btrfs_orphan_cleanup(fs_root);
}
return err;
}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6a1086e83ffc..29b2d7c930eb 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
search_key.offset = (u64)-1;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
goto out;
@@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *leaf;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
ret = btrfs_search_slot(trans, root, key, path, -1, 1);
if (ret < 0)
goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b2130c46fdb5..2edfc039f098 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,6 +52,9 @@
#include "export.h"
#include "compression.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/btrfs.h>
+
static const struct super_operations btrfs_super_ops;
static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
@@ -155,7 +158,8 @@ enum {
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
- Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+ Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+ Opt_enospc_debug, Opt_err,
};
static match_table_t tokens = {
@@ -184,6 +188,7 @@ static match_table_t tokens = {
{Opt_space_cache, "space_cache"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+ {Opt_enospc_debug, "enospc_debug"},
{Opt_err, NULL},
};
@@ -358,6 +363,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_user_subvol_rm_allowed:
btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
break;
+ case Opt_enospc_debug:
+ btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -383,7 +391,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
struct btrfs_fs_devices **fs_devices)
{
substring_t args[MAX_OPT_ARGS];
- char *opts, *p;
+ char *opts, *orig, *p;
int error = 0;
int intarg;
@@ -397,6 +405,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
return -ENOMEM;
+ orig = opts;
while ((p = strsep(&opts, ",")) != NULL) {
int token;
@@ -432,7 +441,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
}
out_free_opts:
- kfree(opts);
+ kfree(orig);
out:
/*
* If no subvolume name is specified we use the default one. Allocate
@@ -614,6 +623,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
struct btrfs_root *root = btrfs_sb(sb);
int ret;
+ trace_btrfs_sync_fs(wait);
+
if (!wait) {
filemap_flush(root->fs_info->btree_inode->i_mapping);
return 0;
@@ -623,6 +634,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans, root);
return ret;
}
@@ -761,6 +774,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
}
btrfs_close_devices(fs_devices);
+ kfree(fs_info);
+ kfree(tree_root);
} else {
char b[BDEVNAME_SIZE];
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bae5c7b8bbe2..ce48eb59d615 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -57,7 +57,8 @@ static noinline int join_transaction(struct btrfs_root *root)
if (!cur_trans) {
cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
GFP_NOFS);
- BUG_ON(!cur_trans);
+ if (!cur_trans)
+ return -ENOMEM;
root->fs_info->generation++;
cur_trans->num_writers = 1;
cur_trans->num_joined = 0;
@@ -195,7 +196,11 @@ again:
wait_current_trans(root);
ret = join_transaction(root);
- BUG_ON(ret);
+ if (ret < 0) {
+ if (type != TRANS_JOIN_NOLOCK)
+ mutex_unlock(&root->fs_info->trans_mutex);
+ return ERR_PTR(ret);
+ }
cur_trans = root->fs_info->running_transaction;
cur_trans->use_count++;
@@ -1156,11 +1161,17 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
struct btrfs_transaction *cur_trans;
ac = kmalloc(sizeof(*ac), GFP_NOFS);
- BUG_ON(!ac);
+ if (!ac)
+ return -ENOMEM;
INIT_DELAYED_WORK(&ac->work, do_async_commit);
ac->root = root;
ac->newtrans = btrfs_join_transaction(root, 0);
+ if (IS_ERR(ac->newtrans)) {
+ int err = PTR_ERR(ac->newtrans);
+ kfree(ac);
+ return err;
+ }
/* take transaction reference */
mutex_lock(&root->fs_info->trans_mutex);
@@ -1384,6 +1395,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
put_transaction(cur_trans);
put_transaction(cur_trans);
+ trace_btrfs_transaction_commit(root);
+
mutex_unlock(&root->fs_info->trans_mutex);
if (current->journal_info == trans)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 054744ac5719..c50271ad3157 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
}
dst_copy = kmalloc(item_size, GFP_NOFS);
src_copy = kmalloc(item_size, GFP_NOFS);
+ if (!dst_copy || !src_copy) {
+ btrfs_release_path(root, path);
+ kfree(dst_copy);
+ kfree(src_copy);
+ return -ENOMEM;
+ }
read_extent_buffer(eb, src_copy, src_ptr, item_size);
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(leaf, di, &location);
name_len = btrfs_dir_name_len(leaf, di);
name = kmalloc(name_len, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
btrfs_release_path(root, path);
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log,
int match = 0;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
if (ret != 0)
goto out;
@@ -787,12 +799,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
struct inode *dir;
int ret;
struct btrfs_inode_ref *ref;
- struct btrfs_dir_item *di;
struct inode *inode;
char *name;
int namelen;
unsigned long ref_ptr;
unsigned long ref_end;
+ int search_done = 0;
/*
* it is possible that we didn't log all the parent directories
@@ -833,7 +845,10 @@ again:
* existing back reference, and we don't want to create
* dangling pointers in the directory.
*/
-conflict_again:
+
+ if (search_done)
+ goto insert;
+
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
if (ret == 0) {
char *victim_name;
@@ -874,37 +889,21 @@ conflict_again:
ret = btrfs_unlink_inode(trans, root, dir,
inode, victim_name,
victim_name_len);
- kfree(victim_name);
- btrfs_release_path(root, path);
- goto conflict_again;
}
kfree(victim_name);
ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
}
BUG_ON(ret);
- }
- btrfs_release_path(root, path);
- /* look for a conflicting sequence number */
- di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
- btrfs_inode_ref_index(eb, ref),
- name, namelen, 0);
- if (di && !IS_ERR(di)) {
- ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
- }
- btrfs_release_path(root, path);
-
-
- /* look for a conflicting name */
- di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
- name, namelen, 0);
- if (di && !IS_ERR(di)) {
- ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
+ /*
+ * NOTE: we have searched root tree and checked the
+ * coresponding ref, it does not need to check again.
+ */
+ search_done = 1;
}
btrfs_release_path(root, path);
+insert:
/* insert our name */
ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
btrfs_inode_ref_index(eb, ref));
@@ -967,6 +966,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
key.offset = (u64)-1;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -1178,6 +1179,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
log_type = btrfs_dir_type(eb, di);
read_extent_buffer(eb, name, (unsigned long)(di + 1),
name_len);
@@ -1269,6 +1273,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
+ if (verify_dir_item(root, eb, di))
+ return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
BUG_ON(ret);
@@ -1395,6 +1401,11 @@ again:
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
+ if (verify_dir_item(root, eb, di)) {
+ ret = -EIO;
+ goto out;
+ }
+
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
@@ -1692,6 +1703,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ if (!next)
+ return -ENOMEM;
if (*level == 1) {
wc->process_func(root, next, wc, ptr_gen);
@@ -1802,7 +1815,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
int orig_level;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
level = btrfs_header_level(log->node);
orig_level = level;
@@ -2032,6 +2046,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
mutex_unlock(&log_root_tree->log_mutex);
+ ret = 0;
goto out;
}
atomic_set(&log_root_tree->log_commit[index2], 1);
@@ -2096,7 +2111,7 @@ out:
smp_mb();
if (waitqueue_active(&root->log_commit_wait[index1]))
wake_up(&root->log_commit_wait[index1]);
- return 0;
+ return ret;
}
static void free_log_tree(struct btrfs_trans_handle *trans,
@@ -2194,6 +2209,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
log = root->log_root;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
name, name_len, -1);
if (IS_ERR(di)) {
@@ -2594,6 +2612,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
+ if (!ins_data)
+ return -ENOMEM;
+
ins_sizes = (u32 *)ins_data;
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
@@ -2725,7 +2746,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
log = root->log_root;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
dst_path = btrfs_alloc_path();
+ if (!dst_path) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
min_key.objectid = inode->i_ino;
min_key.type = BTRFS_INODE_ITEM_KEY;
@@ -3075,16 +3102,20 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
.stage = 0,
};
- fs_info->log_root_recovering = 1;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
+
+ fs_info->log_root_recovering = 1;
trans = btrfs_start_transaction(fs_info->tree_root, 0);
+ BUG_ON(IS_ERR(trans));
wc.trans = trans;
wc.pin = 1;
- walk_log_tree(trans, log_root_tree, &wc);
+ ret = walk_log_tree(trans, log_root_tree, &wc);
+ BUG_ON(ret);
again:
key.objectid = BTRFS_TREE_LOG_OBJECTID;
@@ -3108,8 +3139,7 @@ again:
log = btrfs_read_fs_root_no_radix(log_root_tree,
&found_key);
- BUG_ON(!log);
-
+ BUG_ON(IS_ERR(log));
tmp_key.objectid = found_key.offset;
tmp_key.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d158530233b7..309a57b9fc85 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -33,17 +33,6 @@
#include "volumes.h"
#include "async-thread.h"
-struct map_lookup {
- u64 type;
- int io_align;
- int io_width;
- int stripe_len;
- int sector_size;
- int num_stripes;
- int sub_stripes;
- struct btrfs_bio_stripe stripes[];
-};
-
static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_device *device);
@@ -162,7 +151,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
struct bio *cur;
int again = 0;
unsigned long num_run;
- unsigned long num_sync_run;
unsigned long batch_run = 0;
unsigned long limit;
unsigned long last_waited = 0;
@@ -173,11 +161,6 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
- /* we want to make sure that every time we switch from the sync
- * list to the normal list, we unplug
- */
- num_sync_run = 0;
-
loop:
spin_lock(&device->io_lock);
@@ -223,15 +206,6 @@ loop_lock:
spin_unlock(&device->io_lock);
- /*
- * if we're doing the regular priority list, make sure we unplug
- * for any high prio bios we've sent down
- */
- if (pending_bios == &device->pending_bios && num_sync_run > 0) {
- num_sync_run = 0;
- blk_run_backing_dev(bdi, NULL);
- }
-
while (pending) {
rmb();
@@ -259,19 +233,11 @@ loop_lock:
BUG_ON(atomic_read(&cur->bi_cnt) == 0);
- if (cur->bi_rw & REQ_SYNC)
- num_sync_run++;
-
submit_bio(cur->bi_rw, cur);
num_run++;
batch_run++;
- if (need_resched()) {
- if (num_sync_run) {
- blk_run_backing_dev(bdi, NULL);
- num_sync_run = 0;
- }
+ if (need_resched())
cond_resched();
- }
/*
* we made progress, there is more work to do and the bdi
@@ -304,13 +270,8 @@ loop_lock:
* against it before looping
*/
last_waited = ioc->last_waited;
- if (need_resched()) {
- if (num_sync_run) {
- blk_run_backing_dev(bdi, NULL);
- num_sync_run = 0;
- }
+ if (need_resched())
cond_resched();
- }
continue;
}
spin_lock(&device->io_lock);
@@ -323,22 +284,6 @@ loop_lock:
}
}
- if (num_sync_run) {
- num_sync_run = 0;
- blk_run_backing_dev(bdi, NULL);
- }
- /*
- * IO has already been through a long path to get here. Checksumming,
- * async helper threads, perhaps compression. We've done a pretty
- * good job of collecting a batch of IO and should just unplug
- * the device right away.
- *
- * This will help anyone who is waiting on the IO, they might have
- * already unplugged, but managed to do so before the bio they
- * cared about found its way down here.
- */
- blk_run_backing_dev(bdi, NULL);
-
cond_resched();
if (again)
goto loop;
@@ -1213,6 +1158,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
return -ENOMEM;
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
+ return PTR_ERR(trans);
+ }
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
@@ -1334,11 +1283,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = btrfs_shrink_device(device, 0);
if (ret)
- goto error_brelse;
+ goto error_undo;
ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
if (ret)
- goto error_brelse;
+ goto error_undo;
device->in_fs_metadata = 0;
@@ -1412,6 +1361,13 @@ out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
return ret;
+error_undo:
+ if (device->writeable) {
+ list_add(&device->dev_alloc_list,
+ &root->fs_info->fs_devices->alloc_list);
+ root->fs_info->fs_devices->rw_devices++;
+ }
+ goto error_brelse;
}
/*
@@ -1601,11 +1557,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = find_next_devid(root, &device->devid);
if (ret) {
+ kfree(device->name);
kfree(device);
goto error;
}
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ kfree(device->name);
+ kfree(device);
+ ret = PTR_ERR(trans);
+ goto error;
+ }
+
lock_chunks(root);
device->writeable = 1;
@@ -1621,7 +1585,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
- device->mode = 0;
+ device->mode = FMODE_EXCL;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
@@ -1873,7 +1837,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
return ret;
trans = btrfs_start_transaction(root, 0);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
lock_chunks(root);
@@ -1904,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
BUG_ON(ret);
+ trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
+
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
BUG_ON(ret);
@@ -2047,7 +2013,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
BUG_ON(ret);
trans = btrfs_start_transaction(dev_root, 0);
- BUG_ON(!trans);
+ BUG_ON(IS_ERR(trans));
ret = btrfs_grow_device(trans, device, old_size);
BUG_ON(ret);
@@ -2213,6 +2179,11 @@ again:
/* Shrinking succeeded, else we would be at "done". */
trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto done;
+ }
+
lock_chunks(root);
device->disk_total_bytes = new_size;
@@ -2626,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
*num_bytes = chunk_bytes_by_type(type, calc_size,
map->num_stripes, sub_stripes);
+ trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+
em = alloc_extent_map(GFP_NOFS);
if (!em) {
ret = -ENOMEM;
@@ -2734,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
item_size);
BUG_ON(ret);
}
+
kfree(chunk);
return 0;
}
@@ -2931,14 +2905,17 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
u64 logical, u64 *length,
struct btrfs_multi_bio **multi_ret,
- int mirror_num, struct page *unplug_page)
+ int mirror_num)
{
struct extent_map *em;
struct map_lookup *map;
struct extent_map_tree *em_tree = &map_tree->map_tree;
u64 offset;
u64 stripe_offset;
+ u64 stripe_end_offset;
u64 stripe_nr;
+ u64 stripe_nr_orig;
+ u64 stripe_nr_end;
int stripes_allocated = 8;
int stripes_required = 1;
int stripe_index;
@@ -2947,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
int max_errors = 0;
struct btrfs_multi_bio *multi = NULL;
- if (multi_ret && !(rw & REQ_WRITE))
+ if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
stripes_allocated = 1;
again:
if (multi_ret) {
@@ -2963,11 +2940,6 @@ again:
em = lookup_extent_mapping(em_tree, logical, *length);
read_unlock(&em_tree->lock);
- if (!em && unplug_page) {
- kfree(multi);
- return 0;
- }
-
if (!em) {
printk(KERN_CRIT "unable to find logical %llu len %llu\n",
(unsigned long long)logical,
@@ -2993,7 +2965,15 @@ again:
max_errors = 1;
}
}
- if (multi_ret && (rw & REQ_WRITE) &&
+ if (rw & REQ_DISCARD) {
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID10)) {
+ stripes_required = map->num_stripes;
+ }
+ }
+ if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
stripes_allocated < stripes_required) {
stripes_allocated = map->num_stripes;
free_extent_map(em);
@@ -3013,23 +2993,37 @@ again:
/* stripe_offset is the offset of this block in its stripe*/
stripe_offset = offset - stripe_offset;
- if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP)) {
+ if (rw & REQ_DISCARD)
+ *length = min_t(u64, em->len - offset, *length);
+ else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP)) {
/* we limit the length of each bio to what fits in a stripe */
*length = min_t(u64, em->len - offset,
- map->stripe_len - stripe_offset);
+ map->stripe_len - stripe_offset);
} else {
*length = em->len - offset;
}
- if (!multi_ret && !unplug_page)
+ if (!multi_ret)
goto out;
num_stripes = 1;
stripe_index = 0;
- if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (unplug_page || (rw & REQ_WRITE))
+ stripe_nr_orig = stripe_nr;
+ stripe_nr_end = (offset + *length + map->stripe_len - 1) &
+ (~(map->stripe_len - 1));
+ do_div(stripe_nr_end, map->stripe_len);
+ stripe_end_offset = stripe_nr_end * map->stripe_len -
+ (offset + *length);
+ if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+ if (rw & REQ_DISCARD)
+ num_stripes = min_t(u64, map->num_stripes,
+ stripe_nr_end - stripe_nr_orig);
+ stripe_index = do_div(stripe_nr, map->num_stripes);
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+ if (rw & (REQ_WRITE | REQ_DISCARD))
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -3040,7 +3034,7 @@ again:
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (rw & REQ_WRITE)
+ if (rw & (REQ_WRITE | REQ_DISCARD))
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -3051,8 +3045,12 @@ again:
stripe_index = do_div(stripe_nr, factor);
stripe_index *= map->sub_stripes;
- if (unplug_page || (rw & REQ_WRITE))
+ if (rw & REQ_WRITE)
num_stripes = map->sub_stripes;
+ else if (rw & REQ_DISCARD)
+ num_stripes = min_t(u64, map->sub_stripes *
+ (stripe_nr_end - stripe_nr_orig),
+ map->num_stripes);
else if (mirror_num)
stripe_index += mirror_num - 1;
else {
@@ -3070,24 +3068,101 @@ again:
}
BUG_ON(stripe_index >= map->num_stripes);
- for (i = 0; i < num_stripes; i++) {
- if (unplug_page) {
- struct btrfs_device *device;
- struct backing_dev_info *bdi;
-
- device = map->stripes[stripe_index].dev;
- if (device->bdev) {
- bdi = blk_get_backing_dev_info(device->bdev);
- if (bdi->unplug_io_fn)
- bdi->unplug_io_fn(bdi, unplug_page);
- }
- } else {
+ if (rw & REQ_DISCARD) {
+ for (i = 0; i < num_stripes; i++) {
multi->stripes[i].physical =
map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
multi->stripes[i].dev = map->stripes[stripe_index].dev;
+
+ if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+ u64 stripes;
+ u32 last_stripe = 0;
+ int j;
+
+ div_u64_rem(stripe_nr_end - 1,
+ map->num_stripes,
+ &last_stripe);
+
+ for (j = 0; j < map->num_stripes; j++) {
+ u32 test;
+
+ div_u64_rem(stripe_nr_end - 1 - j,
+ map->num_stripes, &test);
+ if (test == stripe_index)
+ break;
+ }
+ stripes = stripe_nr_end - 1 - j;
+ do_div(stripes, map->num_stripes);
+ multi->stripes[i].length = map->stripe_len *
+ (stripes - stripe_nr + 1);
+
+ if (i == 0) {
+ multi->stripes[i].length -=
+ stripe_offset;
+ stripe_offset = 0;
+ }
+ if (stripe_index == last_stripe)
+ multi->stripes[i].length -=
+ stripe_end_offset;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ u64 stripes;
+ int j;
+ int factor = map->num_stripes /
+ map->sub_stripes;
+ u32 last_stripe = 0;
+
+ div_u64_rem(stripe_nr_end - 1,
+ factor, &last_stripe);
+ last_stripe *= map->sub_stripes;
+
+ for (j = 0; j < factor; j++) {
+ u32 test;
+
+ div_u64_rem(stripe_nr_end - 1 - j,
+ factor, &test);
+
+ if (test ==
+ stripe_index / map->sub_stripes)
+ break;
+ }
+ stripes = stripe_nr_end - 1 - j;
+ do_div(stripes, factor);
+ multi->stripes[i].length = map->stripe_len *
+ (stripes - stripe_nr + 1);
+
+ if (i < map->sub_stripes) {
+ multi->stripes[i].length -=
+ stripe_offset;
+ if (i == map->sub_stripes - 1)
+ stripe_offset = 0;
+ }
+ if (stripe_index >= last_stripe &&
+ stripe_index <= (last_stripe +
+ map->sub_stripes - 1)) {
+ multi->stripes[i].length -=
+ stripe_end_offset;
+ }
+ } else
+ multi->stripes[i].length = *length;
+
+ stripe_index++;
+ if (stripe_index == map->num_stripes) {
+ /* This could only happen for RAID0/10 */
+ stripe_index = 0;
+ stripe_nr++;
+ }
+ }
+ } else {
+ for (i = 0; i < num_stripes; i++) {
+ multi->stripes[i].physical =
+ map->stripes[stripe_index].physical +
+ stripe_offset +
+ stripe_nr * map->stripe_len;
+ multi->stripes[i].dev =
+ map->stripes[stripe_index].dev;
+ stripe_index++;
}
- stripe_index++;
}
if (multi_ret) {
*multi_ret = multi;
@@ -3104,7 +3179,7 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
struct btrfs_multi_bio **multi_ret, int mirror_num)
{
return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
- mirror_num, NULL);
+ mirror_num);
}
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
@@ -3172,14 +3247,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
-int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
- u64 logical, struct page *page)
-{
- u64 length = PAGE_CACHE_SIZE;
- return __btrfs_map_block(map_tree, READ, logical, &length,
- NULL, 0, page);
-}
-
static void end_bio_multi_stripe(struct bio *bio, int err)
{
struct btrfs_multi_bio *multi = bio->bi_private;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7fb59d45fe8c..cc2eadaf7a27 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -126,6 +126,7 @@ struct btrfs_fs_devices {
struct btrfs_bio_stripe {
struct btrfs_device *dev;
u64 physical;
+ u64 length; /* only used for discard mappings */
};
struct btrfs_multi_bio {
@@ -145,6 +146,17 @@ struct btrfs_device_info {
u64 max_avail;
};
+struct map_lookup {
+ u64 type;
+ int io_align;
+ int io_width;
+ int stripe_len;
+ int sector_size;
+ int num_stripes;
+ int sub_stripes;
+ struct btrfs_bio_stripe stripes[];
+};
+
/* Used to sort the devices by max_avail(descending sort) */
int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5776531dc2b..a5303b871b13 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -242,6 +242,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
break;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+ if (verify_dir_item(root, leaf, di))
+ continue;
name_len = btrfs_dir_name_len(leaf, di);
total_size += name_len + 1;
@@ -370,7 +372,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
}
int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
@@ -378,7 +381,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
char *suffix;
char *name;
- err = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &suffix, &value,
+ &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 7a43fd640bbb..b3cc8039134b 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name,
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir);
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
#endif /* __XATTR__ */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index f5ec2d44150d..faccd47c6c46 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -57,7 +57,8 @@ static struct list_head *zlib_alloc_workspace(void)
if (!workspace)
return ERR_PTR(-ENOMEM);
- workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
+ workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize(
+ MAX_WBITS, MAX_MEM_LEVEL));
workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!workspace->def_strm.workspace ||