diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/gfs2/aops.c | 2 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 3 | ||||
-rw-r--r-- | fs/gfs2/file.c | 3 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 269 | ||||
-rw-r--r-- | fs/gfs2/glock.h | 65 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 44 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 1 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 64 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 6 | ||||
-rw-r--r-- | fs/gfs2/super.c | 84 | ||||
-rw-r--r-- | fs/gfs2/xattr.c | 26 | ||||
-rw-r--r-- | fs/nfsd/filecache.c | 318 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 4 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 20 | ||||
-rw-r--r-- | fs/nfsd/trace.h | 51 |
15 files changed, 431 insertions, 529 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 05bee80ac7dee..e782b4f1d1043 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -427,8 +427,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) return error; kaddr = kmap_atomic(page); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap_atomic(kaddr); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3bdb2c668a71c..e7537fd305dd2 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, void *kaddr = kmap(page); u64 dsize = i_size_read(inode); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap(page); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 60c6fb91fb589..eea5be4fbf0ef 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1445,14 +1445,13 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) { - struct gfs2_glock *gl = fl_gh->gh_gl; + struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl); /* * Make sure gfs2_glock_put() won't sleep under the file->f_lock * spinlock. */ - gfs2_glock_hold(gl); spin_lock(&file->f_lock); gfs2_holder_uninit(fl_gh); spin_unlock(&file->f_lock); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index df335c258eb08..524f3c96b9a4c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -186,10 +186,11 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -void gfs2_glock_hold(struct gfs2_glock *gl) +struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl) { GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); lockref_get(&gl->gl_lockref); + return gl; } /** @@ -205,12 +206,6 @@ static int demote_ok(const struct gfs2_glock *gl) if (gl->gl_state == LM_ST_UNLOCKED) return 0; - /* - * Note that demote_ok is used for the lru process of disposing of - * glocks. For this purpose, we don't care if the glock's holders - * have the HIF_MAY_DEMOTE flag set or not. If someone is using - * them, don't demote. - */ if (!list_empty(&gl->gl_holders)) return 0; if (glops->go_demote_ok) @@ -393,7 +388,7 @@ static void do_error(struct gfs2_glock *gl, const int ret) struct gfs2_holder *gh, *tmp; list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (!test_bit(HIF_WAIT, &gh->gh_iflags)) + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; if (ret & LM_OUT_ERROR) gh->gh_error = -EIO; @@ -408,45 +403,6 @@ static void do_error(struct gfs2_glock *gl, const int ret) } /** - * demote_incompat_holders - demote incompatible demoteable holders - * @gl: the glock we want to promote - * @current_gh: the newly promoted holder - * - * We're passing the newly promoted holder in @current_gh, but actually, any of - * the strong holders would do. - */ -static void demote_incompat_holders(struct gfs2_glock *gl, - struct gfs2_holder *current_gh) -{ - struct gfs2_holder *gh, *tmp; - - /* - * Demote incompatible holders before we make ourselves eligible. - * (This holder may or may not allow auto-demoting, but we don't want - * to demote the new holder before it's even granted.) - */ - list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - /* - * Since holders are at the front of the list, we stop when we - * find the first non-holder. - */ - if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) - return; - if (gh == current_gh) - continue; - if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) && - !may_grant(gl, current_gh, gh)) { - /* - * We should not recurse into do_promote because - * __gfs2_glock_dq only calls handle_callback, - * gfs2_glock_add_to_lru and __gfs2_glock_queue_work. - */ - __gfs2_glock_dq(gh); - } - } -} - -/** * find_first_holder - find the first "holder" gh * @gl: the glock */ @@ -464,26 +420,6 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) return NULL; } -/** - * find_first_strong_holder - find the first non-demoteable holder - * @gl: the glock - * - * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set. - */ -static inline struct gfs2_holder * -find_first_strong_holder(struct gfs2_glock *gl) -{ - struct gfs2_holder *gh; - - list_for_each_entry(gh, &gl->gl_holders, gh_list) { - if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) - return NULL; - if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags)) - return gh; - } - return NULL; -} - /* * gfs2_instantiate - Call the glops instantiate function * @gh: The glock holder @@ -540,9 +476,8 @@ done: static int do_promote(struct gfs2_glock *gl) { struct gfs2_holder *gh, *current_gh; - bool incompat_holders_demoted = false; - current_gh = find_first_strong_holder(gl); + current_gh = find_first_holder(gl); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; @@ -561,11 +496,8 @@ static int do_promote(struct gfs2_glock *gl) set_bit(HIF_HOLDER, &gh->gh_iflags); trace_gfs2_promote(gh); gfs2_holder_wake(gh); - if (!incompat_holders_demoted) { + if (!current_gh) current_gh = gh; - demote_incompat_holders(gl, current_gh); - incompat_holders_demoted = true; - } } return 0; } @@ -927,6 +859,48 @@ out_unlock: return; } +/** + * glock_set_object - set the gl_object field of a glock + * @gl: the glock + * @object: the object + */ +void glock_set_object(struct gfs2_glock *gl, void *object) +{ + void *prev_object; + + spin_lock(&gl->gl_lockref.lock); + prev_object = gl->gl_object; + gl->gl_object = object; + spin_unlock(&gl->gl_lockref.lock); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) { + pr_warn("glock=%u/%llx\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + gfs2_dump_glock(NULL, gl, true); + } +} + +/** + * glock_clear_object - clear the gl_object field of a glock + * @gl: the glock + */ +void glock_clear_object(struct gfs2_glock *gl, void *object) +{ + void *prev_object; + + spin_lock(&gl->gl_lockref.lock); + prev_object = gl->gl_object; + gl->gl_object = NULL; + spin_unlock(&gl->gl_lockref.lock); + if (gfs2_assert_warn(gl->gl_name.ln_sbd, + prev_object == object || prev_object == NULL)) { + pr_warn("glock=%u/%llx\n", + gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + gfs2_dump_glock(NULL, gl, true); + } +} + void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation) { struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr; @@ -980,8 +954,6 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ip = NULL; spin_unlock(&gl->gl_lockref.lock); if (ip) { - struct gfs2_glock *inode_gl = NULL; - gl->gl_no_formal_ino = ip->i_no_formal_ino; set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); d_prune_aliases(&ip->i_inode); @@ -991,14 +963,14 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) spin_lock(&gl->gl_lockref.lock); ip = gl->gl_object; if (ip) { - inode_gl = ip->i_gl; - lockref_get(&inode_gl->gl_lockref); clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + if (!igrab(&ip->i_inode)) + ip = NULL; } spin_unlock(&gl->gl_lockref.lock); - if (inode_gl) { - gfs2_glock_poke(inode_gl); - gfs2_glock_put(inode_gl); + if (ip) { + gfs2_glock_poke(ip->i_gl); + iput(&ip->i_inode); } evicted = !ip; } @@ -1039,6 +1011,7 @@ static void delete_work_func(struct work_struct *work) if (gfs2_queue_delete_work(gl, 5 * HZ)) return; } + goto out; } inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, @@ -1051,6 +1024,7 @@ static void delete_work_func(struct work_struct *work) d_prune_aliases(inode); iput(inode); } +out: gfs2_glock_put(gl); } @@ -1256,13 +1230,12 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, struct gfs2_holder *gh, unsigned long ip) { INIT_LIST_HEAD(&gh->gh_list); - gh->gh_gl = gl; + gh->gh_gl = gfs2_glock_hold(gl); gh->gh_ip = ip; gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; gh->gh_iflags = 0; - gfs2_glock_hold(gl); } /** @@ -1496,7 +1469,7 @@ __acquires(&gl->gl_lockref.lock) if (test_bit(GLF_LOCK, &gl->gl_flags)) { struct gfs2_holder *current_gh; - current_gh = find_first_strong_holder(gl); + current_gh = find_first_holder(gl); try_futile = !may_grant(gl, current_gh, gh); } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) @@ -1508,8 +1481,6 @@ __acquires(&gl->gl_lockref.lock) continue; if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK) continue; - if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)) - continue; if (!pid_is_meaningful(gh2)) continue; goto trap_recursive; @@ -1619,69 +1590,28 @@ static inline bool needs_demote(struct gfs2_glock *gl) static void __gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned delay = 0; int fast_path = 0; /* - * This while loop is similar to function demote_incompat_holders: - * If the glock is due to be demoted (which may be from another node - * or even if this holder is GL_NOCACHE), the weak holders are - * demoted as well, allowing the glock to be demoted. + * This holder should not be cached, so mark it for demote. + * Note: this should be done before the check for needs_demote + * below. */ - while (gh) { - /* - * If we're in the process of file system withdraw, we cannot - * just dequeue any glocks until our journal is recovered, lest - * we introduce file system corruption. We need two exceptions - * to this rule: We need to allow unlocking of nondisk glocks - * and the glock for our own journal that needs recovery. - */ - if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && - glock_blocked_by_withdraw(gl) && - gh->gh_gl != sdp->sd_jinode_gl) { - sdp->sd_glock_dqs_held++; - spin_unlock(&gl->gl_lockref.lock); - might_sleep(); - wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, - TASK_UNINTERRUPTIBLE); - spin_lock(&gl->gl_lockref.lock); - } + if (gh->gh_flags & GL_NOCACHE) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); - /* - * This holder should not be cached, so mark it for demote. - * Note: this should be done before the check for needs_demote - * below. - */ - if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); - - list_del_init(&gh->gh_list); - clear_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_glock_queue(gh, 0); + list_del_init(&gh->gh_list); + clear_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_glock_queue(gh, 0); - /* - * If there hasn't been a demote request we are done. - * (Let the remaining holders, if any, keep holding it.) - */ - if (!needs_demote(gl)) { - if (list_empty(&gl->gl_holders)) - fast_path = 1; - break; - } - /* - * If we have another strong holder (we cannot auto-demote) - * we are done. It keeps holding it until it is done. - */ - if (find_first_strong_holder(gl)) - break; - - /* - * If we have a weak holder at the head of the list, it - * (and all others like it) must be auto-demoted. If there - * are no more weak holders, we exit the while loop. - */ - gh = find_first_holder(gl); + /* + * If there hasn't been a demote request we are done. + * (Let the remaining holders, if any, keep holding it.) + */ + if (!needs_demote(gl)) { + if (list_empty(&gl->gl_holders)) + fast_path = 1; } if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) @@ -1705,8 +1635,17 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; spin_lock(&gl->gl_lockref.lock); + if (!gfs2_holder_queued(gh)) { + /* + * May have already been dequeued because the locking request + * was GL_ASYNC and it has failed in the meantime. + */ + goto out; + } + if (list_is_first(&gh->gh_list, &gl->gl_holders) && !test_bit(HIF_HOLDER, &gh->gh_iflags)) { spin_unlock(&gl->gl_lockref.lock); @@ -1715,7 +1654,26 @@ void gfs2_glock_dq(struct gfs2_holder *gh) spin_lock(&gl->gl_lockref.lock); } + /* + * If we're in the process of file system withdraw, we cannot just + * dequeue any glocks until our journal is recovered, lest we introduce + * file system corruption. We need two exceptions to this rule: We need + * to allow unlocking of nondisk glocks and the glock for our own + * journal that needs recovery. + */ + if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && + glock_blocked_by_withdraw(gl) && + gh->gh_gl != sdp->sd_jinode_gl) { + sdp->sd_glock_dqs_held++; + spin_unlock(&gl->gl_lockref.lock); + might_sleep(); + wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, + TASK_UNINTERRUPTIBLE); + spin_lock(&gl->gl_lockref.lock); + } + __gfs2_glock_dq(gh); +out: spin_unlock(&gl->gl_lockref.lock); } @@ -1888,33 +1846,6 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) delay = gl->gl_hold_time; } - /* - * Note 1: We cannot call demote_incompat_holders from handle_callback - * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq -> - * handle_callback -> demote_incompat_holders -> gfs2_glock_dq - * Plus, we only want to demote the holders if the request comes from - * a remote cluster node because local holder conflicts are resolved - * elsewhere. - * - * Note 2: if a remote node wants this glock in EX mode, lock_dlm will - * request that we set our state to UNLOCKED. Here we mock up a holder - * to make it look like someone wants the lock EX locally. Any SH - * and DF requests should be able to share the lock without demoting. - * - * Note 3: We only want to demote the demoteable holders when there - * are no more strong holders. The demoteable holders might as well - * keep the glock until the last strong holder is done with it. - */ - if (!find_first_strong_holder(gl)) { - struct gfs2_holder mock_gh = { - .gh_gl = gl, - .gh_state = (state == LM_ST_UNLOCKED) ? - LM_ST_EXCLUSIVE : state, - .gh_iflags = BIT(HIF_HOLDER) - }; - - demote_incompat_holders(gl, &mock_gh); - } handle_callback(gl, state, delay, true); __gfs2_glock_queue_work(gl, delay); spin_unlock(&gl->gl_lockref.lock); @@ -2306,8 +2237,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 'H'; if (test_bit(HIF_WAIT, &iflags)) *p++ = 'W'; - if (test_bit(HIF_MAY_DEMOTE, &iflags)) - *p++ = 'D'; if (flags & GL_SKIP) *p++ = 's'; *p = 0; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 0d068f4fd7d67..f37ac087e2c14 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -156,8 +156,6 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) break; - if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags)) - continue; if (gh->gh_owner_pid == pid) goto out; } @@ -196,7 +194,7 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); -extern void gfs2_glock_hold(struct gfs2_glock *gl); +extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_glock_queue_put(struct gfs2_glock *gl); @@ -288,6 +286,9 @@ extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); extern void gfs2_register_debugfs(void); extern void gfs2_unregister_debugfs(void); +extern void glock_set_object(struct gfs2_glock *gl, void *object); +extern void glock_clear_object(struct gfs2_glock *gl, void *object); + extern const struct lm_lockops gfs2_dlm_ops; static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh) @@ -305,64 +306,6 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) return !list_empty(&gh->gh_list); } -/** - * glock_set_object - set the gl_object field of a glock - * @gl: the glock - * @object: the object - */ -static inline void glock_set_object(struct gfs2_glock *gl, void *object) -{ - spin_lock(&gl->gl_lockref.lock); - if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL)) - gfs2_dump_glock(NULL, gl, true); - gl->gl_object = object; - spin_unlock(&gl->gl_lockref.lock); -} - -/** - * glock_clear_object - clear the gl_object field of a glock - * @gl: the glock - * @object: the object - * - * I'd love to similarly add this: - * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object)) - * gfs2_dump_glock(NULL, gl, true); - * Unfortunately, that's not possible because as soon as gfs2_delete_inode - * frees the block in the rgrp, another process can reassign it for an I_NEW - * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget. - * That means gfs2_delete_inode may subsequently try to call this function - * for a glock that's already pointing to a brand new inode. If we clear the - * new inode's gl_object, we'll introduce metadata corruption. Function - * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also - * tries to clear gl_object, so it's more than just gfs2_delete_inode. - * - */ -static inline void glock_clear_object(struct gfs2_glock *gl, void *object) -{ - spin_lock(&gl->gl_lockref.lock); - if (gl->gl_object == object) - gl->gl_object = NULL; - spin_unlock(&gl->gl_lockref.lock); -} - -static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - spin_lock(&gl->gl_lockref.lock); - set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags); - spin_unlock(&gl->gl_lockref.lock); -} - -static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - - spin_lock(&gl->gl_lockref.lock); - clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags); - spin_unlock(&gl->gl_lockref.lock); -} - extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 49210a2e7ce75..d78b61ecc1cdf 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -397,38 +397,39 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) struct timespec64 atime; u16 height, depth; umode_t mode = be32_to_cpu(str->di_mode); - bool is_new = ip->i_inode.i_state & I_NEW; + struct inode *inode = &ip->i_inode; + bool is_new = inode->i_state & I_NEW; if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) goto corrupt; - if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode))) + if (unlikely(!is_new && inode_wrong_type(inode, mode))) goto corrupt; ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); - ip->i_inode.i_mode = mode; + inode->i_mode = mode; if (is_new) { - ip->i_inode.i_rdev = 0; + inode->i_rdev = 0; switch (mode & S_IFMT) { case S_IFBLK: case S_IFCHR: - ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), - be32_to_cpu(str->di_minor)); + inode->i_rdev = MKDEV(be32_to_cpu(str->di_major), + be32_to_cpu(str->di_minor)); break; } } - i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); - i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); - set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); - i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); - gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); + i_uid_write(inode, be32_to_cpu(str->di_uid)); + i_gid_write(inode, be32_to_cpu(str->di_gid)); + set_nlink(inode, be32_to_cpu(str->di_nlink)); + i_size_write(inode, be64_to_cpu(str->di_size)); + gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); - if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0) - ip->i_inode.i_atime = atime; - ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); - ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); - ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + if (timespec64_compare(&inode->i_atime, &atime) < 0) + inode->i_atime = atime; + inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime); + inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); + inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime); + inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); ip->i_goal = be64_to_cpu(str->di_goal_meta); ip->i_generation = be64_to_cpu(str->di_generation); @@ -436,7 +437,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_diskflags = be32_to_cpu(str->di_flags); ip->i_eattr = be64_to_cpu(str->di_eattr); /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */ - gfs2_set_inode_flags(&ip->i_inode); + gfs2_set_inode_flags(inode); height = be16_to_cpu(str->di_height); if (unlikely(height > GFS2_MAX_META_HEIGHT)) goto corrupt; @@ -448,8 +449,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); - if (S_ISREG(ip->i_inode.i_mode)) - gfs2_set_aops(&ip->i_inode); + if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip)) + goto corrupt; + + if (S_ISREG(inode->i_mode)) + gfs2_set_aops(inode); return 0; corrupt: diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d09d9892cd055..c26765080f280 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -252,7 +252,6 @@ struct gfs2_lkstats { enum { /* States */ - HIF_MAY_DEMOTE = 1, HIF_HOLDER = 6, /* Set for gh that "holds" the glock */ HIF_WAIT = 10, }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1371e067d2a7c..614db3055c024 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -142,6 +142,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (unlikely(error)) goto fail; + /* + * The only caller that sets @blktype to GFS2_BLKST_UNLINKED is + * delete_work_func(). Make sure not to cancel the delete work + * from within itself here. + */ if (blktype == GFS2_BLKST_UNLINKED) extra_flags |= LM_FLAG_TRY; else @@ -403,12 +408,17 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) goto out_ipreserv; error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); + if (error) + goto out_trans_end; + ip->i_no_formal_ino = ip->i_generation; ip->i_inode.i_ino = ip->i_no_addr; ip->i_goal = ip->i_no_addr; + if (*dblocks > 1) + ip->i_eattr = ip->i_no_addr + 1; +out_trans_end: gfs2_trans_end(sdp); - out_ipreserv: gfs2_inplace_release(ip); out_quota: @@ -586,6 +596,12 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, * @size: The initial size of the inode (ignored for directories) * @excl: Force fail if inode exists * + * FIXME: Change to allocate the disk blocks and write them out in the same + * transaction. That way, we can no longer end up in a situation in which an + * inode is allocated, the node crashes, and the block looks like a valid + * inode. (With atomic creates in place, we will also no longer need to zero + * the link count and dirty the inode here on failure.) + * * Returns: 0 on success, or error code */ @@ -596,12 +612,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, { const struct qstr *name = &dentry->d_name; struct posix_acl *default_acl, *acl; - struct gfs2_holder ghs[2]; + struct gfs2_holder d_gh, gh; struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error, free_vfs_inode = 1; + int error; u32 aflags = 0; unsigned blocks = 1; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; @@ -617,10 +633,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh); if (error) goto fail; - gfs2_holder_mark_uninitialized(ghs + 1); + gfs2_holder_mark_uninitialized(&gh); error = create_ok(dip, name, mode); if (error) @@ -642,7 +658,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, else error = finish_no_open(file, NULL); } - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); goto fail; } else if (error != -ENOENT) { goto fail_gunlock; @@ -656,12 +672,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = -ENOMEM; if (!inode) goto fail_gunlock; + ip = GFS2_I(inode); error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) goto fail_gunlock; - ip = GFS2_I(inode); error = gfs2_qa_get(ip); if (error) goto fail_free_acls; @@ -723,15 +739,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, goto fail_free_inode; gfs2_cancel_delete_work(io_gl); +retry: error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); - BUG_ON(error); + if (error == -EBUSY) + goto retry; + if (error) + goto fail_gunlock2; error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID, &ip->i_iopen_gh); if (error) goto fail_gunlock2; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (error) goto fail_gunlock3; @@ -739,10 +759,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock3; - if (blocks > 1) { - ip->i_eattr = ip->i_no_addr + 1; + if (blocks > 1) gfs2_init_xattr(ip); - } init_dinode(dip, ip, symname); gfs2_trans_end(sdp); @@ -750,9 +768,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, glock_set_object(io_gl, ip); gfs2_set_iop(inode); - free_vfs_inode = 0; /* After this point, the inode is no longer - considered free. Any failures need to undo - the gfs2 structures. */ if (default_acl) { error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) @@ -785,9 +800,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, file->f_mode |= FMODE_CREATED; error = finish_open(file, dentry, gfs2_open_common); } - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); gfs2_qa_put(ip); - gfs2_glock_dq_uninit(ghs + 1); + gfs2_glock_dq_uninit(&gh); gfs2_glock_put(io_gl); gfs2_qa_put(dip); unlock_new_inode(inode); @@ -801,10 +816,6 @@ fail_gunlock3: fail_gunlock2: gfs2_glock_put(io_gl); fail_free_inode: - if (ip->i_gl) { - if (free_vfs_inode) /* else evict will do the put for us */ - gfs2_glock_put(ip->i_gl); - } gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: @@ -812,20 +823,19 @@ fail_free_acls: posix_acl_release(acl); fail_gunlock: gfs2_dir_no_add(&da); - gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(&d_gh); if (!IS_ERR_OR_NULL(inode)) { + set_bit(GIF_ALLOC_FAILED, &ip->i_flags); clear_nlink(inode); - if (!free_vfs_inode) + if (ip->i_no_addr) mark_inode_dirty(inode); - set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, - &GFS2_I(inode)->i_flags); if (inode->i_state & I_NEW) iget_failed(inode); else iput(inode); } - if (gfs2_holder_initialized(ghs + 1)) - gfs2_glock_dq_uninit(ghs + 1); + if (gfs2_holder_initialized(&gh)) + gfs2_glock_dq_uninit(&gh); fail: gfs2_qa_put(dip); return error; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 6ed728aae9a53..3c41b864ee5bc 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -442,6 +442,12 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) struct buffer_head *bh; int ty; + if (!ip->i_gl) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + gfs2_ail1_wipe(sdp, bstart, blen); while (blen) { ty = REMOVE_META; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b018957a1bb24..999cc146d7083 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -379,6 +379,7 @@ out: void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { + const struct inode *inode = &ip->i_inode; struct gfs2_dinode *str = buf; str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); @@ -386,15 +387,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); - str->di_mode = cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode)); - str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode)); - str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); - str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); - str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); - str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); - str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); + str->di_mode = cpu_to_be32(inode->i_mode); + str->di_uid = cpu_to_be32(i_uid_read(inode)); + str->di_gid = cpu_to_be32(i_gid_read(inode)); + str->di_nlink = cpu_to_be32(inode->i_nlink); + str->di_size = cpu_to_be64(i_size_read(inode)); + str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode)); + str->di_atime = cpu_to_be64(inode->i_atime.tv_sec); + str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); str->di_goal_meta = cpu_to_be64(ip->i_goal); str->di_goal_data = cpu_to_be64(ip->i_goal); @@ -402,16 +403,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_flags = cpu_to_be32(ip->i_diskflags); str->di_height = cpu_to_be16(ip->i_height); - str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) && !(ip->i_diskflags & GFS2_DIF_EXHASH) ? GFS2_FORMAT_DE : 0); str->di_depth = cpu_to_be16(ip->i_depth); str->di_entries = cpu_to_be32(ip->i_entries); str->di_eattr = cpu_to_be64(ip->i_eattr); - str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); - str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); - str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); + str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec); + str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec); + str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec); } /** @@ -475,6 +476,12 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; + if (unlikely(!ip->i_gl)) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + if (unlikely(gfs2_withdrawn(sdp))) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { @@ -927,8 +934,7 @@ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && - inode->i_nlink && + if (inode->i_nlink && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (test_bit(GLF_DEMOTE, &gl->gl_flags)) @@ -1076,7 +1082,13 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) struct inode *inode = &ip->i_inode; struct gfs2_glock *gl = ip->i_gl; - truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0); + if (unlikely(!gl)) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + + truncate_inode_pages(gfs2_glock2aspace(gl), 0); truncate_inode_pages(&inode->i_data, 0); if (atomic_read(&gl->gl_revokes) == 0) { @@ -1218,10 +1230,8 @@ static enum dinode_demise evict_should_delete(struct inode *inode, struct gfs2_sbd *sdp = sb->s_fs_info; int ret; - if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) { - BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl)); + if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) goto should_delete; - } if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) return SHOULD_DEFER_EVICTION; @@ -1294,13 +1304,22 @@ static int evict_unlinked_inode(struct inode *inode) goto out; } - /* We're about to clear the bitmap for the dinode, but as soon as we - do, gfs2_create_inode can create another inode at the same block - location and try to set gl_object again. We clear gl_object here so - that subsequent inode creates don't see an old gl_object. */ - glock_clear_object(ip->i_gl, ip); + if (ip->i_gl) + gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); + + /* + * As soon as we clear the bitmap for the dinode, gfs2_create_inode() + * can get called to recreate it, or even gfs2_inode_lookup() if the + * inode was recreated on another node in the meantime. + * + * However, inserting the new inode into the inode hash table will not + * succeed until the old inode is removed, and that only happens after + * ->evict_inode() returns. The new inode is attached to its inode and + * iopen glocks after inserting it into the inode hash table, so at + * that point we can be sure that both glocks are unused. + */ + ret = gfs2_dinode_dealloc(ip); - gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); out: return ret; } @@ -1367,12 +1386,7 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_holder gh; int ret; - if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { - clear_inode(inode); - return; - } - - if (inode->i_nlink || sb_rdonly(sb)) + if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) goto out; gfs2_holder_mark_uninitialized(&gh); @@ -1405,12 +1419,9 @@ out: struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; glock_clear_object(gl, ip); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - ip->i_iopen_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&ip->i_iopen_gh); - } gfs2_glock_hold(gl); - gfs2_holder_uninit(&ip->i_iopen_gh); + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } if (ip->i_gl) { @@ -1429,6 +1440,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL); if (!ip) return NULL; + ip->i_no_addr = 0; ip->i_flags = 0; ip->i_gl = NULL; gfs2_holder_mark_uninitialized(&ip->i_iopen_gh); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index f6a66050380e9..518c0677e12ae 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1412,11 +1412,13 @@ static int ea_dealloc_block(struct gfs2_inode *ip) ip->i_eattr = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_trans_add_meta(ip->i_gl, dibh); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); + if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + } } gfs2_trans_end(sdp); @@ -1445,14 +1447,16 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) if (error) return error; - error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); - if (error) - goto out_quota; - - if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { - error = ea_dealloc_indirect(ip); + if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) goto out_quota; + + if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) { + error = ea_dealloc_indirect(ip); + if (error) + goto out_quota; + } } error = ea_dealloc_block(ip); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1998b4d5f692a..45b2c9e3f6360 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -324,8 +324,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) if (key->gc) __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; - /* nf_ref is pre-incremented for hash table */ - refcount_set(&nf->nf_ref, 2); + refcount_set(&nf->nf_ref, 1); nf->nf_may = key->need; nf->nf_mark = NULL; } @@ -377,24 +376,35 @@ nfsd_file_unhash(struct nfsd_file *nf) return false; } -static bool +static void nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); - bool flush = false; trace_nfsd_file_free(nf); this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); + nfsd_file_unhash(nf); + + /* + * We call fsync here in order to catch writeback errors. It's not + * strictly required by the protocol, but an nfsd_file could get + * evicted from the cache before a COMMIT comes in. If another + * task were to open that file in the interim and scrape the error, + * then the client may never see it. By calling fsync here, we ensure + * that writeback happens before the entry is freed, and that any + * errors reported result in the write verifier changing. + */ + nfsd_file_fsync(nf); + if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { get_file(nf->nf_file); filp_close(nf->nf_file, NULL); fput(nf->nf_file); - flush = true; } /* @@ -402,10 +412,9 @@ nfsd_file_free(struct nfsd_file *nf) * WARN and leak it to preserve system stability. */ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) - return flush; + return; call_rcu(&nf->nf_rcu, nfsd_file_slab_free); - return flush; } static bool @@ -421,17 +430,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static void nfsd_file_lru_add(struct nfsd_file *nf) +static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_add(nf); + return true; + } + return false; } -static void nfsd_file_lru_remove(struct nfsd_file *nf) +static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); + return true; + } + return false; } struct nfsd_file * @@ -442,86 +457,60 @@ nfsd_file_get(struct nfsd_file *nf) return NULL; } -static void -nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) -{ - trace_nfsd_file_unhash_and_queue(nf); - if (nfsd_file_unhash(nf)) { - /* caller must call nfsd_file_dispose_list() later */ - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, dispose); - } -} - -static void -nfsd_file_put_noref(struct nfsd_file *nf) -{ - trace_nfsd_file_put(nf); - - if (refcount_dec_and_test(&nf->nf_ref)) { - WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - -static void -nfsd_file_unhash_and_put(struct nfsd_file *nf) -{ - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); -} - +/** + * nfsd_file_put - put the reference to a nfsd_file + * @nf: nfsd_file of which to put the reference + * + * Put a reference to a nfsd_file. In the non-GC case, we just put the + * reference immediately. In the GC case, if the reference would be + * the last one, the put it on the LRU instead to be cleaned up later. + */ void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); + trace_nfsd_file_put(nf); - if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) - nfsd_file_lru_add(nf); - else if (refcount_read(&nf->nf_ref) == 2) - nfsd_file_unhash_and_put(nf); - - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); - } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { - nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); - } else - nfsd_file_put_noref(nf); -} - -static void -nfsd_file_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && + test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + /* + * If this is the last reference (nf_ref == 1), then try to + * transfer it to the LRU. + */ + if (refcount_dec_not_one(&nf->nf_ref)) + return; + + /* Try to add it to the LRU. If that fails, decrement. */ + if (nfsd_file_lru_add(nf)) { + /* If it's still hashed, we're done */ + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_schedule_laundrette(); + return; + } - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); + /* + * We're racing with unhashing, so try to remove it from + * the LRU. If removal fails, then someone else already + * has our reference. + */ + if (!nfsd_file_lru_remove(nf)) + return; + } } + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); } static void -nfsd_file_dispose_list_sync(struct list_head *dispose) +nfsd_file_dispose_list(struct list_head *dispose) { - bool flush = false; struct nfsd_file *nf; - while(!list_empty(dispose)) { + while (!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - if (!refcount_dec_and_test(&nf->nf_ref)) - continue; - if (nfsd_file_free(nf)) - flush = true; + nfsd_file_free(nf); } - if (flush) - flush_delayed_fput(); } static void @@ -591,21 +580,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* - * Do a lockless refcount check. The hashtable holds one reference, so - * we look to see if anything else has a reference, or if any have - * been put since the shrinker last ran. Those don't get unhashed and - * released. - * - * Note that in the put path, we set the flag and then decrement the - * counter. Here we check the counter and then test and clear the flag. - * That order is deliberate to ensure that we can do this locklessly. - */ - if (refcount_read(&nf->nf_ref) > 1) { - list_lru_isolate(lru, &nf->nf_lru); - trace_nfsd_file_gc_in_use(nf); - return LRU_REMOVED; - } + /* We should only be dealing with GC entries here */ + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); /* * Don't throw out files that are still undergoing I/O or @@ -616,40 +592,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } + /* If it was recently added to the list, skip it */ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { trace_nfsd_file_gc_referenced(nf); return LRU_ROTATE; } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_gc_hashed(nf); - return LRU_SKIP; + /* + * Put the reference held on behalf of the LRU. If it wasn't the last + * one, then just remove it from the LRU and ignore it. + */ + if (!refcount_dec_and_test(&nf->nf_ref)) { + trace_nfsd_file_gc_in_use(nf); + list_lru_isolate(lru, &nf->nf_lru); + return LRU_REMOVED; } + /* Refcount went to zero. Unhash it and queue it to the dispose list */ + nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } -/* - * Unhash items on @dispose immediately, then queue them on the - * disposal workqueue to finish releasing them in the background. - * - * cel: Note that between the time list_lru_shrink_walk runs and - * now, these items are in the hash table but marked unhashed. - * Why release these outside of lru_cb ? There's no lock ordering - * problem since lru_cb currently takes no lock. - */ -static void nfsd_file_gc_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; - - list_for_each_entry(nf, dispose, nf_lru) - nfsd_file_hash_remove(nf); - nfsd_file_dispose_list_delayed(dispose); -} - static void nfsd_file_gc(void) { @@ -659,7 +625,7 @@ nfsd_file_gc(void) ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &dispose, list_lru_count(&nfsd_file_lru)); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } static void @@ -685,7 +651,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); return ret; } @@ -695,72 +661,111 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -/* - * Find all cache items across all net namespaces that match @inode and - * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). +/** + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * + * An nfsd_file represents a struct file being held open on behalf of nfsd. An + * open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). + * + * This function is intended to find open nfsd_files when this sort of + * conflicting access occurs and then attempt to close those files out. + * + * Populates the dispose list with entries that have already had their + * refcounts go to zero. The actual free of an nfsd_file can be expensive, + * so we leave it up to the caller whether it wants to wait or not. */ -static unsigned int -__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) +static void +nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, }; - unsigned int count = 0; struct nfsd_file *nf; rcu_read_lock(); do { + int decrement = 1; + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_queue(nf, dispose); - count++; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + continue; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + continue; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } } while (1); rcu_read_unlock(); - return count; } /** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put, then flush and fput all cache items associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * actual freeing is deferred to the dispose_list_delayed infrastructure. + * + * This is used by the fsnotify callbacks and setlease notifier. */ -void -nfsd_file_close_inode_sync(struct inode *inode) +static void +nfsd_file_close_inode(struct inode *inode) { LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode_sync(inode, count); - nfsd_file_dispose_list_sync(&dispose); + nfsd_file_queue_for_close(inode, &dispose); + nfsd_file_dispose_list_delayed(&dispose); } /** - * nfsd_file_close_inode - attempt a delayed close of a nfsd_file + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put all cache item associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * nfsd_files are closed out synchronously. + * + * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames + * when reexporting NFS. */ -static void -nfsd_file_close_inode(struct inode *inode) +void +nfsd_file_close_inode_sync(struct inode *inode) { + struct nfsd_file *nf; LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode(inode, count); - nfsd_file_dispose_list_delayed(&dispose); + trace_nfsd_file_close(inode); + + nfsd_file_queue_for_close(inode, &dispose); + while (!list_empty(&dispose)) { + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); + } + flush_delayed_fput(); } /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and close any entries that have not been used since + * Walk the LRU list and destroy any entries that have not been used since * the last scan. */ static void @@ -782,7 +787,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode_sync(file_inode(fl->fl_file)); + nfsd_file_close_inode(file_inode(fl->fl_file)); return 0; } @@ -903,6 +908,13 @@ out_err: goto out; } +/** + * __nfsd_file_cache_purge: clean out the cache for shutdown + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, + * then close out everything. Called when an nfsd instance is being shut down. + */ static void __nfsd_file_cache_purge(struct net *net) { @@ -916,8 +928,11 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) - nfsd_file_unhash_and_queue(nf, &dispose); + if (!net || nf->nf_net == net) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + list_add(&nf->nf_lru, &dispose); + } nf = rhashtable_walk_next(&iter); } @@ -1084,8 +1099,12 @@ retry: if (nf) nf = nfsd_file_get(nf); rcu_read_unlock(); - if (nf) + + if (nf) { + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; + } nf = nfsd_file_alloc(&key, may_flags); if (!nf) { @@ -1118,11 +1137,11 @@ wait_for_construction: goto out; } open_retry = false; - nfsd_file_put_noref(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); goto retry; } - nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1132,7 +1151,8 @@ out: this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { - nfsd_file_put(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); nf = NULL; } @@ -1158,8 +1178,10 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || key.inode->i_nlink == 0) - nfsd_file_unhash_and_put(nf); + if (status == nfs_ok && key.inode->i_nlink == 0) + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d6e1d3894c82f..2a815f5a52c4b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -988,7 +988,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c } else { if (!conn->cb_xprt) return -EINVAL; - clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; @@ -1008,6 +1007,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c rpc_shutdown_client(client); return -ENOMEM; } + + if (clp->cl_minorversion != 0) + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_client = client; clp->cl_cb_cred = cred; rcu_read_lock(); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 73ed32ad23a20..bd880d55f565b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1461,13 +1461,6 @@ out_err: return status; } -static void -nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) -{ - nfs_do_sb_deactive(ss_mnt->mnt_sb); - mntput(ss_mnt); -} - /* * Verify COPY destination stateid. * @@ -1570,11 +1563,6 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, { } -static void -nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) -{ -} - static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, struct nfs_fh *src_fh, nfs4_stateid *stateid) @@ -1770,7 +1758,7 @@ static int nfsd4_do_async_copy(void *data) default: nfserr = nfserr_offload_denied; } - nfsd4_interssc_disconnect(copy->ss_mnt); + /* ss_mnt will be unmounted by the laundromat */ goto do_callback; } nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, @@ -1851,8 +1839,10 @@ out_err: if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - if (nfsd4_ssc_is_inter(copy)) - nfsd4_interssc_disconnect(copy->ss_mnt); + /* + * source's vfsmount of inter-copy will be unmounted + * by the laundromat + */ goto out; } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 46b8f68a24974..c852ae8eaf371 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -876,8 +876,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ - { 1 << NFSD_FILE_GC, "GC"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), @@ -912,6 +912,7 @@ DEFINE_EVENT(nfsd_file_class, name, \ DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, @@ -1103,35 +1104,6 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) -DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO( - const struct inode *inode, - unsigned int count - ), - TP_ARGS(inode, count), - TP_STRUCT__entry( - __field(const struct inode *, inode) - __field(unsigned int, count) - ), - TP_fast_assign( - __entry->inode = inode; - __entry->count = count; - ), - TP_printk("inode=%p count=%u", - __entry->inode, __entry->count) -); - -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO( \ - const struct inode *inode, \ - unsigned int count \ - ), \ - TP_ARGS(inode, count)) - -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); - TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, @@ -1209,7 +1181,6 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1241,6 +1212,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_close, + TP_PROTO( + const struct inode *inode + ), + TP_ARGS(inode), + TP_STRUCT__entry( + __field(const void *, inode) + ), + TP_fast_assign( + __entry->inode = inode; + ), + TP_printk("inode=%p", + __entry->inode + ) +); + TRACE_EVENT(nfsd_file_fsync, TP_PROTO( const struct nfsd_file *nf, |