summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-14 12:26:41 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-14 12:26:41 -0800
commit0b48d42235caf627121f440b57d376f48a9af8b6 (patch)
tree400967c5fcb1cd08bbc0e1739e229f9717590f19
parent8e63dd6e1c589ba99a18df9cbaa41c3178607641 (diff)
parent7a6ef8c72314f254c107c6a9ed7cb201961ee05a (diff)
Merge branch 'for-3.3' of git://linux-nfs.org/~bfields/linux
* 'for-3.3' of git://linux-nfs.org/~bfields/linux: (31 commits) nfsd4: nfsd4_create_clid_dir return value is unused NFSD: Change name of extended attribute containing junction svcrpc: don't revert to SVC_POOL_DEFAULT on nfsd shutdown svcrpc: fix double-free on shutdown of nfsd after changing pool mode nfsd4: be forgiving in the absence of the recovery directory nfsd4: fix spurious 4.1 post-reboot failures NFSD: forget_delegations should use list_for_each_entry_safe NFSD: Only reinitilize the recall_lru list under the recall lock nfsd4: initialize special stateid's at compile time NFSd: use network-namespace-aware cache registering routines SUNRPC: create svc_xprt in proper network namespace svcrpc: update outdated BKL comment nfsd41: allow non-reclaim open-by-fh's in 4.1 svcrpc: avoid memory-corruption on pool shutdown svcrpc: destroy server sockets all at once svcrpc: make svc_delete_xprt static nfsd: Fix oops when parsing a 0 length export nfsd4: Use kmemdup rather than duplicating its implementation nfsd4: add a separate (lockowner, inode) lookup nfsd4: fix CONFIG_NFSD_FAULT_INJECTION compile error ...
-rw-r--r--CREDITS5
-rw-r--r--Documentation/filesystems/nfs/00-INDEX2
-rw-r--r--Documentation/filesystems/nfs/fault_injection.txt69
-rw-r--r--MAINTAINERS1
-rw-r--r--fs/nfsd/Kconfig10
-rw-r--r--fs/nfsd/Makefile1
-rw-r--r--fs/nfsd/export.c12
-rw-r--r--fs/nfsd/fault_inject.c91
-rw-r--r--fs/nfsd/fault_inject.h28
-rw-r--r--fs/nfsd/nfs4idmap.c11
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4recover.c22
-rw-r--r--fs/nfsd/nfs4state.c328
-rw-r--r--fs/nfsd/nfs4xdr.c3
-rw-r--r--fs/nfsd/nfsctl.c10
-rw-r--r--fs/nfsd/nfsd.h20
-rw-r--r--fs/nfsd/state.h3
-rw-r--r--fs/nfsd/vfs.c17
-rw-r--r--include/linux/sunrpc/svc_xprt.h3
-rw-r--r--include/linux/sunrpc/svcsock.h2
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/svc.c25
-rw-r--r--net/sunrpc/svc_xprt.c62
-rw-r--r--net/sunrpc/svcsock.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rwxr-xr-xtools/nfsd/inject_fault.sh49
26 files changed, 624 insertions, 169 deletions
diff --git a/CREDITS b/CREDITS
index 44fce988eaa..370b4c7da39 100644
--- a/CREDITS
+++ b/CREDITS
@@ -514,6 +514,11 @@ S: Bessemerstraat 21
S: Amsterdam
S: The Netherlands
+N: NeilBrown
+E: neil@brown.name
+P: 4096R/566281B9 1BC6 29EB D390 D870 7B5F 497A 39EC 9EDD 5662 81B9
+D: NFSD Maintainer 2000-2007
+
N: Zach Brown
E: zab@zabbo.net
D: maestro pci sound
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX
index a57e12411d2..1716874a651 100644
--- a/Documentation/filesystems/nfs/00-INDEX
+++ b/Documentation/filesystems/nfs/00-INDEX
@@ -2,6 +2,8 @@
- this file (nfs-related documentation).
Exporting
- explanation of how to make filesystems exportable.
+fault_injection.txt
+ - information for using fault injection on the server
knfsd-stats.txt
- statistics which the NFS server makes available to user space.
nfs.txt
diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt
new file mode 100644
index 00000000000..426d166089a
--- /dev/null
+++ b/Documentation/filesystems/nfs/fault_injection.txt
@@ -0,0 +1,69 @@
+
+Fault Injection
+===============
+Fault injection is a method for forcing errors that may not normally occur, or
+may be difficult to reproduce. Forcing these errors in a controlled environment
+can help the developer find and fix bugs before their code is shipped in a
+production system. Injecting an error on the Linux NFS server will allow us to
+observe how the client reacts and if it manages to recover its state correctly.
+
+NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this
+feature.
+
+
+Using Fault Injection
+=====================
+On the client, mount the fault injection server through NFS v4.0+ and do some
+work over NFS (open files, take locks, ...).
+
+On the server, mount the debugfs filesystem to <debug_dir> and ls
+<debug_dir>/nfsd. This will show a list of files that will be used for
+injecting faults on the NFS server. As root, write a number n to the file
+corresponding to the action you want the server to take. The server will then
+process the first n items it finds. So if you want to forget 5 locks, echo '5'
+to <debug_dir>/nfsd/forget_locks. A value of 0 will tell the server to forget
+all corresponding items. A log message will be created containing the number
+of items forgotten (check dmesg).
+
+Go back to work on the client and check if the client recovered from the error
+correctly.
+
+
+Available Faults
+================
+forget_clients:
+ The NFS server keeps a list of clients that have placed a mount call. If
+ this list is cleared, the server will have no knowledge of who the client
+ is, forcing the client to reauthenticate with the server.
+
+forget_openowners:
+ The NFS server keeps a list of what files are currently opened and who
+ they were opened by. Clearing this list will force the client to reopen
+ its files.
+
+forget_locks:
+ The NFS server keeps a list of what files are currently locked in the VFS.
+ Clearing this list will force the client to reclaim its locks (files are
+ unlocked through the VFS as they are cleared from this list).
+
+forget_delegations:
+ A delegation is used to assure the client that a file, or part of a file,
+ has not changed since the delegation was awarded. Clearing this list will
+ force the client to reaquire its delegation before accessing the file
+ again.
+
+recall_delegations:
+ Delegations can be recalled by the server when another client attempts to
+ access a file. This test will notify the client that its delegation has
+ been revoked, forcing the client to reaquire the delegation before using
+ the file again.
+
+
+tools/nfs/inject_faults.sh script
+=================================
+This script has been created to ease the fault injection process. This script
+will detect the mounted debugfs directory and write to the files located there
+based on the arguments passed by the user. For example, running
+`inject_faults.sh forget_locks 1` as root will instruct the server to forget
+one lock. Running `inject_faults forget_locks` will instruct the server to
+forgetall locks.
diff --git a/MAINTAINERS b/MAINTAINERS
index 7559c1ca56b..4d1ba2022a9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3775,7 +3775,6 @@ S: Odd Fixes
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: "J. Bruce Fields" <bfields@fieldses.org>
-M: Neil Brown <neilb@suse.de>
L: linux-nfs@vger.kernel.org
W: http://nfs.sourceforge.net/
S: Supported
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 10e6366608f..8df1ea4a6ff 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -80,3 +80,13 @@ config NFSD_V4
available from http://linux-nfs.org/.
If unsure, say N.
+
+config NFSD_FAULT_INJECTION
+ bool "NFS server manual fault injection"
+ depends on NFSD_V4 && DEBUG_KERNEL
+ help
+ This option enables support for manually injecting faults
+ into the NFS server. This is intended to be used for
+ testing error recovery on the NFS client.
+
+ If unsure, say N.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9b118ee2019..af32ef06b4f 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD) += nfsd.o
nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 62f3b9074e8..cf8a6bd062f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
struct svc_expkey key;
struct svc_expkey *ek = NULL;
- if (mesg[mlen-1] != '\n')
+ if (mlen < 1 || mesg[mlen-1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
@@ -1226,12 +1226,12 @@ nfsd_export_init(void)
int rv;
dprintk("nfsd: initializing export module.\n");
- rv = cache_register(&svc_export_cache);
+ rv = cache_register_net(&svc_export_cache, &init_net);
if (rv)
return rv;
- rv = cache_register(&svc_expkey_cache);
+ rv = cache_register_net(&svc_expkey_cache, &init_net);
if (rv)
- cache_unregister(&svc_export_cache);
+ cache_unregister_net(&svc_export_cache, &init_net);
return rv;
}
@@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void)
dprintk("nfsd: shutting down export module.\n");
- cache_unregister(&svc_expkey_cache);
- cache_unregister(&svc_export_cache);
+ cache_unregister_net(&svc_expkey_cache, &init_net);
+ cache_unregister_net(&svc_export_cache, &init_net);
svcauth_unix_purge();
dprintk("nfsd: export shutdown complete.\n");
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
new file mode 100644
index 00000000000..ce7f0758d84
--- /dev/null
+++ b/fs/nfsd/fault_inject.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Uses debugfs to create fault injection points for client testing
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "state.h"
+#include "fault_inject.h"
+
+struct nfsd_fault_inject_op {
+ char *file;
+ void (*func)(u64);
+};
+
+static struct nfsd_fault_inject_op inject_ops[] = {
+ {
+ .file = "forget_clients",
+ .func = nfsd_forget_clients,
+ },
+ {
+ .file = "forget_locks",
+ .func = nfsd_forget_locks,
+ },
+ {
+ .file = "forget_openowners",
+ .func = nfsd_forget_openowners,
+ },
+ {
+ .file = "forget_delegations",
+ .func = nfsd_forget_delegations,
+ },
+ {
+ .file = "recall_delegations",
+ .func = nfsd_recall_delegations,
+ },
+};
+
+static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
+static struct dentry *debug_dir;
+
+static int nfsd_inject_set(void *op_ptr, u64 val)
+{
+ struct nfsd_fault_inject_op *op = op_ptr;
+
+ if (val == 0)
+ printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
+ else
+ printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
+
+ op->func(val);
+ return 0;
+}
+
+static int nfsd_inject_get(void *data, u64 *val)
+{
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n");
+
+void nfsd_fault_inject_cleanup(void)
+{
+ debugfs_remove_recursive(debug_dir);
+}
+
+int nfsd_fault_inject_init(void)
+{
+ unsigned int i;
+ struct nfsd_fault_inject_op *op;
+ mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+
+ debug_dir = debugfs_create_dir("nfsd", NULL);
+ if (!debug_dir)
+ goto fail;
+
+ for (i = 0; i < NUM_INJECT_OPS; i++) {
+ op = &inject_ops[i];
+ if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
+ goto fail;
+ }
+ return 0;
+
+fail:
+ nfsd_fault_inject_cleanup();
+ return -ENOMEM;
+}
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h
new file mode 100644
index 00000000000..90bd0570956
--- /dev/null
+++ b/fs/nfsd/fault_inject.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Function definitions for fault injection
+ */
+
+#ifndef LINUX_NFSD_FAULT_INJECT_H
+#define LINUX_NFSD_FAULT_INJECT_H
+
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_cleanup(void);
+void nfsd_forget_clients(u64);
+void nfsd_forget_locks(u64);
+void nfsd_forget_openowners(u64);
+void nfsd_forget_delegations(u64);
+void nfsd_recall_delegations(u64);
+#else /* CONFIG_NFSD_FAULT_INJECTION */
+static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_cleanup(void) {}
+static inline void nfsd_forget_clients(u64 num) {}
+static inline void nfsd_forget_locks(u64 num) {}
+static inline void nfsd_forget_openowners(u64 num) {}
+static inline void nfsd_forget_delegations(u64 num) {}
+static inline void nfsd_recall_delegations(u64 num) {}
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
+#endif /* LINUX_NFSD_FAULT_INJECT_H */
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 55780a22fdb..94096273cd6 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -36,6 +36,7 @@
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <net/net_namespace.h>
#include "idmap.h"
#include "nfsd.h"
@@ -466,20 +467,20 @@ nfsd_idmap_init(void)
{
int rv;
- rv = cache_register(&idtoname_cache);
+ rv = cache_register_net(&idtoname_cache, &init_net);
if (rv)
return rv;
- rv = cache_register(&nametoid_cache);
+ rv = cache_register_net(&nametoid_cache, &init_net);
if (rv)
- cache_unregister(&idtoname_cache);
+ cache_unregister_net(&idtoname_cache, &init_net);
return rv;
}
void
nfsd_idmap_shutdown(void)
{
- cache_unregister(&idtoname_cache);
- cache_unregister(&nametoid_cache);
+ cache_unregister_net(&idtoname_cache, &init_net);
+ cache_unregister_net(&nametoid_cache, &init_net);
}
static int
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c5e28ed8bca..896da74ec56 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
{
__be32 status;
- /* Only reclaims from previously confirmed clients are valid */
- if ((status = nfs4_check_open_reclaim(&open->op_clientid)))
- return status;
-
/* We don't know the target directory, and therefore can not
* set the change info
*/
@@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ status = nfs4_check_open_reclaim(&open->op_clientid);
+ if (status)
+ goto out;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, &cstate->current_fh,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 80a0be9ed00..0b3e875d1ab 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -117,8 +117,7 @@ out_no_tfm:
return status;
}
-int
-nfsd4_create_clid_dir(struct nfs4_client *clp)
+void nfsd4_create_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
char *dname = clp->cl_recdir;
@@ -127,13 +126,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
- if (!rec_file || clp->cl_firststate)
- return 0;
-
+ if (clp->cl_firststate)
+ return;
clp->cl_firststate = 1;
+ if (!rec_file)
+ return;
status = nfs4_save_creds(&original_cred);
if (status < 0)
- return status;
+ return;
dir = rec_file->f_path.dentry;
/* lock the parent */
@@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
status = PTR_ERR(dentry);
goto out_unlock;
}
- status = -EEXIST;
if (dentry->d_inode)
+ /*
+ * In the 4.1 case, where we're called from
+ * reclaim_complete(), records from the previous reboot
+ * may still be left, so this is OK.
+ *
+ * In the 4.0 case, we should never get here; but we may
+ * as well be forgiving and just succeed silently.
+ */
goto out_put;
status = mnt_want_write_file(rec_file);
if (status)
@@ -164,7 +171,6 @@ out_unlock:
" and is writeable", status,
user_recovery_dirname);
nfs4_reset_creds(original_cred);
- return status;
}
typedef int (recdir_func)(struct dentry *, struct dentry *);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9ca16dc09e0..e8c98f00967 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -49,12 +49,20 @@
time_t nfsd4_lease = 90; /* default lease time */
time_t nfsd4_grace = 90;
static time_t boot_time;
-static stateid_t zerostateid; /* bits all 0 */
-static stateid_t onestateid; /* bits all 1 */
+
+#define all_ones {{~0,~0},~0}
+static const stateid_t one_stateid = {
+ .si_generation = ~0,
+ .si_opaque = all_ones,
+};
+static const stateid_t zero_stateid = {
+ /* all fields zero */
+};
+
static u64 current_sessionid = 1;
-#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
-#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
/* forward declarations */
static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
@@ -133,21 +141,21 @@ unsigned int max_delegations;
* Open owner state (share locks)
*/
-/* hash tables for open owners */
-#define OPEN_OWNER_HASH_BITS 8
-#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS)
-#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1)
+/* hash tables for lock and open owners */
+#define OWNER_HASH_BITS 8
+#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
-static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
+static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
{
unsigned int ret;
ret = opaque_hashval(ownername->data, ownername->len);
ret += clientid;
- return ret & OPEN_OWNER_HASH_MASK;
+ return ret & OWNER_HASH_MASK;
}
-static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE];
+static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
/* hash table for nfs4_file */
#define FILE_HASH_BITS 8
@@ -514,6 +522,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)
list_del(&lo->lo_owner.so_strhash);
list_del(&lo->lo_perstateid);
+ list_del(&lo->lo_owner_ino_hash);
while (!list_empty(&lo->lo_owner.so_stateids)) {
stp = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid, st_perstateowner);
@@ -985,12 +994,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
if (clp == NULL)
return NULL;
- clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
+ clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
if (clp->cl_name.data == NULL) {
kfree(clp);
return NULL;
}
- memcpy(clp->cl_name.data, name.data, name.len);
clp->cl_name.len = name.len;
return clp;
}
@@ -1058,7 +1066,6 @@ expire_client(struct nfs4_client *clp)
spin_unlock(&recall_lock);
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
- list_del_init(&dp->dl_recall_lru);
unhash_delegation(dp);
}
while (!list_empty(&clp->cl_openowners)) {
@@ -2301,7 +2308,7 @@ nfsd4_free_slabs(void)
nfsd4_free_slab(&deleg_slab);
}
-static int
+int
nfsd4_init_slabs(void)
{
openowner_slab = kmem_cache_create("nfsd4_openowners",
@@ -2373,7 +2380,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
{
- list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]);
+ list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
list_add(&oo->oo_perclient, &clp->cl_openowners);
}
@@ -2436,7 +2443,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
struct nfs4_stateowner *so;
struct nfs4_openowner *oo;
- list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) {
+ list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+ if (!so->so_is_open_owner)
+ continue;
if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
oo = openowner(so);
renew_client(oo->oo_owner.so_client);
@@ -2580,7 +2589,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
if (open->op_file == NULL)
return nfserr_jukebox;
- strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner);
+ strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
oo = find_openstateowner_str(strhashval, open);
open->op_openowner = oo;
if (!oo) {
@@ -3123,7 +3132,6 @@ nfs4_laundromat(void)
spin_unlock(&recall_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- list_del_init(&dp->dl_recall_lru);
unhash_delegation(dp);
}
test_val = nfsd4_lease;
@@ -3718,13 +3726,11 @@ out:
}
-/*
- * Lock owner state (byte-range locks)
- */
#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
-#define LOCK_HASH_BITS 8
-#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS)
-#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1)
+
+#define LOCKOWNER_INO_HASH_BITS 8
+#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
+#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
static inline u64
end_offset(u64 start, u64 len)
@@ -3746,16 +3752,14 @@ last_byte_offset(u64 start, u64 len)
return end > start ? end - 1: NFS4_MAX_UINT64;
}
-static inline unsigned int
-lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
- struct xdr_netobj *ownername)
+static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
{
return (file_hashval(inode) + cl_id
+ opaque_hashval(ownername->data, ownername->len))
- & LOCK_HASH_MASK;
+ & LOCKOWNER_INO_HASH_MASK;
}
-static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
/*
* TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -3809,23 +3813,39 @@ nevermind:
deny->ld_type = NFS4_WRITE_LT;
}
+static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner)
+{
+ struct nfs4_ol_stateid *lst;
+
+ if (!same_owner_str(&lo->lo_owner, owner, clid))
+ return false;
+ lst = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ return lst->st_file->fi_inode == inode;
+}
+
static struct nfs4_lockowner *
find_lockowner_str(struct inode *inode, clientid_t *clid,
struct xdr_netobj *owner)
{
- unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
- struct nfs4_stateowner *op;
+ unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
+ struct nfs4_lockowner *lo;
- list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
- if (same_owner_str(op, owner, clid))
- return lockowner(op);
+ list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
+ if (same_lockowner_ino(lo, inode, clid, owner))
+ return lo;
}
return NULL;
}
static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
{
- list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]);
+ struct inode *inode = open_stp->st_file->fi_inode;
+ unsigned int inohash = lockowner_ino_hashval(inode,
+ clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
+
+ list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+ list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
}
@@ -3834,7 +3854,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
* Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
* occurred.
*
- * strhashval = lock_ownerstr_hashval
+ * strhashval = ownerstr_hashval
*/
static struct nfs4_lockowner *
@@ -3892,6 +3912,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
__set_bit(access, &lock_stp->st_access_bmap);
}
+__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new)
+{
+ struct nfs4_file *fi = ost->st_file;
+ struct nfs4_openowner *oo = openowner(ost->st_stateowner);
+ struct nfs4_client *cl = oo->oo_owner.so_client;
+ struct nfs4_lockowner *lo;
+ unsigned int strhashval;
+
+ lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
+ if (lo) {
+ if (!cstate->minorversion)
+ return nfserr_bad_seqid;
+ /* XXX: a lockowner always has exactly one stateid: */
+ *lst = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ return nfs_ok;
+ }
+ strhashval = ownerstr_hashval(cl->cl_clientid.cl_id,
+ &lock->v.new.owner);
+ lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
+ if (lo == NULL)
+ return nfserr_jukebox;
+ *lst = alloc_init_lock_stateid(lo, fi, ost);
+ if (*lst == NULL) {
+ release_lockowner(lo);
+ return nfserr_jukebox;
+ }
+ *new = true;
+ return nfs_ok;
+}
+
/*
* LOCK operation
*/
@@ -3907,7 +3958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct file_lock file_lock;
struct file_lock conflock;
__be32 status = 0;
- unsigned int strhashval;
+ bool new_state = false;
int lkflg;
int err;
@@ -3933,10 +3984,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* lock stateid.
*/
struct nfs4_ol_stateid *open_stp = NULL;
-
+
+ if (nfsd4_has_session(cstate))
+ /* See rfc 5661 18.10.3: given clientid is ignored: */
+ memcpy(&lock->v.new.clientid,
+ &cstate->session->se_client->cl_clientid,
+ sizeof(clientid_t));
+
status = nfserr_stale_clientid;
- if (!nfsd4_has_session(cstate) &&
- STALE_CLIENTID(&lock->lk_new_clientid))
+ if (STALE_CLIENTID(&lock->lk_new_clientid))
goto out;
/* validate and update open stateid and open seqid */
@@ -3948,25 +4004,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
- if (!nfsd4_has_session(cstate) &&
- !same_clid(&open_sop->oo_owner.so_client->cl_clientid,
+ if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
&lock->v.new.clientid))
goto out;
- /* create lockowner and lock stateid */
- fp = open_stp->st_file;
- strhashval = lock_ownerstr_hashval(fp->fi_inode,
- open_sop->oo_owner.so_client->cl_clientid.cl_id,
- &lock->v.new.owner);
- /* XXX: Do we need to check for duplicate stateowners on
- * the same file, or should they just be allowed (and
- * create new stateids)? */
- status = nfserr_jukebox;
- lock_sop = alloc_init_lock_stateowner(strhashval,
- open_sop->oo_owner.so_client, open_stp, lock);
- if (lock_sop == NULL)
- goto out;
- lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
- if (lock_stp == NULL)
+ status = lookup_or_create_lock_state(cstate, open_stp, lock,
+ &lock_stp, &new_state);
+ if (status)
goto out;
} else {
/* lock (lock owner + lock stateid) already exists */
@@ -3976,10 +4019,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
NFS4_LOCK_STID, &lock_stp);
if (status)
goto out;
- lock_sop = lockowner(lock_stp->st_stateowner);
- fp = lock_stp->st_file;
}
- /* lock_sop and lock_stp have been created or found */
+ lock_sop = lockowner(lock_stp->st_stateowner);
+ fp = lock_stp->st_file;
lkflg = setlkflg(lock->lk_type);
status = nfs4_check_openmode(lock_stp, lkflg);
@@ -4054,7 +4096,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
}
out:
- if (status && lock->lk_is_new && lock_sop)
+ if (status && new_state)
release_lockowner(lock_sop);
if (!cstate->replay_owner)
nfs4_unlock_state();
@@ -4251,7 +4293,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
struct nfs4_ol_stateid *stp;
struct xdr_netobj *owner = &rlockowner->rl_owner;
struct list_head matches;
- int i;
+ unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
__be32 status;
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -4266,22 +4308,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
nfs4_lock_state();
status = nfserr_locks_held;
- /* XXX: we're doing a linear search through all the lockowners.
- * Yipes! For now we'll just hope clients aren't really using
- * release_lockowner much, but eventually we have to fix these
- * data structures. */
INIT_LIST_HEAD(&matches);
- for (i = 0; i < LOCK_HASH_SIZE; i++) {
- list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) {
- if (!same_owner_str(sop, owner, clid))
- continue;
- list_for_each_entry(stp, &sop->so_stateids,
- st_perstateowner) {
- lo = lockowner(sop);
- if (check_for_locks(stp->st_file, lo))
- goto out;
- list_add(&lo->lo_list, &matches);
- }
+
+ list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
+ if (sop->so_is_open_owner)
+ continue;
+ if (!same_owner_str(sop, owner, clid))
+ continue;
+ list_for_each_entry(stp, &sop->so_stateids,
+ st_perstateowner) {
+ lo = lockowner(sop);
+ if (check_for_locks(stp->st_file, lo))
+ goto out;
+ list_add(&lo->lo_list, &matches);
}
}
/* Clients probably won't expect us to return with some (but not all)
@@ -4394,16 +4433,127 @@ nfs4_check_open_reclaim(clientid_t *clid)
return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
}
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+
+void nfsd_forget_clients(u64 num)
+{
+ struct nfs4_client *clp, *next;
+ int count = 0;
+
+ nfs4_lock_state();
+ list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
+ nfsd4_remove_clid_dir(clp);
+ expire_client(clp);
+ if (++count == num)
+ break;
+ }
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d clients", count);
+}
+
+static void release_lockowner_sop(struct nfs4_stateowner *sop)
+{
+ release_lockowner(lockowner(sop));
+}
+
+static void release_openowner_sop(struct nfs4_stateowner *sop)
+{
+ release_openowner(openowner(sop));
+}
+
+static int nfsd_release_n_owners(u64 num, bool is_open_owner,
+ void (*release_sop)(struct nfs4_stateowner *))
+{
+ int i, count = 0;
+ struct nfs4_stateowner *sop, *next;
+
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
+ if (sop->so_is_open_owner != is_open_owner)
+ continue;
+ release_sop(sop);
+ if (++count == num)
+ return count;
+ }
+ }
+ return count;
+}
+
+void nfsd_forget_locks(u64 num)
+{
+ int count;
+
+ nfs4_lock_state();
+ count = nfsd_release_n_owners(num, false, release_lockowner_sop);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d locks", count);
+}
+
+void nfsd_forget_openowners(u64 num)
+{
+ int count;
+
+ nfs4_lock_state();
+ count = nfsd_release_n_owners(num, true, release_openowner_sop);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d open owners", count);
+}
+
+int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *))
+{
+ int i, count = 0;
+ struct nfs4_file *fp, *fnext;
+ struct nfs4_delegation *dp, *dnext;
+
+ for (i = 0; i < FILE_HASH_SIZE; i++) {
+ list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
+ list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
+ deleg_func(dp);
+ if (++count == num)
+ return count;
+ }
+ }
+ }
+
+ return count;
+}
+
+void nfsd_forget_delegations(u64 num)
+{
+ unsigned int count;
+
+ nfs4_lock_state();
+ count = nfsd_process_n_delegations(num, unhash_delegation);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Forgot %d delegations", count);
+}
+
+void nfsd_recall_delegations(u64 num)
+{
+ unsigned int count;
+
+ nfs4_lock_state();
+ spin_lock(&recall_lock);
+ count = nfsd_process_n_delegations(num, nfsd_break_one_deleg);
+ spin_unlock(&recall_lock);
+ nfs4_unlock_state();
+
+ printk(KERN_INFO "NFSD: Recalled %d delegations", count);
+}
+
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
/* initialization to perform at module load time: */
-int
+void
nfs4_state_init(void)
{
- int i, status;
+ int i;
- status = nfsd4_init_slabs();
- if (status)
- return status;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
INIT_LIST_HEAD(&conf_id_hashtbl[i]);
INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -4416,18 +4566,15 @@ nfs4_state_init(void)
for (i = 0; i < FILE_HASH_SIZE; i++) {
INIT_LIST_HEAD(&file_hashtbl[i]);
}
- for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) {
- INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]);
- }
- for (i = 0; i < LOCK_HASH_SIZE; i++) {
- INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
}
- memset(&onestateid, ~0, sizeof(stateid_t));
+ for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
INIT_LIST_HEAD(&close_lru);
INIT_LIST_HEAD(&client_lru);
INIT_LIST_HEAD(&del_recall_lru);
reclaim_str_hashtbl_size = 0;
- return 0;
}
static void
@@ -4526,7 +4673,6 @@ __nfs4_state_shutdown(void)
spin_unlock(&recall_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- list_del_init(&dp->dl_recall_lru);
unhash_delegation(dp);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b6fa792d6b8..0ec5a1b9700 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp,
static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
{
if (p == argp->tmp) {
- p = kmalloc(nbytes, GFP_KERNEL);
+ p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);
if (!p)
return NULL;
- memcpy(p, argp->tmp, nbytes);
} else {
BUG_ON(p != argp->tmpp);
argp->tmpp = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index bb4a11d58a5..748eda93ce5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -18,6 +18,7 @@
#include "idmap.h"
#include "nfsd.h"
#include "cache.h"
+#include "fault_inject.h"
/*
* We have a single directory with several nodes in it.
@@ -1128,9 +1129,13 @@ static int __init init_nfsd(void)
int retval;
printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
- retval = nfs4_state_init(); /* nfs4 locking state */
+ retval = nfsd4_init_slabs();
if (retval)
return retval;
+ nfs4_state_init();
+ retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+ if (retval)
+ goto out_free_slabs;
nfsd_stat_init(); /* Statistics */
retval = nfsd_reply_cache_init();
if (retval)
@@ -1161,6 +1166,8 @@ out_free_cache:
nfsd_reply_cache_shutdown();
out_free_stat:
nfsd_stat_shutdown();
+ nfsd_fault_inject_cleanup();
+out_free_slabs:
nfsd4_free_slabs();
return retval;
}
@@ -1175,6 +1182,7 @@ static void __exit exit_nfsd(void)
nfsd_lockd_shutdown();
nfsd_idmap_shutdown();
nfsd4_free_slabs();
+ nfsd_fault_inject_cleanup();
unregister_filesystem(&nfsd_fs_type);
}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 58134a23fdf..1d1e8589b4c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
*/
#ifdef CONFIG_NFSD_V4
extern unsigned int max_delegations;
-int nfs4_state_init(void);
+void nfs4_state_init(void);
+int nfsd4_init_slabs(void);
void nfsd4_free_slabs(void);
int nfs4_state_start(void);
void nfs4_state_shutdown(void);
void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir);
#else
-static inline int nfs4_state_init(void) { return 0; }
+static inline void nfs4_state_init(void) { }
+static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
static inline int nfs4_state_start(void) { return 0; }
static inline void nfs4_state_shutdown(void) { }
@@ -338,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
}
/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
-#define NFSD_WRITEONLY_ATTRS_WORD1 \
-(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEONLY_ATTRS_WORD1 \
+ (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
-#define NFSD_WRITEABLE_ATTRS_WORD0 \
-(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
-#define NFSD_WRITEABLE_ATTRS_WORD1 \
-(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEABLE_ATTRS_WORD0 \
+ (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
+#define NFSD_WRITEABLE_ATTRS_WORD1 \
+ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
#define NFSD_WRITEABLE_ATTRS_WORD2 0
#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index a3cf38476a1..ffb5df1db94 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -366,6 +366,7 @@ struct nfs4_openowner {
struct nfs4_lockowner {
struct nfs4_stateowner lo_owner; /* must be first element */
+ struct list_head lo_owner_ino_hash; /* hash by owner,file */
struct list_head lo_perstateid; /* for lockowners only */
struct list_head lo_list; /* for temporary uses */
};
@@ -482,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void);
extern int nfs4_client_to_reclaim(const char *name);
extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
extern void nfsd4_recdir_purge_old(void);
-extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_create_clid_dir(struct nfs4_client *clp);
extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
extern void release_session_client(struct nfsd4_session *);
extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d25a723b68a..edf6d3ed877 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
return error;
}
-#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction."
-#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type"
+/*
+ * NFS junction information is stored in an extended attribute.
+ */
+#define NFSD_JUNCTION_XATTR_NAME XATTR_TRUSTED_PREFIX "junction.nfs"
+
+/**
+ * nfsd4_is_junction - Test if an object could be an NFS junction
+ *
+ * @dentry: object to test
+ *
+ * Returns 1 if "dentry" appears to contain NFS junction information.
+ * Otherwise 0 is returned.
+ */
int nfsd4_is_junction(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
@@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry)
return 0;
if (!(inode->i_mode & S_ISVTX))
return 0;
- if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0)
+ if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
return 0;
return 1;
}
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 8620f79658d..dfa900948af 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
-void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
+void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
struct svc_serv *);
int svc_create_xprt(struct svc_serv *, const char *, struct net *,
const int, const unsigned short, int);
@@ -118,7 +118,6 @@ void svc_xprt_received(struct svc_xprt *);
void svc_xprt_put(struct svc_xprt *xprt);
void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
void svc_close_xprt(struct svc_xprt *xprt);
-void svc_delete_xprt(struct svc_xprt *xprt);
int svc_port_is_privileged(struct sockaddr *sin);
int svc_print_xprts(char *buf, int maxlen);
struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 85c50b40759..c84e9741cb2 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -34,7 +34,7 @@ struct svc_sock {
/*
* Function prototypes.
*/
-void svc_close_all(struct list_head *);
+void svc_close_all(struct svc_serv *);
int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 03b56bc3b65..465df9ae104 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net)
sunrpc_destroy_cache_detail(cd);
return ret;
}
+EXPORT_SYMBOL_GPL(cache_register_net);
int cache_register(struct cache_detail *cd)
{
@@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
remove_cache_proc_entries(cd, net);
sunrpc_destroy_cache_detail(cd);
}
+EXPORT_SYMBOL_GPL(cache_unregister_net);
void cache_unregister(struct cache_detail *cd)
{
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9d01d46b05f..e4aabc02368 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
fail_free:
kfree(m->to_pool);
+ m->to_pool = NULL;
fail:
return -ENOMEM;
}
@@ -285,9 +286,10 @@ svc_pool_map_put(void)
mutex_lock(&svc_pool_map_mutex);
if (!--m->count) {
- m->mode = SVC_POOL_DEFAULT;
kfree(m->to_pool);
+ m->to_pool = NULL;
kfree(m->pool_to);
+ m->pool_to = NULL;
m->npools = 0;
}
@@ -527,17 +529,20 @@ svc_destroy(struct svc_serv *serv)
printk("svc_destroy: no threads for serv=%p!\n", serv);
del_timer_sync(&serv->sv_temptimer);
-
- svc_close_all(&serv->sv_tempsocks);
+ /*
+ * The set of xprts (contained in the sv_tempsocks and
+ * sv_permsocks lists) is now constant, since it is modified
+ * only by accepting new sockets (done by service threads in
+ * svc_recv) or aging old ones (done by sv_temptimer), or
+ * configuration changes (excluded by whatever locking the
+ * caller is using--nfsd_mutex in the case of nfsd). So it's
+ * safe to traverse those lists and shut everything down:
+ */
+ svc_close_all(serv);
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
- svc_close_all(&serv->sv_permsocks);
-
- BUG_ON(!list_empty(&serv->sv_permsocks));
- BUG_ON(!list_empty(&serv->sv_tempsocks));
-
cache_clean_deferred(serv);
if (svc_serv_is_pooled(serv))
@@ -683,8 +688,8 @@ found_pool:
* Create or destroy enough new threads to make the number
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
- * all pools. Must be called with a svc_get() reference and
- * the BKL or another lock to protect access to svc_serv fields.
+ * all pools. Caller must ensure that mutual exclusion between this and
+ * server startup or shutdown.
*
* Destroying threads relies on the service threads filling in
* rqstp->rq_task, which only the nfs ones do. Assumes the serv
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 38649cfa4e8..74cb0d8e9ca 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
static void svc_age_temp_xprts(unsigned long closure);
+static void svc_delete_xprt(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
* idle connections after 5 minutes, servers after
@@ -147,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
* Called by transport drivers to initialize the transport independent
* portion of the transport instance.
*/
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
- struct svc_serv *serv)
+void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
+ struct svc_xprt *xprt, struct svc_serv *serv)
{
memset(xprt, 0, sizeof(*xprt));
xprt->xpt_class = xcl;
@@ -163,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
- xprt->xpt_net = get_net(&init_net);
+ xprt->xpt_net = get_net(net);
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
@@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
/*
* Remove a dead transport
*/
-void svc_delete_xprt(struct svc_xprt *xprt)
+static void svc_delete_xprt(struct svc_xprt *xprt)
{
struct svc_serv *serv = xprt->xpt_server;
struct svc_deferred_req *dr;
@@ -893,14 +894,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
spin_lock_bh(&serv->sv_lock);
if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
list_del_init(&xprt->xpt_list);
- /*
- * The only time we're called while xpt_ready is still on a list
- * is while the list itself is about to be destroyed (in
- * svc_destroy). BUT svc_xprt_enqueue could still be attempting
- * to add new entries to the sp_sockets list, so we can't leave
- * a freed xprt on it.
- */
- list_del_init(&xprt->xpt_ready);
+ BUG_ON(!list_empty(&xprt->xpt_ready));
if (test_bit(XPT_TEMP, &xprt->xpt_flags))
serv->sv_tmpcnt--;
spin_unlock_bh(&serv->sv_lock);
@@ -928,22 +922,48 @@ void svc_close_xprt(struct svc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(svc_close_xprt);
-void svc_close_all(struct list_head *xprt_list)
+static void svc_close_list(struct list_head *xprt_list)
+{
+ struct svc_xprt *xprt;
+
+ list_for_each_entry(xprt, xprt_list, xpt_list) {
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ set_bit(XPT_BUSY, &xprt->xpt_flags);
+ }
+}
+
+void svc_close_all(struct svc_serv *serv)
{
+ struct svc_pool *pool;
struct svc_xprt *xprt;
struct svc_xprt *tmp;
+ int i;
+
+ svc_close_list(&serv->sv_tempsocks);
+ svc_close_list(&serv->sv_permsocks);
+ for (i = 0; i < serv->sv_nrpools; i++) {
+ pool = &serv->sv_pools[i];
+
+ spin_lock_bh(&pool->sp_lock);
+ while (!list_empty(&pool->sp_sockets)) {
+ xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready);
+ list_del_init(&xprt->xpt_ready);
+ }
+ spin_unlock_bh(&pool->sp_lock);
+ }
/*
- * The server is shutting down, and no more threads are running.
- * svc_xprt_enqueue() might still be running, but at worst it
- * will re-add the xprt to sp_sockets, which will soon get
- * freed. So we don't bother with any more locking, and don't
- * leave the close to the (nonexistent) server threads:
+ * At this point the sp_sockets lists will stay empty, since
+ * svc_enqueue will not add new entries without taking the
+ * sp_lock and checking XPT_BUSY.
*/
- list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list)
svc_delete_xprt(xprt);
- }
+ list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list)
+ svc_delete_xprt(xprt);
+
+ BUG_ON(!list_empty(&serv->sv_permsocks));
+ BUG_ON(!list_empty(&serv->sv_tempsocks));
}
/*
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 4653286fcc9..464570906f8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
int err, level, optname, one = 1;
- svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
+ svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
+ &svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
@@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
struct sock *sk = svsk->sk_sk;
- svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
+ svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
+ &svsk->sk_xprt, serv);
set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
@@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
return ERR_PTR(-ENOMEM);
xprt = &svsk->sk_xprt;
- svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+ svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
serv->sv_bc_xprt = xprt;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ba1296d88de..894cb42db91 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
if (!cma_xprt)
return NULL;
- svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv);
+ svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh
new file mode 100755
index 00000000000..06a399ac8b2
--- /dev/null
+++ b/tools/nfsd/inject_fault.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+#
+# Script for easier NFSD fault injection
+
+# Check that debugfs has been mounted
+DEBUGFS=`cat /proc/mounts | grep debugfs`
+if [ "$DEBUGFS" == "" ]; then
+ echo "debugfs does not appear to be mounted!"
+ echo "Please mount debugfs and try again"
+ exit 1
+fi
+
+# Check that the fault injection directory exists
+DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd
+if [ ! -d "$DEBUGDIR" ]; then
+ echo "$DEBUGDIR does not exist"
+ echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION"
+ exit 1
+fi
+
+function help()
+{
+ echo "Usage $0 injection_type [count]"
+ echo ""
+ echo "Injection types are:"
+ ls $DEBUGDIR
+ exit 1
+}
+
+if [ $# == 0 ]; then
+ help
+elif [ ! -f $DEBUGDIR/$1 ]; then
+ help
+elif [ $# != 2 ]; then
+ COUNT=0
+else
+ COUNT=$2
+fi
+
+BEFORE=`mktemp`
+AFTER=`mktemp`
+dmesg > $BEFORE
+echo $COUNT > $DEBUGDIR/$1
+dmesg > $AFTER
+# Capture lines that only exist in the $AFTER file
+diff $BEFORE $AFTER | grep ">"
+rm -f $BEFORE $AFTER