From c5b29f885afe890f953f7f23424045cdad31d3e4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Aug 2010 16:55:22 +1000
Subject: sunrpc: use seconds since boot in expiry cache

This protects us from confusion when the wallclock time changes.

We convert to and from wallclock when  setting or reading expiry
times.

Also use seconds since boot for last_clost time.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2b06410e584..8dc121955fd 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -42,7 +42,7 @@ static void cache_revisit_request(struct cache_head *item);
 
 static void cache_init(struct cache_head *h)
 {
-	time_t now = get_seconds();
+	time_t now = seconds_since_boot();
 	h->next = NULL;
 	h->flags = 0;
 	kref_init(&h->ref);
@@ -52,7 +52,7 @@ static void cache_init(struct cache_head *h)
 
 static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
-	return  (h->expiry_time < get_seconds()) ||
+	return  (h->expiry_time < seconds_since_boot()) ||
 		(detail->flush_time > h->last_refresh);
 }
 
@@ -127,7 +127,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
 static void cache_fresh_locked(struct cache_head *head, time_t expiry)
 {
 	head->expiry_time = expiry;
-	head->last_refresh = get_seconds();
+	head->last_refresh = seconds_since_boot();
 	set_bit(CACHE_VALID, &head->flags);
 }
 
@@ -238,7 +238,7 @@ int cache_check(struct cache_detail *detail,
 
 	/* now see if we want to start an upcall */
 	refresh_age = (h->expiry_time - h->last_refresh);
-	age = get_seconds() - h->last_refresh;
+	age = seconds_since_boot() - h->last_refresh;
 
 	if (rqstp == NULL) {
 		if (rv == -EAGAIN)
@@ -253,7 +253,7 @@ int cache_check(struct cache_detail *detail,
 				cache_revisit_request(h);
 				if (rv == -EAGAIN) {
 					set_bit(CACHE_NEGATIVE, &h->flags);
-					cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY);
+					cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
 					cache_fresh_unlocked(h, detail);
 					rv = -ENOENT;
 				}
@@ -388,11 +388,11 @@ static int cache_clean(void)
 			return -1;
 		}
 		current_detail = list_entry(next, struct cache_detail, others);
-		if (current_detail->nextcheck > get_seconds())
+		if (current_detail->nextcheck > seconds_since_boot())
 			current_index = current_detail->hash_size;
 		else {
 			current_index = 0;
-			current_detail->nextcheck = get_seconds()+30*60;
+			current_detail->nextcheck = seconds_since_boot()+30*60;
 		}
 	}
 
@@ -477,7 +477,7 @@ EXPORT_SYMBOL_GPL(cache_flush);
 void cache_purge(struct cache_detail *detail)
 {
 	detail->flush_time = LONG_MAX;
-	detail->nextcheck = get_seconds();
+	detail->nextcheck = seconds_since_boot();
 	cache_flush();
 	detail->flush_time = 1;
 }
@@ -902,7 +902,7 @@ static int cache_release(struct inode *inode, struct file *filp,
 		filp->private_data = NULL;
 		kfree(rp);
 
-		cd->last_close = get_seconds();
+		cd->last_close = seconds_since_boot();
 		atomic_dec(&cd->readers);
 	}
 	module_put(cd->owner);
@@ -1034,7 +1034,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
 	int len;
 
 	if (atomic_read(&detail->readers) == 0 &&
-	    detail->last_close < get_seconds() - 30) {
+	    detail->last_close < seconds_since_boot() - 30) {
 			warn_no_listener(detail);
 			return -EINVAL;
 	}
@@ -1219,7 +1219,8 @@ static int c_show(struct seq_file *m, void *p)
 
 	ifdebug(CACHE)
 		seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
-			   cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags);
+			   convert_to_wallclock(cp->expiry_time),
+			   atomic_read(&cp->ref.refcount), cp->flags);
 	cache_get(cp);
 	if (cache_check(cd, cp, NULL))
 		/* cache_check does a cache_put on failure */
@@ -1285,7 +1286,7 @@ static ssize_t read_flush(struct file *file, char __user *buf,
 	unsigned long p = *ppos;
 	size_t len;
 
-	sprintf(tbuf, "%lu\n", cd->flush_time);
+	sprintf(tbuf, "%lu\n", convert_to_wallclock(cd->flush_time));
 	len = strlen(tbuf);
 	if (p >= len)
 		return 0;
@@ -1303,19 +1304,20 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
 			   struct cache_detail *cd)
 {
 	char tbuf[20];
-	char *ep;
-	long flushtime;
+	char *bp, *ep;
+
 	if (*ppos || count > sizeof(tbuf)-1)
 		return -EINVAL;
 	if (copy_from_user(tbuf, buf, count))
 		return -EFAULT;
 	tbuf[count] = 0;
-	flushtime = simple_strtoul(tbuf, &ep, 0);
+	simple_strtoul(tbuf, &ep, 0);
 	if (*ep && *ep != '\n')
 		return -EINVAL;
 
-	cd->flush_time = flushtime;
-	cd->nextcheck = get_seconds();
+	bp = tbuf;
+	cd->flush_time = get_expiry(&bp);
+	cd->nextcheck = seconds_since_boot();
 	cache_flush();
 
 	*ppos += count;
-- 
cgit v1.2.3


From f16b6e8d838b2e2bb4561201311c66ac02ad67df Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Aug 2010 17:04:06 +1000
Subject: sunrpc/cache: allow threads to block while waiting for cache update.

The current practice of waiting for cache updates by queueing the
whole request to be retried has (at least) two problems.

1/ With NFSv4, requests can be quite complex and re-trying a whole
  request when a later part fails should only be a last-resort, not a
  normal practice.

2/ Large requests, and in particular any 'write' request, will not be
  queued by the current code and doing so would be undesirable.

In many cases only a very sort wait is needed before the cache gets
valid data.

So, providing the underlying transport permits it by setting
 ->thread_wait,
arrange to wait briefly for an upcall to be completed (as reflected in
the clearing of CACHE_PENDING).
If the short wait was not long enough and CACHE_PENDING is still set,
fall back on the old approach.

The 'thread_wait' value is set to 5 seconds when there are spare
threads, and 1 second when there are no spare threads.

These values are probably much higher than needed, but will ensure
some forward progress.

Note that as we only request an update for a non-valid item, and as
non-valid items are updated in place it is extremely unlikely that
cache_check will return -ETIMEDOUT.  Normally cache_defer_req will
sleep for a short while and then find that the item is_valid.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h |  3 +++
 net/sunrpc/cache.c           | 59 +++++++++++++++++++++++++++++++++++++++++++-
 net/sunrpc/svc_xprt.c        | 11 +++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index ece432b7f87..52a7d7224e9 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -125,6 +125,9 @@ struct cache_detail {
  */
 struct cache_req {
 	struct cache_deferred_req *(*defer)(struct cache_req *req);
+	int thread_wait;  /* How long (jiffies) we can block the
+			   * current thread to wait for updates.
+			   */
 };
 /* this must be embedded in a deferred_request that is being
  * delayed awaiting cache-fill
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 8dc121955fd..2c5297f245b 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -509,10 +509,22 @@ static LIST_HEAD(cache_defer_list);
 static struct list_head cache_defer_hash[DFR_HASHSIZE];
 static int cache_defer_cnt;
 
+struct thread_deferred_req {
+	struct cache_deferred_req handle;
+	struct completion completion;
+};
+static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+{
+	struct thread_deferred_req *dr =
+		container_of(dreq, struct thread_deferred_req, handle);
+	complete(&dr->completion);
+}
+
 static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 {
 	struct cache_deferred_req *dreq, *discard;
 	int hash = DFR_HASH(item);
+	struct thread_deferred_req sleeper;
 
 	if (cache_defer_cnt >= DFR_MAX) {
 		/* too much in the cache, randomly drop this one,
@@ -521,7 +533,15 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 		if (net_random()&1)
 			return -ENOMEM;
 	}
-	dreq = req->defer(req);
+	if (req->thread_wait) {
+		dreq = &sleeper.handle;
+		sleeper.completion =
+			COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
+		dreq->revisit = cache_restart_thread;
+	} else
+		dreq = req->defer(req);
+
+ retry:
 	if (dreq == NULL)
 		return -ENOMEM;
 
@@ -555,6 +575,43 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 		cache_revisit_request(item);
 		return -EAGAIN;
 	}
+
+	if (dreq == &sleeper.handle) {
+		if (wait_for_completion_interruptible_timeout(
+			    &sleeper.completion, req->thread_wait) <= 0) {
+			/* The completion wasn't completed, so we need
+			 * to clean up
+			 */
+			spin_lock(&cache_defer_lock);
+			if (!list_empty(&sleeper.handle.hash)) {
+				list_del_init(&sleeper.handle.recent);
+				list_del_init(&sleeper.handle.hash);
+				cache_defer_cnt--;
+				spin_unlock(&cache_defer_lock);
+			} else {
+				/* cache_revisit_request already removed
+				 * this from the hash table, but hasn't
+				 * called ->revisit yet.  It will very soon
+				 * and we need to wait for it.
+				 */
+				spin_unlock(&cache_defer_lock);
+				wait_for_completion(&sleeper.completion);
+			}
+		}
+		if (test_bit(CACHE_PENDING, &item->flags)) {
+			/* item is still pending, try request
+			 * deferral
+			 */
+			dreq = req->defer(req);
+			goto retry;
+		}
+		/* only return success if we actually deferred the
+		 * request.  In this case we waited until it was
+		 * answered so no deferral has happened - rather
+		 * an answer already exists.
+		 */
+		return -EEXIST;
+	}
 	return 0;
 }
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cbc084939dd..8ff6840866f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -651,6 +651,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (signalled() || kthread_should_stop())
 		return -EINTR;
 
+	/* Normally we will wait up to 5 seconds for any required
+	 * cache information to be provided.
+	 */
+	rqstp->rq_chandle.thread_wait = 5*HZ;
+
 	spin_lock_bh(&pool->sp_lock);
 	xprt = svc_xprt_dequeue(pool);
 	if (xprt) {
@@ -658,6 +663,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		svc_xprt_get(xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+
+		/* As there is a shortage of threads and this request
+		 * had to be queue, don't allow the thread to wait so
+		 * long for cache updates.
+		 */
+		rqstp->rq_chandle.thread_wait = 1*HZ;
 	} else {
 		/* No data pending. Go to sleep */
 		svc_thread_enqueue(pool, rqstp);
-- 
cgit v1.2.3


From 6610f720e9e8103c22d1f1ccf8fbb695550a571f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 26 Aug 2010 13:19:52 -0400
Subject: svcrpc: minor cache cleanup

Pull out some code into helper functions, fix a typo.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c    | 44 ++++++++++++++++++++++++--------------------
 net/sunrpc/svc_xprt.c |  2 +-
 2 files changed, 25 insertions(+), 21 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2c5297f245b..18e5e8e6f62 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -520,10 +520,26 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
 	complete(&dr->completion);
 }
 
+static void __unhash_deferred_req(struct cache_deferred_req *dreq)
+{
+	list_del_init(&dreq->recent);
+	list_del_init(&dreq->hash);
+	cache_defer_cnt--;
+}
+
+static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
+{
+	int hash = DFR_HASH(item);
+
+	list_add(&dreq->recent, &cache_defer_list);
+	if (cache_defer_hash[hash].next == NULL)
+		INIT_LIST_HEAD(&cache_defer_hash[hash]);
+	list_add(&dreq->hash, &cache_defer_hash[hash]);
+}
+
 static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 {
 	struct cache_deferred_req *dreq, *discard;
-	int hash = DFR_HASH(item);
 	struct thread_deferred_req sleeper;
 
 	if (cache_defer_cnt >= DFR_MAX) {
@@ -549,20 +565,14 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 
 	spin_lock(&cache_defer_lock);
 
-	list_add(&dreq->recent, &cache_defer_list);
-
-	if (cache_defer_hash[hash].next == NULL)
-		INIT_LIST_HEAD(&cache_defer_hash[hash]);
-	list_add(&dreq->hash, &cache_defer_hash[hash]);
+	__hash_deferred_req(dreq, item);
 
 	/* it is in, now maybe clean up */
 	discard = NULL;
 	if (++cache_defer_cnt > DFR_MAX) {
 		discard = list_entry(cache_defer_list.prev,
 				     struct cache_deferred_req, recent);
-		list_del_init(&discard->recent);
-		list_del_init(&discard->hash);
-		cache_defer_cnt--;
+		__unhash_deferred_req(discard);
 	}
 	spin_unlock(&cache_defer_lock);
 
@@ -584,9 +594,7 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 			 */
 			spin_lock(&cache_defer_lock);
 			if (!list_empty(&sleeper.handle.hash)) {
-				list_del_init(&sleeper.handle.recent);
-				list_del_init(&sleeper.handle.hash);
-				cache_defer_cnt--;
+				__unhash_deferred_req(&sleeper.handle);
 				spin_unlock(&cache_defer_lock);
 			} else {
 				/* cache_revisit_request already removed
@@ -632,9 +640,8 @@ static void cache_revisit_request(struct cache_head *item)
 			dreq = list_entry(lp, struct cache_deferred_req, hash);
 			lp = lp->next;
 			if (dreq->item == item) {
-				list_del_init(&dreq->hash);
-				list_move(&dreq->recent, &pending);
-				cache_defer_cnt--;
+				__unhash_deferred_req(dreq);
+				list_add(&dreq->recent, &pending);
 			}
 		}
 	}
@@ -657,11 +664,8 @@ void cache_clean_deferred(void *owner)
 	spin_lock(&cache_defer_lock);
 
 	list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
-		if (dreq->owner == owner) {
-			list_del_init(&dreq->hash);
-			list_move(&dreq->recent, &pending);
-			cache_defer_cnt--;
-		}
+		if (dreq->owner == owner)
+			__unhash_deferred_req(dreq);
 	}
 	spin_unlock(&cache_defer_lock);
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 8ff6840866f..95fc3e8c51d 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -665,7 +665,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 
 		/* As there is a shortage of threads and this request
-		 * had to be queue, don't allow the thread to wait so
+		 * had to be queued, don't allow the thread to wait so
 		 * long for cache updates.
 		 */
 		rqstp->rq_chandle.thread_wait = 1*HZ;
-- 
cgit v1.2.3


From 3211af1119174fbe8b676422b74870cdd51d7314 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 26 Aug 2010 16:56:23 -0400
Subject: svcrpc: cache deferral cleanup

Attempt to make obvious the first-try-sleeping-then-try-deferral logic
by putting that logic into a top-level function that calls helpers.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 143 +++++++++++++++++++++++++++++------------------------
 1 file changed, 79 insertions(+), 64 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 18e5e8e6f62..da872f9fe1e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -509,17 +509,6 @@ static LIST_HEAD(cache_defer_list);
 static struct list_head cache_defer_hash[DFR_HASHSIZE];
 static int cache_defer_cnt;
 
-struct thread_deferred_req {
-	struct cache_deferred_req handle;
-	struct completion completion;
-};
-static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
-{
-	struct thread_deferred_req *dr =
-		container_of(dreq, struct thread_deferred_req, handle);
-	complete(&dr->completion);
-}
-
 static void __unhash_deferred_req(struct cache_deferred_req *dreq)
 {
 	list_del_init(&dreq->recent);
@@ -537,29 +526,9 @@ static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_he
 	list_add(&dreq->hash, &cache_defer_hash[hash]);
 }
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
 {
-	struct cache_deferred_req *dreq, *discard;
-	struct thread_deferred_req sleeper;
-
-	if (cache_defer_cnt >= DFR_MAX) {
-		/* too much in the cache, randomly drop this one,
-		 * or continue and drop the oldest below
-		 */
-		if (net_random()&1)
-			return -ENOMEM;
-	}
-	if (req->thread_wait) {
-		dreq = &sleeper.handle;
-		sleeper.completion =
-			COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
-		dreq->revisit = cache_restart_thread;
-	} else
-		dreq = req->defer(req);
-
- retry:
-	if (dreq == NULL)
-		return -ENOMEM;
+	struct cache_deferred_req *discard;
 
 	dreq->item = item;
 
@@ -585,42 +554,88 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 		cache_revisit_request(item);
 		return -EAGAIN;
 	}
+	return 0;
+}
 
-	if (dreq == &sleeper.handle) {
-		if (wait_for_completion_interruptible_timeout(
-			    &sleeper.completion, req->thread_wait) <= 0) {
-			/* The completion wasn't completed, so we need
-			 * to clean up
-			 */
-			spin_lock(&cache_defer_lock);
-			if (!list_empty(&sleeper.handle.hash)) {
-				__unhash_deferred_req(&sleeper.handle);
-				spin_unlock(&cache_defer_lock);
-			} else {
-				/* cache_revisit_request already removed
-				 * this from the hash table, but hasn't
-				 * called ->revisit yet.  It will very soon
-				 * and we need to wait for it.
-				 */
-				spin_unlock(&cache_defer_lock);
-				wait_for_completion(&sleeper.completion);
-			}
-		}
-		if (test_bit(CACHE_PENDING, &item->flags)) {
-			/* item is still pending, try request
-			 * deferral
+struct thread_deferred_req {
+	struct cache_deferred_req handle;
+	struct completion completion;
+};
+
+static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+{
+	struct thread_deferred_req *dr =
+		container_of(dreq, struct thread_deferred_req, handle);
+	complete(&dr->completion);
+}
+
+static int cache_wait_req(struct cache_req *req, struct cache_head *item)
+{
+	struct thread_deferred_req sleeper;
+	struct cache_deferred_req *dreq = &sleeper.handle;
+	int ret;
+
+	sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
+	dreq->revisit = cache_restart_thread;
+
+	ret = setup_deferral(dreq, item);
+	if (ret)
+		return ret;
+
+	if (wait_for_completion_interruptible_timeout(
+		    &sleeper.completion, req->thread_wait) <= 0) {
+		/* The completion wasn't completed, so we need
+		 * to clean up
+		 */
+		spin_lock(&cache_defer_lock);
+		if (!list_empty(&sleeper.handle.hash)) {
+			__unhash_deferred_req(&sleeper.handle);
+			spin_unlock(&cache_defer_lock);
+		} else {
+			/* cache_revisit_request already removed
+			 * this from the hash table, but hasn't
+			 * called ->revisit yet.  It will very soon
+			 * and we need to wait for it.
 			 */
-			dreq = req->defer(req);
-			goto retry;
+			spin_unlock(&cache_defer_lock);
+			wait_for_completion(&sleeper.completion);
 		}
-		/* only return success if we actually deferred the
-		 * request.  In this case we waited until it was
-		 * answered so no deferral has happened - rather
-		 * an answer already exists.
+	}
+	if (test_bit(CACHE_PENDING, &item->flags)) {
+		/* item is still pending, try request
+		 * deferral
 		 */
-		return -EEXIST;
+		return -ETIMEDOUT;
 	}
-	return 0;
+	/* only return success if we actually deferred the
+	 * request.  In this case we waited until it was
+	 * answered so no deferral has happened - rather
+	 * an answer already exists.
+	 */
+	return -EEXIST;
+}
+
+static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+{
+	struct cache_deferred_req *dreq;
+	int ret;
+
+	if (cache_defer_cnt >= DFR_MAX) {
+		/* too much in the cache, randomly drop this one,
+		 * or continue and drop the oldest
+		 */
+		if (net_random()&1)
+			return -ENOMEM;
+	}
+	if (req->thread_wait) {
+		ret = cache_wait_req(req, item);
+		if (ret != -ETIMEDOUT)
+			return ret;
+	}
+	dreq = req->defer(req);
+	if (dreq == NULL)
+		return -ENOMEM;
+	return setup_deferral(dreq, item);
 }
 
 static void cache_revisit_request(struct cache_head *item)
-- 
cgit v1.2.3


From 06497524589f2a7717da33969d541674e0a27da6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sun, 19 Sep 2010 22:55:06 -0400
Subject: nfsd4: fix hang on fast-booting nfs servers

The last_close field of a cache_detail is initialized to zero, so the
condition

	detail->last_close < seconds_since_boot() - 30

may be false even for a cache that was never opened.

However, we want to immediately fail upcalls to caches that were never
opened: in the case of the auth_unix_gid cache, especially, which may
never be opened by mountd (if the --manage-gids option is not set), we
want to fail the upcall immediately.  Otherwise client requests will be
dropped unnecessarily on reboot.

Also document these conditions.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index da872f9fe1e..ca7c621cd97 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1091,6 +1091,23 @@ static void warn_no_listener(struct cache_detail *detail)
 	}
 }
 
+static bool cache_listeners_exist(struct cache_detail *detail)
+{
+	if (atomic_read(&detail->readers))
+		return true;
+	if (detail->last_close == 0)
+		/* This cache was never opened */
+		return false;
+	if (detail->last_close < seconds_since_boot() - 30)
+		/*
+		 * We allow for the possibility that someone might
+		 * restart a userspace daemon without restarting the
+		 * server; but after 30 seconds, we give up.
+		 */
+		 return false;
+	return true;
+}
+
 /*
  * register an upcall request to user-space and queue it up for read() by the
  * upcall daemon.
@@ -1109,10 +1126,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
 	char *bp;
 	int len;
 
-	if (atomic_read(&detail->readers) == 0 &&
-	    detail->last_close < seconds_since_boot() - 30) {
-			warn_no_listener(detail);
-			return -EINVAL;
+	if (!cache_listeners_exist(detail)) {
+		warn_no_listener(detail);
+		return -EINVAL;
 	}
 
 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-- 
cgit v1.2.3


From 1117449276bb909b029ed0b9ba13f53e4784db9d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 12 Aug 2010 17:04:08 +1000
Subject: sunrpc/cache: change deferred-request hash table to use hlist.

Being a hash table, hlist is the best option.

There is currently some ugliness were we treat "->next == NULL" as
a special case to avoid having to initialise the whole array.
This change nicely gets rid of that case.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h |  2 +-
 net/sunrpc/cache.c           | 28 ++++++++++------------------
 2 files changed, 11 insertions(+), 19 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 52a7d7224e9..03496357f45 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -133,7 +133,7 @@ struct cache_req {
  * delayed awaiting cache-fill
  */
 struct cache_deferred_req {
-	struct list_head	hash;	/* on hash chain */
+	struct hlist_node	hash;	/* on hash chain */
 	struct list_head	recent; /* on fifo */
 	struct cache_head	*item;  /* cache item we wait on */
 	void			*owner; /* we might need to discard all defered requests
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ca7c621cd97..2a840519405 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -506,13 +506,13 @@ EXPORT_SYMBOL_GPL(cache_purge);
 
 static DEFINE_SPINLOCK(cache_defer_lock);
 static LIST_HEAD(cache_defer_list);
-static struct list_head cache_defer_hash[DFR_HASHSIZE];
+static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
 static int cache_defer_cnt;
 
 static void __unhash_deferred_req(struct cache_deferred_req *dreq)
 {
 	list_del_init(&dreq->recent);
-	list_del_init(&dreq->hash);
+	hlist_del_init(&dreq->hash);
 	cache_defer_cnt--;
 }
 
@@ -521,9 +521,7 @@ static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_he
 	int hash = DFR_HASH(item);
 
 	list_add(&dreq->recent, &cache_defer_list);
-	if (cache_defer_hash[hash].next == NULL)
-		INIT_LIST_HEAD(&cache_defer_hash[hash]);
-	list_add(&dreq->hash, &cache_defer_hash[hash]);
+	hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
 }
 
 static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
@@ -588,7 +586,7 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item)
 		 * to clean up
 		 */
 		spin_lock(&cache_defer_lock);
-		if (!list_empty(&sleeper.handle.hash)) {
+		if (!hlist_unhashed(&sleeper.handle.hash)) {
 			__unhash_deferred_req(&sleeper.handle);
 			spin_unlock(&cache_defer_lock);
 		} else {
@@ -642,24 +640,18 @@ static void cache_revisit_request(struct cache_head *item)
 {
 	struct cache_deferred_req *dreq;
 	struct list_head pending;
-
-	struct list_head *lp;
+	struct hlist_node *lp, *tmp;
 	int hash = DFR_HASH(item);
 
 	INIT_LIST_HEAD(&pending);
 	spin_lock(&cache_defer_lock);
 
-	lp = cache_defer_hash[hash].next;
-	if (lp) {
-		while (lp != &cache_defer_hash[hash]) {
-			dreq = list_entry(lp, struct cache_deferred_req, hash);
-			lp = lp->next;
-			if (dreq->item == item) {
-				__unhash_deferred_req(dreq);
-				list_add(&dreq->recent, &pending);
-			}
+	hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
+		if (dreq->item == item) {
+			__unhash_deferred_req(dreq);
+			list_add(&dreq->recent, &pending);
 		}
-	}
+
 	spin_unlock(&cache_defer_lock);
 
 	while (!list_empty(&pending)) {
-- 
cgit v1.2.3


From e7f483eabea8ef6d2b5ce1b74c8184cc06819f15 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Tue, 21 Sep 2010 09:40:25 +0300
Subject: sunrpc/cache: don't use custom hex_to_bin() converter

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: linux-nfs@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2a840519405..ac2c6e6abe6 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1179,13 +1179,19 @@ int qword_get(char **bpp, char *dest, int bufsize)
 	if (bp[0] == '\\' && bp[1] == 'x') {
 		/* HEX STRING */
 		bp += 2;
-		while (isxdigit(bp[0]) && isxdigit(bp[1]) && len < bufsize) {
-			int byte = isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
-			bp++;
-			byte <<= 4;
-			byte |= isdigit(*bp) ? *bp-'0' : toupper(*bp)-'A'+10;
-			*dest++ = byte;
-			bp++;
+		while (len < bufsize) {
+			int h, l;
+
+			h = hex_to_bin(bp[0]);
+			if (h < 0)
+				break;
+
+			l = hex_to_bin(bp[1]);
+			if (l < 0)
+				break;
+
+			*dest++ = (h << 4) | l;
+			bp += 2;
 			len++;
 		}
 	} else {
-- 
cgit v1.2.3


From e95dffa4304186ad87963255f3e5e96b5c41849f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 22 Sep 2010 12:55:06 +1000
Subject: sunrpc/cache: fix recent breakage of cache_clean_deferred

commit 6610f720e9e8103c22d1f1ccf8fbb695550a571f
broke cache_clean_deferred as entries are no longer added to the
pending list for subsequent revisiting.

So put those requests back on the pending list.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ac2c6e6abe6..ff733dfef3b 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -671,8 +671,10 @@ void cache_clean_deferred(void *owner)
 	spin_lock(&cache_defer_lock);
 
 	list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
-		if (dreq->owner == owner)
+		if (dreq->owner == owner) {
 			__unhash_deferred_req(dreq);
+			list_add(&dreq->recent, &pending);
+		}
 	}
 	spin_unlock(&cache_defer_lock);
 
-- 
cgit v1.2.3


From 593ce16b943ea37d4ec62c377b32d7f3f4085e84 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 27 Sep 2010 14:00:15 +0400
Subject: sunrpc: Add routines that allow registering per-net caches

Existing calls do the same, but for the init_net.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h |  2 ++
 net/sunrpc/cache.c           | 27 +++++++++++++++++++--------
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 03496357f45..6950c981882 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -197,7 +197,9 @@ extern void cache_purge(struct cache_detail *detail);
 #define NEVER (0x7FFFFFFF)
 extern void __init cache_initialize(void);
 extern int cache_register(struct cache_detail *cd);
+extern int cache_register_net(struct cache_detail *cd, struct net *net);
 extern void cache_unregister(struct cache_detail *cd);
+extern void cache_unregister_net(struct cache_detail *cd, struct net *net);
 
 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
 					mode_t, struct cache_detail *);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ff733dfef3b..e84e7ddeecd 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -34,6 +34,7 @@
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
+#include <net/net_namespace.h>
 
 #define	 RPCDBG_FACILITY RPCDBG_CACHE
 
@@ -1537,7 +1538,7 @@ static const struct file_operations cache_flush_operations_procfs = {
 	.release	= release_flush_procfs,
 };
 
-static void remove_cache_proc_entries(struct cache_detail *cd)
+static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
 	if (cd->u.procfs.proc_ent == NULL)
 		return;
@@ -1552,7 +1553,7 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
 }
 
 #ifdef CONFIG_PROC_FS
-static int create_cache_proc_entries(struct cache_detail *cd)
+static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
 	struct proc_dir_entry *p;
 
@@ -1587,11 +1588,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
 	}
 	return 0;
 out_nomem:
-	remove_cache_proc_entries(cd);
+	remove_cache_proc_entries(cd, net);
 	return -ENOMEM;
 }
 #else /* CONFIG_PROC_FS */
-static int create_cache_proc_entries(struct cache_detail *cd)
+static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
 	return 0;
 }
@@ -1602,23 +1603,33 @@ void __init cache_initialize(void)
 	INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
 }
 
-int cache_register(struct cache_detail *cd)
+int cache_register_net(struct cache_detail *cd, struct net *net)
 {
 	int ret;
 
 	sunrpc_init_cache_detail(cd);
-	ret = create_cache_proc_entries(cd);
+	ret = create_cache_proc_entries(cd, net);
 	if (ret)
 		sunrpc_destroy_cache_detail(cd);
 	return ret;
 }
+
+int cache_register(struct cache_detail *cd)
+{
+	return cache_register_net(cd, &init_net);
+}
 EXPORT_SYMBOL_GPL(cache_register);
 
-void cache_unregister(struct cache_detail *cd)
+void cache_unregister_net(struct cache_detail *cd, struct net *net)
 {
-	remove_cache_proc_entries(cd);
+	remove_cache_proc_entries(cd, net);
 	sunrpc_destroy_cache_detail(cd);
 }
+
+void cache_unregister(struct cache_detail *cd)
+{
+	cache_unregister_net(cd, &init_net);
+}
 EXPORT_SYMBOL_GPL(cache_unregister);
 
 static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
-- 
cgit v1.2.3


From 4f42d0d53ca4737f82937edb0efc83564c124853 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 27 Sep 2010 14:01:58 +0400
Subject: sunrpc: Make the /proc/net/rpc appear in net namespaces

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/stats.h | 23 +++++++++++++++--------
 net/sunrpc/cache.c           | 11 ++++++++---
 net/sunrpc/netns.h           |  1 +
 net/sunrpc/stats.c           | 43 +++++++++++++++++++++++++------------------
 net/sunrpc/sunrpc_syms.c     | 16 ++++++++++------
 5 files changed, 59 insertions(+), 35 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h
index 5fa0f208430..680471d1f28 100644
--- a/include/linux/sunrpc/stats.h
+++ b/include/linux/sunrpc/stats.h
@@ -38,8 +38,21 @@ struct svc_stat {
 				rpcbadclnt;
 };
 
-void			rpc_proc_init(void);
-void			rpc_proc_exit(void);
+struct net;
+#ifdef CONFIG_PROC_FS
+int			rpc_proc_init(struct net *);
+void			rpc_proc_exit(struct net *);
+#else
+static inline int rpc_proc_init(struct net *net)
+{
+	return 0;
+}
+
+static inline void rpc_proc_exit(struct net *net)
+{
+}
+#endif
+
 #ifdef MODULE
 void			rpc_modcount(struct inode *, int);
 #endif
@@ -54,9 +67,6 @@ void			svc_proc_unregister(const char *);
 
 void			svc_seq_show(struct seq_file *,
 				     const struct svc_stat *);
-
-extern struct proc_dir_entry	*proc_net_rpc;
-
 #else
 
 static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; }
@@ -69,9 +79,6 @@ static inline void svc_proc_unregister(const char *p) {}
 
 static inline void svc_seq_show(struct seq_file *seq,
 				const struct svc_stat *st) {}
-
-#define proc_net_rpc NULL
-
 #endif
 
 #endif /* _LINUX_SUNRPC_STATS_H */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index e84e7ddeecd..e20968aac68 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -34,7 +34,7 @@
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
-#include <net/net_namespace.h>
+#include "netns.h"
 
 #define	 RPCDBG_FACILITY RPCDBG_CACHE
 
@@ -1540,6 +1540,8 @@ static const struct file_operations cache_flush_operations_procfs = {
 
 static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
+	struct sunrpc_net *sn;
+
 	if (cd->u.procfs.proc_ent == NULL)
 		return;
 	if (cd->u.procfs.flush_ent)
@@ -1549,15 +1551,18 @@ static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
 	if (cd->u.procfs.content_ent)
 		remove_proc_entry("content", cd->u.procfs.proc_ent);
 	cd->u.procfs.proc_ent = NULL;
-	remove_proc_entry(cd->name, proc_net_rpc);
+	sn = net_generic(net, sunrpc_net_id);
+	remove_proc_entry(cd->name, sn->proc_net_rpc);
 }
 
 #ifdef CONFIG_PROC_FS
 static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
 {
 	struct proc_dir_entry *p;
+	struct sunrpc_net *sn;
 
-	cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc);
+	sn = net_generic(net, sunrpc_net_id);
+	cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
 	if (cd->u.procfs.proc_ent == NULL)
 		goto out_nomem;
 	cd->u.procfs.channel_ent = NULL;
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index b2d18af2815..e52ce897dde 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -5,6 +5,7 @@
 #include <net/netns/generic.h>
 
 struct sunrpc_net {
+	struct proc_dir_entry *proc_net_rpc;
 };
 
 extern int sunrpc_net_id;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index ea1046f3f9a..f71a73107ae 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -22,11 +22,10 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/metrics.h>
-#include <net/net_namespace.h>
 
-#define RPCDBG_FACILITY	RPCDBG_MISC
+#include "netns.h"
 
-struct proc_dir_entry	*proc_net_rpc = NULL;
+#define RPCDBG_FACILITY	RPCDBG_MISC
 
 /*
  * Get RPC client stats
@@ -218,10 +217,11 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats);
 static inline struct proc_dir_entry *
 do_register(const char *name, void *data, const struct file_operations *fops)
 {
-	rpc_proc_init();
-	dprintk("RPC:       registering /proc/net/rpc/%s\n", name);
+	struct sunrpc_net *sn;
 
-	return proc_create_data(name, 0, proc_net_rpc, fops, data);
+	dprintk("RPC:       registering /proc/net/rpc/%s\n", name);
+	sn = net_generic(&init_net, sunrpc_net_id);
+	return proc_create_data(name, 0, sn->proc_net_rpc, fops, data);
 }
 
 struct proc_dir_entry *
@@ -234,7 +234,10 @@ EXPORT_SYMBOL_GPL(rpc_proc_register);
 void
 rpc_proc_unregister(const char *name)
 {
-	remove_proc_entry(name, proc_net_rpc);
+	struct sunrpc_net *sn;
+
+	sn = net_generic(&init_net, sunrpc_net_id);
+	remove_proc_entry(name, sn->proc_net_rpc);
 }
 EXPORT_SYMBOL_GPL(rpc_proc_unregister);
 
@@ -248,25 +251,29 @@ EXPORT_SYMBOL_GPL(svc_proc_register);
 void
 svc_proc_unregister(const char *name)
 {
-	remove_proc_entry(name, proc_net_rpc);
+	struct sunrpc_net *sn;
+
+	sn = net_generic(&init_net, sunrpc_net_id);
+	remove_proc_entry(name, sn->proc_net_rpc);
 }
 EXPORT_SYMBOL_GPL(svc_proc_unregister);
 
-void
-rpc_proc_init(void)
+int rpc_proc_init(struct net *net)
 {
+	struct sunrpc_net *sn;
+
 	dprintk("RPC:       registering /proc/net/rpc\n");
-	if (!proc_net_rpc)
-		proc_net_rpc = proc_mkdir("rpc", init_net.proc_net);
+	sn = net_generic(net, sunrpc_net_id);
+	sn->proc_net_rpc = proc_mkdir("rpc", net->proc_net);
+	if (sn->proc_net_rpc == NULL)
+		return -ENOMEM;
+
+	return 0;
 }
 
-void
-rpc_proc_exit(void)
+void rpc_proc_exit(struct net *net)
 {
 	dprintk("RPC:       unregistering /proc/net/rpc\n");
-	if (proc_net_rpc) {
-		proc_net_rpc = NULL;
-		remove_proc_entry("rpc", init_net.proc_net);
-	}
+	remove_proc_entry("rpc", net->proc_net);
 }
 
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index faa23229bd2..c076af8535d 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -28,11 +28,21 @@ int sunrpc_net_id;
 
 static __net_init int sunrpc_init_net(struct net *net)
 {
+	int err;
+
+	err = rpc_proc_init(net);
+	if (err)
+		goto err_proc;
+
 	return 0;
+
+err_proc:
+	return err;
 }
 
 static __net_exit void sunrpc_exit_net(struct net *net)
 {
+	rpc_proc_exit(net);
 }
 
 static struct pernet_operations sunrpc_net_ops = {
@@ -66,9 +76,6 @@ init_sunrpc(void)
 		goto out4;
 #ifdef RPC_DEBUG
 	rpc_register_sysctl();
-#endif
-#ifdef CONFIG_PROC_FS
-	rpc_proc_init();
 #endif
 	cache_register(&ip_map_cache);
 	cache_register(&unix_gid_cache);
@@ -100,9 +107,6 @@ cleanup_sunrpc(void)
 	unregister_pernet_subsys(&sunrpc_net_ops);
 #ifdef RPC_DEBUG
 	rpc_unregister_sysctl();
-#endif
-#ifdef CONFIG_PROC_FS
-	rpc_proc_exit();
 #endif
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }
-- 
cgit v1.2.3


From 277f68dbba397997c7f3dc843d14afa1654bb80e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 22 Sep 2010 12:55:06 +1000
Subject: sunrpc: fix race in new cache_wait code.

If we set up to wait for a cache item to be filled in, and then find
that it is no longer pending, it could be that some other thread is
in 'cache_revisit_request' and has moved our request to its 'pending' list.
So when our setup_deferral calls cache_revisit_request it will find nothing to
put on the pending list, and do nothing.

We then return from cache_wait_req, thus leaving the 'sleeper'
on-stack structure open to being corrupted by subsequent stack usage.

However that 'sleeper' could still be on the 'pending' list that the
other thread is looking at and so any corruption could cause it to behave badly.

To avoid this race we simply take the same path as if the
'wait_for_completion_interruptible_timeout' was interrupted and if the
sleeper is no longer on the list (which it won't be) we wait on the
completion - which will ensure that any other cache_revisit_request
will have let go of the sleeper.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index e20968aac68..1e72cc95593 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -578,10 +578,9 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item)
 	dreq->revisit = cache_restart_thread;
 
 	ret = setup_deferral(dreq, item);
-	if (ret)
-		return ret;
 
-	if (wait_for_completion_interruptible_timeout(
+	if (ret ||
+	    wait_for_completion_interruptible_timeout(
 		    &sleeper.completion, req->thread_wait) <= 0) {
 		/* The completion wasn't completed, so we need
 		 * to clean up
-- 
cgit v1.2.3


From d29068c431599fa96729556846562eb18429092d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 7 Oct 2010 15:29:46 +1100
Subject: sunrpc: Simplify cache_defer_req and related functions.

The return value from cache_defer_req is somewhat confusing.
Various different error codes are returned, but the single caller is
only interested in success or failure.

In fact it can measure this success or failure itself by checking
CACHE_PENDING, which makes the point of the code more explicit.

So change cache_defer_req to return 'void' and test CACHE_PENDING
after it completes, to see if the request was actually deferred or
not.

Similarly setup_deferral and cache_wait_req don't need a return value,
so make them void and remove some code.

The call to cache_revisit_request (to guard against a race) is only
needed for the second call to setup_deferral, so move it out of
setup_deferral to after that second call.  With the first call the
race is handled differently (by explicitly calling
'wait_for_completion').

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 58 +++++++++++++++++++++---------------------------------
 1 file changed, 22 insertions(+), 36 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 1e72cc95593..49115b107fb 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -38,7 +38,7 @@
 
 #define	 RPCDBG_FACILITY RPCDBG_CACHE
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item);
+static void cache_defer_req(struct cache_req *req, struct cache_head *item);
 static void cache_revisit_request(struct cache_head *item);
 
 static void cache_init(struct cache_head *h)
@@ -269,7 +269,8 @@ int cache_check(struct cache_detail *detail,
 	}
 
 	if (rv == -EAGAIN) {
-		if (cache_defer_req(rqstp, h) < 0) {
+		cache_defer_req(rqstp, h);
+		if (!test_bit(CACHE_PENDING, &h->flags)) {
 			/* Request is not deferred */
 			rv = cache_is_valid(detail, h);
 			if (rv == -EAGAIN)
@@ -525,7 +526,7 @@ static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_he
 	hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
 }
 
-static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
+static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
 {
 	struct cache_deferred_req *discard;
 
@@ -547,13 +548,6 @@ static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *it
 	if (discard)
 		/* there was one too many */
 		discard->revisit(discard, 1);
-
-	if (!test_bit(CACHE_PENDING, &item->flags)) {
-		/* must have just been validated... */
-		cache_revisit_request(item);
-		return -EAGAIN;
-	}
-	return 0;
 }
 
 struct thread_deferred_req {
@@ -568,18 +562,17 @@ static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
 	complete(&dr->completion);
 }
 
-static int cache_wait_req(struct cache_req *req, struct cache_head *item)
+static void cache_wait_req(struct cache_req *req, struct cache_head *item)
 {
 	struct thread_deferred_req sleeper;
 	struct cache_deferred_req *dreq = &sleeper.handle;
-	int ret;
 
 	sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
 	dreq->revisit = cache_restart_thread;
 
-	ret = setup_deferral(dreq, item);
+	setup_deferral(dreq, item);
 
-	if (ret ||
+	if (!test_bit(CACHE_PENDING, &item->flags) ||
 	    wait_for_completion_interruptible_timeout(
 		    &sleeper.completion, req->thread_wait) <= 0) {
 		/* The completion wasn't completed, so we need
@@ -599,41 +592,34 @@ static int cache_wait_req(struct cache_req *req, struct cache_head *item)
 			wait_for_completion(&sleeper.completion);
 		}
 	}
-	if (test_bit(CACHE_PENDING, &item->flags)) {
-		/* item is still pending, try request
-		 * deferral
-		 */
-		return -ETIMEDOUT;
-	}
-	/* only return success if we actually deferred the
-	 * request.  In this case we waited until it was
-	 * answered so no deferral has happened - rather
-	 * an answer already exists.
-	 */
-	return -EEXIST;
 }
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+static void cache_defer_req(struct cache_req *req, struct cache_head *item)
 {
 	struct cache_deferred_req *dreq;
-	int ret;
 
-	if (cache_defer_cnt >= DFR_MAX) {
+	if (cache_defer_cnt >= DFR_MAX)
 		/* too much in the cache, randomly drop this one,
 		 * or continue and drop the oldest
 		 */
 		if (net_random()&1)
-			return -ENOMEM;
-	}
+			return;
+
+
 	if (req->thread_wait) {
-		ret = cache_wait_req(req, item);
-		if (ret != -ETIMEDOUT)
-			return ret;
+		cache_wait_req(req, item);
+		if (!test_bit(CACHE_PENDING, &item->flags))
+			return;
 	}
 	dreq = req->defer(req);
 	if (dreq == NULL)
-		return -ENOMEM;
-	return setup_deferral(dreq, item);
+		return;
+	setup_deferral(dreq, item);
+	if (!test_bit(CACHE_PENDING, &item->flags))
+		/* Bit could have been cleared before we managed to
+		 * set up the deferral, so need to revisit just in case
+		 */
+		cache_revisit_request(item);
 }
 
 static void cache_revisit_request(struct cache_head *item)
-- 
cgit v1.2.3


From e33534d54f1fde3e541f64fa5ad0dd379fc45fa7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 7 Oct 2010 15:29:46 +1100
Subject: sunrpc/cache: centralise handling of size limit on deferred list.

We limit the number of 'defer' requests to DFR_MAX.

The imposition of this limit is spread about a bit - sometime we don't
add new things to the list, sometimes we remove old things.

Also it is currently applied to requests which we are 'waiting' for
rather than 'deferring'.  This doesn't seem ideal as 'waiting'
requests are naturally limited by the number of threads.

So gather the DFR_MAX handling code to one place and only apply it to
requests that are actually being deferred.

This means that not all 'cache_deferred_req' structures go on the
'cache_defer_list, so we need to be careful when adding and removing
things.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/cache.c | 67 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 24 deletions(-)

(limited to 'net/sunrpc/cache.c')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 49115b107fb..ba61d0fa4b8 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -513,22 +513,25 @@ static int cache_defer_cnt;
 
 static void __unhash_deferred_req(struct cache_deferred_req *dreq)
 {
-	list_del_init(&dreq->recent);
 	hlist_del_init(&dreq->hash);
-	cache_defer_cnt--;
+	if (!list_empty(&dreq->recent)) {
+		list_del_init(&dreq->recent);
+		cache_defer_cnt--;
+	}
 }
 
 static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
 {
 	int hash = DFR_HASH(item);
 
-	list_add(&dreq->recent, &cache_defer_list);
+	INIT_LIST_HEAD(&dreq->recent);
 	hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
 }
 
-static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
+static void setup_deferral(struct cache_deferred_req *dreq,
+			   struct cache_head *item,
+			   int count_me)
 {
-	struct cache_deferred_req *discard;
 
 	dreq->item = item;
 
@@ -536,18 +539,13 @@ static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *i
 
 	__hash_deferred_req(dreq, item);
 
-	/* it is in, now maybe clean up */
-	discard = NULL;
-	if (++cache_defer_cnt > DFR_MAX) {
-		discard = list_entry(cache_defer_list.prev,
-				     struct cache_deferred_req, recent);
-		__unhash_deferred_req(discard);
+	if (count_me) {
+		cache_defer_cnt++;
+		list_add(&dreq->recent, &cache_defer_list);
 	}
+
 	spin_unlock(&cache_defer_lock);
 
-	if (discard)
-		/* there was one too many */
-		discard->revisit(discard, 1);
 }
 
 struct thread_deferred_req {
@@ -570,7 +568,7 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item)
 	sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
 	dreq->revisit = cache_restart_thread;
 
-	setup_deferral(dreq, item);
+	setup_deferral(dreq, item, 0);
 
 	if (!test_bit(CACHE_PENDING, &item->flags) ||
 	    wait_for_completion_interruptible_timeout(
@@ -594,17 +592,36 @@ static void cache_wait_req(struct cache_req *req, struct cache_head *item)
 	}
 }
 
-static void cache_defer_req(struct cache_req *req, struct cache_head *item)
+static void cache_limit_defers(void)
 {
-	struct cache_deferred_req *dreq;
+	/* Make sure we haven't exceed the limit of allowed deferred
+	 * requests.
+	 */
+	struct cache_deferred_req *discard = NULL;
 
-	if (cache_defer_cnt >= DFR_MAX)
-		/* too much in the cache, randomly drop this one,
-		 * or continue and drop the oldest
-		 */
-		if (net_random()&1)
-			return;
+	if (cache_defer_cnt <= DFR_MAX)
+		return;
+
+	spin_lock(&cache_defer_lock);
 
+	/* Consider removing either the first or the last */
+	if (cache_defer_cnt > DFR_MAX) {
+		if (net_random() & 1)
+			discard = list_entry(cache_defer_list.next,
+					     struct cache_deferred_req, recent);
+		else
+			discard = list_entry(cache_defer_list.prev,
+					     struct cache_deferred_req, recent);
+		__unhash_deferred_req(discard);
+	}
+	spin_unlock(&cache_defer_lock);
+	if (discard)
+		discard->revisit(discard, 1);
+}
+
+static void cache_defer_req(struct cache_req *req, struct cache_head *item)
+{
+	struct cache_deferred_req *dreq;
 
 	if (req->thread_wait) {
 		cache_wait_req(req, item);
@@ -614,12 +631,14 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item)
 	dreq = req->defer(req);
 	if (dreq == NULL)
 		return;
-	setup_deferral(dreq, item);
+	setup_deferral(dreq, item, 1);
 	if (!test_bit(CACHE_PENDING, &item->flags))
 		/* Bit could have been cleared before we managed to
 		 * set up the deferral, so need to revisit just in case
 		 */
 		cache_revisit_request(item);
+
+	cache_limit_defers();
 }
 
 static void cache_revisit_request(struct cache_head *item)
-- 
cgit v1.2.3