From 265d47e7115023df9e2b7a864b207b4738d9e18c Mon Sep 17 00:00:00 2001
From: Dave Jones
Date: Tue, 15 Nov 2011 15:04:00 -0800
Subject: slub: add taint flag outputting to debug paths

When we get corruption reports, it's useful to see if the kernel was
tainted, to rule out problems we can't do anything about.

Signed-off-by: Dave Jones
Signed-off-by: Andrew Morton
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 7d2a996c307..60552d52f84 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
--
cgit v1.2.3

From 4c493a5a5c0bab6c434af2723328edd79c49aa0c Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Fri, 11 Nov 2011 14:54:14 +0800
Subject: slub: add missed accounting

With the per-cpu partial list, a slab is added to the partial list first
and then moved to the node list.  The __slab_free() code path for
add/remove_partial is almost deprecated (except for slub debug), but we
forgot to account for add/remove_partial when moving per-cpu partial
pages to the node list, so the statistics for such events were always 0.
Add the corresponding accounting.

This is against the patch "slub: use correct parameter to add a page to
partial list tail".

Acked-by: Christoph Lameter
Signed-off-by: Shaohua Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index c3138233a6e..108ed03fb42 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}

 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page, DEACTIVATE_TO_TAIL);

+					stat(s, FREE_ADD_PARTIAL);
+				}
 				l = m;
 			}

--
cgit v1.2.3
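The stat() calls added above feed SLUB's per-CPU event counters, which are
only compiled in with CONFIG_SLUB_STATS and are exported per cache under
/sys/kernel/slab/. As a rough sketch of the helper this patch relies on
(simplified, not the verbatim kernel source of this era):

/* Per-CPU event accounting; compiles away without CONFIG_SLUB_STATS. */
static inline void stat(struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	__this_cpu_inc(s->cpu_slab->stat[si]);	/* e.g. si == FREE_ADD_PARTIAL */
#endif
}

Because the increment compiles away on builds without CONFIG_SLUB_STATS,
the added calls cost nothing on production configurations.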
From 73736e0387ba0e6d2b703407b4d26168d31516a7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 13 Dec 2011 04:57:06 +0100
Subject: slub: fix a possible memleak in __slab_alloc()

Zhihua Che reported a possible memleak in the slub allocator on
CONFIG_PREEMPT=y builds.

It is possible that the current thread migrates right before disabling
irqs in __slab_alloc().  We must check c->freelist again, and perform a
normal allocation instead of scratching c->freelist.

Many thanks to Zhihua Che for spotting this bug, introduced in 2.6.39.

V2: It's also possible that an IRQ freed one (or several) object(s) and
populated c->freelist, so it's not a CONFIG_PREEMPT-only problem.

Cc: [2.6.39+]
Reported-by: Zhihua Che
Signed-off-by: Eric Dumazet
Acked-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 108ed03fb42..5e410a95aba 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2169,6 +2169,11 @@ redo:
 		goto new_slab;
 	}

+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
+
 	stat(s, ALLOC_SLOWPATH);

 	do {
--
cgit v1.2.3

From 8f1e33daeda6cd89753f9e77d174805a6f21db09 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 23 Nov 2011 09:24:27 -0600
Subject: slub: Switch per cpu partial page support off for debugging

Eric saw an issue with the accounting of slabs during validation.  It is
not possible to accurately determine how many per cpu partial slabs exist
at any time, so this switches per cpu partial pages off during debug.

Acked-by: Eric Dumazet
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index ed3334d9b6d..4056d29e661 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3028,7 +3028,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
--
cgit v1.2.3
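The gate used in the hunk above, kmem_cache_debug(), is true whenever the
cache was created with any of the SLUB debug flags (red zoning, poisoning,
user tracking, tracing, consistency checks). A simplified sketch of that
helper, assuming the mm/slub.c of this era (not verbatim source):

/* Approximate sketch: non-zero when any SLUB debug option is active for the cache. */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

Because the number of objects sitting on per cpu partial slabs cannot be
determined accurately at any given time, debug caches simply keep
cpu_partial at 0.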
From 213eeb9fd9d66c33109e2ace242df214dc3a653d Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 11 Nov 2011 14:07:14 -0600
Subject: slub: Extract get_freelist from __slab_alloc

get_freelist() retrieves free objects from the page freelist (put there
by remote frees) or deactivates a slab page if no more objects are
available.

Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 57 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 5e410a95aba..6dc79f8e6ce 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2126,6 +2126,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 	return object;
 }

+/*
+ * Check the page->freelist of a page and either transfer the freelist to the
+ * per cpu freelist or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
@@ -2147,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;

 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2176,29 +2205,7 @@ redo:

 	stat(s, ALLOC_SLOWPATH);

-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
-
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
-
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	object = get_freelist(s, c->page);

 	if (!object) {
 		c->page = NULL;
--
cgit v1.2.3

From b13683d1cc14d1dd30b8e20f3ebea3f814ad029f Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Fri, 11 Nov 2011 14:54:14 +0800
Subject: slub: add missed accounting

With the per-cpu partial list, a slab is added to the partial list first
and then moved to the node list.  The __slab_free() code path for
add/remove_partial is almost deprecated (except for slub debug), but we
forgot to account for add/remove_partial when moving per-cpu partial
pages to the node list, so the statistics for such events were always 0.
Add the corresponding accounting.

This is against the patch "slub: use correct parameter to add a page to
partial list tail".

Acked-by: Christoph Lameter
Signed-off-by: Shaohua Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 4056d29e661..8284a206f48 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}

 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page, DEACTIVATE_TO_TAIL);

+					stat(s, FREE_ADD_PARTIAL);
+				}
 				l = m;
 			}

--
cgit v1.2.3

From 74ee4ef1f901fbb014bdcdc9171d126490ce2b62 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Mon, 9 Jan 2012 13:19:45 -0800
Subject: slub: disallow changing cpu_partial from userspace for debug caches

For caches with debugging enabled, "slub: Switch per cpu partial page
support off for debugging" changes cpu_partial to 0.  It shouldn't be
tunable from userspace for such caches, otherwise the same accounting
issues arise during validation.

This patch disallows tuning /sys/kernel/slab/cache/cpu_partial to be
nonzero for caches with debugging enabled.

Acked-by: Christoph Lameter
Signed-off-by: David Rientjes
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 6dc79f8e6ce..a47df0aa5d3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4649,6 +4649,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;

 	s->cpu_partial = objects;
 	flush_all(s);
--
cgit v1.2.3
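With the check applied, the sysfs store handler rejects any nonzero value
for a debug cache while still allowing cpu_partial to be reset to 0. A
sketch of the resulting function, reconstructed from the hunk and its
context above (the second half of the prototype, the local declarations,
and the trailing return follow the usual slab attribute pattern and are
inferred, not quoted from the patch):

static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
				 size_t length)
{
	unsigned long objects;
	int err;

	err = strict_strtoul(buf, 10, &objects);
	if (err)
		return err;
	/* debug caches must keep cpu_partial == 0, see kmem_cache_open() */
	if (objects && kmem_cache_debug(s))
		return -EINVAL;

	s->cpu_partial = objects;
	flush_all(s);	/* drain per cpu slabs so the new setting takes effect */
	return length;
}

Writing to /sys/kernel/slab/<cache>/cpu_partial for a cache with debugging
enabled therefore returns -EINVAL unless the value written is 0.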