From 265d47e7115023df9e2b7a864b207b4738d9e18c Mon Sep 17 00:00:00 2001
From: Dave Jones
Date: Tue, 15 Nov 2011 15:04:00 -0800
Subject: slub: add taint flag outputting to debug paths

When we get corruption reports, it's useful to see if the kernel was
tainted, to rule out problems we can't do anything about.

Signed-off-by: Dave Jones
Signed-off-by: Andrew Morton
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 7d2a996c307..60552d52f84 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -570,7 +570,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	va_end(args);
 	printk(KERN_ERR "========================================"
 			"=====================================\n");
-	printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+	printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 }
--
cgit v1.2.3

From 4c493a5a5c0bab6c434af2723328edd79c49aa0c Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Fri, 11 Nov 2011 14:54:14 +0800
Subject: slub: add missed accounting

With the per-cpu partial list, a slab is added to the partial list first
and then moved to the node list.  The __slab_free() code path for
add/remove_partial is almost deprecated (except for slub debug), but we
forgot to account for add/remove_partial when moving per-cpu partial
pages to the node list, so the statistics for such events were always 0.
Add the corresponding accounting.

This is against the patch "slub: use correct parameter to add a page to
partial list tail".

Acked-by: Christoph Lameter
Signed-off-by: Shaohua Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index c3138233a6e..108ed03fb42 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}

 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page, DEACTIVATE_TO_TAIL);

+					stat(s, FREE_ADD_PARTIAL);
+				}
 				l = m;
 			}

--
cgit v1.2.3
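The stat() calls added above feed SLUB's per-CPU event counters, which are
only compiled in with CONFIG_SLUB_STATS and are exported per cache under
/sys/kernel/slab/. As a rough sketch of the helper this patch relies on
(simplified, not the verbatim kernel source of this era):

/* Per-CPU event accounting; compiles away without CONFIG_SLUB_STATS. */
static inline void stat(struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	__this_cpu_inc(s->cpu_slab->stat[si]);	/* e.g. si == FREE_ADD_PARTIAL */
#endif
}

Because the increment compiles away on builds without CONFIG_SLUB_STATS,
the added calls cost nothing on production configurations.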
From 73736e0387ba0e6d2b703407b4d26168d31516a7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 13 Dec 2011 04:57:06 +0100
Subject: slub: fix a possible memleak in __slab_alloc()

Zhihua Che reported a possible memleak in the slub allocator on
CONFIG_PREEMPT=y builds.

It is possible that the current thread migrates right before disabling
irqs in __slab_alloc().  We must check c->freelist again, and perform a
normal allocation instead of scratching c->freelist.

Many thanks to Zhihua Che for spotting this bug, introduced in 2.6.39.

V2: It's also possible that an IRQ freed one (or several) object(s) and
populated c->freelist, so it's not a CONFIG_PREEMPT-only problem.

Cc: [2.6.39+]
Reported-by: Zhihua Che
Signed-off-by: Eric Dumazet
Acked-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 108ed03fb42..5e410a95aba 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2169,6 +2169,11 @@ redo:
 		goto new_slab;
 	}

+	/* must check again c->freelist in case of cpu migration or IRQ */
+	object = c->freelist;
+	if (object)
+		goto load_freelist;
+
 	stat(s, ALLOC_SLOWPATH);

 	do {
--
cgit v1.2.3

From 8f1e33daeda6cd89753f9e77d174805a6f21db09 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 23 Nov 2011 09:24:27 -0600
Subject: slub: Switch per cpu partial page support off for debugging

Eric saw an issue with the accounting of slabs during validation.  It is
not possible to accurately determine how many per cpu partial slabs exist
at any time, so this switches per cpu partial pages off during debug.

Acked-by: Eric Dumazet
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index ed3334d9b6d..4056d29e661 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3028,7 +3028,9 @@ static int kmem_cache_open(struct kmem_cache *s,
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (s->size >= PAGE_SIZE)
+	if (kmem_cache_debug(s))
+		s->cpu_partial = 0;
+	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
 	else if (s->size >= 1024)
 		s->cpu_partial = 6;
--
cgit v1.2.3
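The gate used in the hunk above, kmem_cache_debug(), is true whenever the
cache was created with any of the SLUB debug flags (red zoning, poisoning,
user tracking, tracing, consistency checks). A simplified sketch of that
helper, assuming the mm/slub.c of this era (not verbatim source):

/* Approximate sketch: non-zero when any SLUB debug option is active for the cache. */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

Because the number of objects sitting on per cpu partial slabs cannot be
determined accurately at any given time, debug caches simply keep
cpu_partial at 0.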
From 213eeb9fd9d66c33109e2ace242df214dc3a653d Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 11 Nov 2011 14:07:14 -0600
Subject: slub: Extract get_freelist from __slab_alloc

get_freelist() retrieves free objects from the page freelist (put there
by remote frees) or deactivates a slab page if no more objects are
available.

Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 57 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 5e410a95aba..6dc79f8e6ce 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2126,6 +2126,37 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 	return object;
 }

+/*
+ * Check the page->freelist of a page and either transfer the freelist to the
+ * per cpu freelist or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+	struct page new;
+	unsigned long counters;
+	void *freelist;
+
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		new.inuse = page->objects;
+		new.frozen = freelist != NULL;
+
+	} while (!cmpxchg_double_slab(s, page,
+		freelist, counters,
+		NULL, new.counters,
+		"get_freelist"));
+
+	return freelist;
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
@@ -2147,8 +2178,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 {
 	void **object;
 	unsigned long flags;
-	struct page new;
-	unsigned long counters;

 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2176,29 +2205,7 @@ redo:

 	stat(s, ALLOC_SLOWPATH);

-	do {
-		object = c->page->freelist;
-		counters = c->page->counters;
-		new.counters = counters;
-		VM_BUG_ON(!new.frozen);
-
-		/*
-		 * If there is no object left then we use this loop to
-		 * deactivate the slab which is simple since no objects
-		 * are left in the slab and therefore we do not need to
-		 * put the page back onto the partial list.
-		 *
-		 * If there are objects left then we retrieve them
-		 * and use them to refill the per cpu queue.
-		 */
-
-		new.inuse = c->page->objects;
-		new.frozen = object != NULL;
-
-	} while (!__cmpxchg_double_slab(s, c->page,
-			object, counters,
-			NULL, new.counters,
-			"__slab_alloc"));
+	object = get_freelist(s, c->page);

 	if (!object) {
 		c->page = NULL;
--
cgit v1.2.3

From b13683d1cc14d1dd30b8e20f3ebea3f814ad029f Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Fri, 11 Nov 2011 14:54:14 +0800
Subject: slub: add missed accounting

With the per-cpu partial list, a slab is added to the partial list first
and then moved to the node list.  The __slab_free() code path for
add/remove_partial is almost deprecated (except for slub debug), but we
forgot to account for add/remove_partial when moving per-cpu partial
pages to the node list, so the statistics for such events were always 0.
Add the corresponding accounting.

This is against the patch "slub: use correct parameter to add a page to
partial list tail".

Acked-by: Christoph Lameter
Signed-off-by: Shaohua Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 4056d29e661..8284a206f48 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1901,11 +1901,14 @@ static void unfreeze_partials(struct kmem_cache *s)
 			}

 			if (l != m) {
-				if (l == M_PARTIAL)
+				if (l == M_PARTIAL) {
 					remove_partial(n, page);
-				else
+					stat(s, FREE_REMOVE_PARTIAL);
+				} else {
 					add_partial(n, page, DEACTIVATE_TO_TAIL);

+					stat(s, FREE_ADD_PARTIAL);
+				}
 				l = m;
 			}

--
cgit v1.2.3

From 74ee4ef1f901fbb014bdcdc9171d126490ce2b62 Mon Sep 17 00:00:00 2001
From: David Rientjes
Date: Mon, 9 Jan 2012 13:19:45 -0800
Subject: slub: disallow changing cpu_partial from userspace for debug caches

For caches with debugging enabled, "slub: Switch per cpu partial page
support off for debugging" changes cpu_partial to 0.  It shouldn't be
tunable from userspace for such caches, otherwise the same accounting
issues arise during validation.

This patch disallows tuning /sys/kernel/slab/cache/cpu_partial to be
nonzero for caches with debugging enabled.

Acked-by: Christoph Lameter
Signed-off-by: David Rientjes
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'mm/slub.c')

diff --git a/mm/slub.c b/mm/slub.c
index 6dc79f8e6ce..a47df0aa5d3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4649,6 +4649,8 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
+	if (objects && kmem_cache_debug(s))
+		return -EINVAL;

 	s->cpu_partial = objects;
 	flush_all(s);
--
cgit v1.2.3
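With the check applied, the sysfs store handler rejects any nonzero value
for a debug cache while still allowing cpu_partial to be reset to 0. A
sketch of the resulting function, reconstructed from the hunk and its
context above (the second half of the prototype, the local declarations,
and the trailing return follow the usual slab attribute pattern and are
inferred, not quoted from the patch):

static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
				 size_t length)
{
	unsigned long objects;
	int err;

	err = strict_strtoul(buf, 10, &objects);
	if (err)
		return err;
	/* debug caches must keep cpu_partial == 0, see kmem_cache_open() */
	if (objects && kmem_cache_debug(s))
		return -EINVAL;

	s->cpu_partial = objects;
	flush_all(s);	/* drain per cpu slabs so the new setting takes effect */
	return length;
}

Writing to /sys/kernel/slab/<cache>/cpu_partial for a cache with debugging
enabled therefore returns -EINVAL unless the value written is 0.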