Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c | 104
1 file changed, 53 insertions(+), 51 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index e45354cc7cac..c4ada32598bd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -189,33 +189,33 @@ static inline void free_task_struct(struct task_struct *tsk)
 	kmem_cache_free(task_struct_cachep, tsk);
 }
 
-/*
- * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
- * kmemcache based allocator.
- */
-# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
-
-#  ifdef CONFIG_VMAP_STACK
+#ifdef CONFIG_VMAP_STACK
 /*
  * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
  * flush.  Try to minimize the number of calls by caching stacks.
  */
 #define NR_CACHED_STACKS 2
 static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+/*
+ * Allocated stacks are cached and later reused by new threads, so memcg
+ * accounting is performed by the code assigning/releasing stacks to tasks.
+ * We need a zeroed memory without __GFP_ACCOUNT.
+ */
+#define GFP_VMAP_STACK (GFP_KERNEL | __GFP_ZERO)
 
 struct vm_stack {
 	struct rcu_head rcu;
 	struct vm_struct *stack_vm_area;
 };
 
-static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area)
 {
 	unsigned int i;
 
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
 		struct vm_struct *tmp = NULL;
 
-		if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm))
+		if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area))
 			return true;
 	}
 	return false;
@@ -224,11 +224,12 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
 static void thread_stack_free_rcu(struct rcu_head *rh)
 {
 	struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+	struct vm_struct *vm_area = vm_stack->stack_vm_area;
 
 	if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
 		return;
 
-	vfree(vm_stack);
+	vfree(vm_area->addr);
 }
 
 static void thread_stack_delayed_free(struct task_struct *tsk)
@@ -241,32 +242,32 @@ static void thread_stack_delayed_free(struct task_struct *tsk)
 
 static int free_vm_stack_cache(unsigned int cpu)
 {
-	struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
+	struct vm_struct **cached_vm_stack_areas = per_cpu_ptr(cached_stacks, cpu);
 	int i;
 
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
-		struct vm_struct *vm_stack = cached_vm_stacks[i];
+		struct vm_struct *vm_area = cached_vm_stack_areas[i];
 
-		if (!vm_stack)
+		if (!vm_area)
 			continue;
 
-		vfree(vm_stack->addr);
-		cached_vm_stacks[i] = NULL;
+		vfree(vm_area->addr);
+		cached_vm_stack_areas[i] = NULL;
 	}
 
 	return 0;
 }
 
-static int memcg_charge_kernel_stack(struct vm_struct *vm)
+static int memcg_charge_kernel_stack(struct vm_struct *vm_area)
 {
 	int i;
 	int ret;
 	int nr_charged = 0;
 
-	BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+	BUG_ON(vm_area->nr_pages != THREAD_SIZE / PAGE_SIZE);
 
 	for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-		ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
+		ret = memcg_kmem_charge_page(vm_area->pages[i], GFP_KERNEL, 0);
 		if (ret)
 			goto err;
 		nr_charged++;
@@ -274,55 +275,47 @@ static int memcg_charge_kernel_stack(struct vm_struct *vm)
 	return 0;
 err:
 	for (i = 0; i < nr_charged; i++)
-		memcg_kmem_uncharge_page(vm->pages[i], 0);
+		memcg_kmem_uncharge_page(vm_area->pages[i], 0);
 	return ret;
 }
 
 static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
-	struct vm_struct *vm;
+	struct vm_struct *vm_area;
 	void *stack;
 	int i;
 
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
-		struct vm_struct *s;
-
-		s = this_cpu_xchg(cached_stacks[i], NULL);
-
-		if (!s)
+		vm_area = this_cpu_xchg(cached_stacks[i], NULL);
+		if (!vm_area)
 			continue;
 
 		/* Reset stack metadata. */
-		kasan_unpoison_range(s->addr, THREAD_SIZE);
+		kasan_unpoison_range(vm_area->addr, THREAD_SIZE);
 
-		stack = kasan_reset_tag(s->addr);
+		stack = kasan_reset_tag(vm_area->addr);
 
 		/* Clear stale pointers from reused stack. */
 		memset(stack, 0, THREAD_SIZE);
 
-		if (memcg_charge_kernel_stack(s)) {
-			vfree(s->addr);
+		if (memcg_charge_kernel_stack(vm_area)) {
+			vfree(vm_area->addr);
 			return -ENOMEM;
 		}
 
-		tsk->stack_vm_area = s;
+		tsk->stack_vm_area = vm_area;
 		tsk->stack = stack;
 		return 0;
 	}
 
-	/*
-	 * Allocated stacks are cached and later reused by new threads,
-	 * so memcg accounting is performed manually on assigning/releasing
-	 * stacks to tasks. Drop __GFP_ACCOUNT.
-	 */
 	stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
-				     THREADINFO_GFP & ~__GFP_ACCOUNT,
+				     GFP_VMAP_STACK,
 				     node, __builtin_return_address(0));
 	if (!stack)
 		return -ENOMEM;
 
-	vm = find_vm_area(stack);
-	if (memcg_charge_kernel_stack(vm)) {
+	vm_area = find_vm_area(stack);
+	if (memcg_charge_kernel_stack(vm_area)) {
 		vfree(stack);
 		return -ENOMEM;
 	}
@@ -331,7 +324,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 	 * free_thread_stack() can be called in interrupt context,
 	 * so cache the vm_struct.
 	 */
-	tsk->stack_vm_area = vm;
+	tsk->stack_vm_area = vm_area;
 	stack = kasan_reset_tag(stack);
 	tsk->stack = stack;
 	return 0;
@@ -346,7 +339,13 @@ static void free_thread_stack(struct task_struct *tsk)
 	tsk->stack_vm_area = NULL;
 }
 
-#  else /* !CONFIG_VMAP_STACK */
+#else /* !CONFIG_VMAP_STACK */
+
+/*
+ * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
+ * kmemcache based allocator.
+ */
+#if THREAD_SIZE >= PAGE_SIZE
 
 static void thread_stack_free_rcu(struct rcu_head *rh)
 {
@@ -378,8 +377,7 @@ static void free_thread_stack(struct task_struct *tsk)
 	tsk->stack = NULL;
 }
 
-#  endif /* CONFIG_VMAP_STACK */
-# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
+#else /* !(THREAD_SIZE >= PAGE_SIZE) */
 
 static struct kmem_cache *thread_stack_cache;
 
@@ -418,7 +416,8 @@ void thread_stack_cache_init(void)
 	BUG_ON(thread_stack_cache == NULL);
 }
 
-# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
+#endif /* THREAD_SIZE >= PAGE_SIZE */
+#endif /* CONFIG_VMAP_STACK */
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
 static struct kmem_cache *signal_cachep;
@@ -438,11 +437,11 @@ static struct kmem_cache *mm_cachep;
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
 	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-		struct vm_struct *vm = task_stack_vm_area(tsk);
+		struct vm_struct *vm_area = task_stack_vm_area(tsk);
 		int i;
 
 		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-			mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
+			mod_lruvec_page_state(vm_area->pages[i], NR_KERNEL_STACK_KB,
 					      account * (PAGE_SIZE / 1024));
 	} else {
 		void *stack = task_stack_page(tsk);
@@ -458,12 +457,12 @@ void exit_task_stack_account(struct task_struct *tsk)
 	account_kernel_stack(tsk, -1);
 
 	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-		struct vm_struct *vm;
+		struct vm_struct *vm_area;
 		int i;
 
-		vm = task_stack_vm_area(tsk);
+		vm_area = task_stack_vm_area(tsk);
 		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-			memcg_kmem_uncharge_page(vm->pages[i], 0);
+			memcg_kmem_uncharge_page(vm_area->pages[i], 0);
 	}
 }
 
@@ -586,9 +585,12 @@ static void check_mm(struct mm_struct *mm)
 	for (i = 0; i < NR_MM_COUNTERS; i++) {
 		long x = percpu_counter_sum(&mm->rss_stat[i]);
 
-		if (unlikely(x))
-			pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
-				 mm, resident_page_types[i], x);
+		if (unlikely(x)) {
+			pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld Comm:%s Pid:%d\n",
+				 mm, resident_page_types[i], x,
+				 current->comm,
+				 task_pid_nr(current));
+		}
 	}
 
 	if (mm_pgtables_bytes(mm))
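Note: the per-CPU stack cache that most of this patch renames around is easier to see outside diff form. The sketch below is a minimal userspace analogue of the pattern, assuming C11 atomics in place of this_cpu_try_cmpxchg()/this_cpu_xchg() and a plain shared array in place of per-CPU slots; every name in it (alloc_stack, free_stack, try_release_stack_to_cache, STACK_SIZE) is illustrative, not a kernel API.

/*
 * Userspace analogue of the vmap-stack cache: a small fixed-size
 * cache of freed stacks, filled and drained with compare-and-swap
 * so no lock is needed. Illustrative only; the kernel operates on
 * per-CPU slots, not a shared array.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CACHED_STACKS 2
#define STACK_SIZE (16 * 1024)

static _Atomic(void *) cached_stacks[NR_CACHED_STACKS];

/* Mirror of try_release_thread_stack_to_cache(): park a stack in an empty slot. */
static int try_release_stack_to_cache(void *stack)
{
	for (int i = 0; i < NR_CACHED_STACKS; i++) {
		void *expected = NULL;
		if (atomic_compare_exchange_strong(&cached_stacks[i], &expected, stack))
			return 1;	/* cached; caller must not free it */
	}
	return 0;			/* cache full; caller frees */
}

/* Mirror of the reuse loop at the top of alloc_thread_stack_node(). */
static void *alloc_stack(void)
{
	for (int i = 0; i < NR_CACHED_STACKS; i++) {
		void *stack = atomic_exchange(&cached_stacks[i], NULL);
		if (stack) {
			memset(stack, 0, STACK_SIZE);	/* clear stale pointers */
			return stack;
		}
	}
	return calloc(1, STACK_SIZE);	/* slow path: fresh, zeroed allocation */
}

static void free_stack(void *stack)
{
	if (!try_release_stack_to_cache(stack))
		free(stack);
}

int main(void)
{
	void *a = alloc_stack();
	free_stack(a);			/* parked in the cache */
	void *b = alloc_stack();	/* reuses a without a new allocation */
	printf("reused: %s\n", b == a ? "yes" : "no");
	free_stack(b);
	return 0;
}

The payoff is visible in main(): the second alloc_stack() hands back the stack that free_stack() parked in the cache. In kernel/fork.c that means one fewer vmalloc()/vfree() pair per short-lived thread and, since repeated vfree() calls eventually force a TLB flush, fewer flushes overall.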

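For readers tracking the preprocessor churn: before this patch the vmap-stack code sat inside "# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)"; afterwards CONFIG_VMAP_STACK is the top-level split, and the page-size test only chooses between the two non-vmap allocators. A sketch of the resulting nesting (not a literal excerpt from kernel/fork.c):

#ifdef CONFIG_VMAP_STACK
/* vmalloc()-backed stacks, with the per-CPU cache and GFP_VMAP_STACK */
#else /* !CONFIG_VMAP_STACK */
#if THREAD_SIZE >= PAGE_SIZE
/* stacks allocated directly from the page allocator */
#else /* !(THREAD_SIZE >= PAGE_SIZE) */
/* stacks allocated from a dedicated kmem_cache */
#endif /* THREAD_SIZE >= PAGE_SIZE */
#endif /* CONFIG_VMAP_STACK */

The new GFP_VMAP_STACK define follows the same logic: cached stacks are charged to a memcg only when assigned to a task, so the underlying allocation must be zeroed (__GFP_ZERO) but unaccounted (no __GFP_ACCOUNT).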