diff options
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 95 | 
1 file changed, 63 insertions, 32 deletions
| diff --git a/kernel/fork.c b/kernel/fork.c index 89ceb4a68af2..735405a9c5f3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -16,7 +16,6 @@  #include <linux/slab.h>  #include <linux/sched/autogroup.h>  #include <linux/sched/mm.h> -#include <linux/sched/coredump.h>  #include <linux/sched/user.h>  #include <linux/sched/numa_balancing.h>  #include <linux/sched/stat.h> @@ -105,6 +104,7 @@  #include <linux/rseq.h>  #include <uapi/linux/pidfd.h>  #include <linux/pidfs.h> +#include <linux/tick.h>  #include <asm/pgalloc.h>  #include <linux/uaccess.h> @@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)  		return false;  	init_rwsem(&vma->vm_lock->lock); -	vma->vm_lock_seq = -1; +	vma->vm_lock_seq = UINT_MAX;  	return true;  } @@ -621,6 +621,12 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)  	exe_file = get_mm_exe_file(oldmm);  	RCU_INIT_POINTER(mm->exe_file, exe_file); +	/* +	 * We depend on the oldmm having properly denied write access to the +	 * exe_file already. +	 */ +	if (exe_file && exe_file_deny_write_access(exe_file)) +		pr_warn_once("exe_file_deny_write_access() failed in %s\n", __func__);  }  #ifdef CONFIG_MMU @@ -633,11 +639,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,  	LIST_HEAD(uf);  	VMA_ITERATOR(vmi, mm, 0); -	uprobe_start_dup_mmap(); -	if (mmap_write_lock_killable(oldmm)) { -		retval = -EINTR; -		goto fail_uprobe_end; -	} +	if (mmap_write_lock_killable(oldmm)) +		return -EINTR;  	flush_cache_dup_mm(oldmm);  	uprobe_dup_mmap(oldmm, mm);  	/* @@ -653,11 +656,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,  	mm->exec_vm = oldmm->exec_vm;  	mm->stack_vm = oldmm->stack_vm; -	retval = ksm_fork(mm, oldmm); -	if (retval) -		goto out; -	khugepaged_fork(mm, oldmm); -  	/* Use __mt_dup() to efficiently build an identical maple tree. 
*/  	retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL);  	if (unlikely(retval)) @@ -760,7 +758,10 @@ loop_out:  	vma_iter_free(&vmi);  	if (!retval) {  		mt_set_in_rcu(vmi.mas.tree); -	} else if (mpnt) { +		ksm_fork(mm, oldmm); +		khugepaged_fork(mm, oldmm); +	} else { +  		/*  		 * The entire maple tree has already been duplicated. If the  		 * mmap duplication fails, mark the failure point with @@ -768,16 +769,27 @@ loop_out:  		 * stop releasing VMAs that have not been duplicated after this  		 * point.  		 */ -		mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); -		mas_store(&vmi.mas, XA_ZERO_ENTRY); +		if (mpnt) { +			mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1); +			mas_store(&vmi.mas, XA_ZERO_ENTRY); +			/* Avoid OOM iterating a broken tree */ +			set_bit(MMF_OOM_SKIP, &mm->flags); +		} +		/* +		 * The mm_struct is going to exit, but the locks will be dropped +		 * first.  Set the mm_struct as unstable is advisable as it is +		 * not fully initialised. 
+		 */ +		set_bit(MMF_UNSTABLE, &mm->flags);  	}  out:  	mmap_write_unlock(mm);  	flush_tlb_mm(oldmm);  	mmap_write_unlock(oldmm); -	dup_userfaultfd_complete(&uf); -fail_uprobe_end: -	uprobe_end_dup_mmap(); +	if (!retval) +		dup_userfaultfd_complete(&uf); +	else +		dup_userfaultfd_fail(&uf);  	return retval;  fail_nomem_anon_vma_fork: @@ -1184,7 +1196,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)  	tsk->active_memcg = NULL;  #endif -#ifdef CONFIG_CPU_SUP_INTEL +#ifdef CONFIG_X86_BUS_LOCK_DETECT  	tsk->reported_split_lock = 0;  #endif @@ -1261,9 +1273,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,  	seqcount_init(&mm->write_protect_seq);  	mmap_init_lock(mm);  	INIT_LIST_HEAD(&mm->mmlist); -#ifdef CONFIG_PER_VMA_LOCK -	mm->mm_lock_seq = 0; -#endif  	mm_pgtables_bytes_init(mm);  	mm->map_count = 0;  	mm->locked_vm = 0; @@ -1298,7 +1307,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,  	if (init_new_context(p, mm))  		goto fail_nocontext; -	if (mm_alloc_cid(mm)) +	if (mm_alloc_cid(mm, p))  		goto fail_cid;  	if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT, @@ -1413,11 +1422,20 @@ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)  	 */  	old_exe_file = rcu_dereference_raw(mm->exe_file); -	if (new_exe_file) +	if (new_exe_file) { +		/* +		 * We expect the caller (i.e., sys_execve) to already denied +		 * write access, so this is unlikely to fail. 
+		 */ +		if (unlikely(exe_file_deny_write_access(new_exe_file))) +			return -EACCES;  		get_file(new_exe_file); +	}  	rcu_assign_pointer(mm->exe_file, new_exe_file); -	if (old_exe_file) +	if (old_exe_file) { +		exe_file_allow_write_access(old_exe_file);  		fput(old_exe_file); +	}  	return 0;  } @@ -1456,6 +1474,9 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)  			return ret;  	} +	ret = exe_file_deny_write_access(new_exe_file); +	if (ret) +		return -EACCES;  	get_file(new_exe_file);  	/* set the new file */ @@ -1464,8 +1485,10 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)  	rcu_assign_pointer(mm->exe_file, new_exe_file);  	mmap_write_unlock(mm); -	if (old_exe_file) +	if (old_exe_file) { +		exe_file_allow_write_access(old_exe_file);  		fput(old_exe_file); +	}  	return 0;  } @@ -1499,12 +1522,13 @@ struct file *get_task_exe_file(struct task_struct *task)  	struct file *exe_file = NULL;  	struct mm_struct *mm; +	if (task->flags & PF_KTHREAD) +		return NULL; +  	task_lock(task);  	mm = task->mm; -	if (mm) { -		if (!(task->flags & PF_KTHREAD)) -			exe_file = get_mm_exe_file(mm); -	} +	if (mm) +		exe_file = get_mm_exe_file(mm);  	task_unlock(task);  	return exe_file;  } @@ -1545,8 +1569,9 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)  		return ERR_PTR(err);  	mm = get_task_mm(task); -	if (mm && mm != current->mm && -			!ptrace_may_access(task, mode)) { +	if (!mm) { +		mm = ERR_PTR(-ESRCH); +	} else if (mm != current->mm && !ptrace_may_access(task, mode)) {  		mmput(mm);  		mm = ERR_PTR(-EACCES);  	} @@ -1671,9 +1696,11 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,  	if (!mm_init(mm, tsk, mm->user_ns))  		goto fail_nomem; +	uprobe_start_dup_mmap();  	err = dup_mmap(mm, oldmm);  	if (err)  		goto free_pt; +	uprobe_end_dup_mmap();  	mm->hiwater_rss = get_mm_rss(mm);  	mm->hiwater_vm = mm->total_vm; @@ -1688,6 +1715,8 @@ free_pt:  	mm->binfmt = NULL;  	mm_init_owner(mm, 
NULL);  	mmput(mm); +	if (err) +		uprobe_end_dup_mmap();  fail_nomem:  	return NULL; @@ -1861,6 +1890,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)  #ifdef CONFIG_POSIX_TIMERS  	INIT_HLIST_HEAD(&sig->posix_timers); +	INIT_HLIST_HEAD(&sig->ignored_posix_timers);  	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);  	sig->real_timer.function = it_real_fn;  #endif @@ -2292,6 +2322,7 @@ __latent_entropy struct task_struct *copy_process(  	acct_clear_integrals(p);  	posix_cputimers_init(&p->posix_cputimers); +	tick_dep_init_task(p);  	p->io_context = NULL;  	audit_set_context(p, NULL); | 
