summary refs log tree commit diff
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 95
1 files changed, 63 insertions, 32 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af2..735405a9c5f3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -16,7 +16,6 @@
#include <linux/slab.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/mm.h>
-#include <linux/sched/coredump.h>
#include <linux/sched/user.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/stat.h>
@@ -105,6 +104,7 @@
#include <linux/rseq.h>
#include <uapi/linux/pidfd.h>
#include <linux/pidfs.h>
+#include <linux/tick.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
return false;
init_rwsem(&vma->vm_lock->lock);
- vma->vm_lock_seq = -1;
+ vma->vm_lock_seq = UINT_MAX;
return true;
}
@@ -621,6 +621,12 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
exe_file = get_mm_exe_file(oldmm);
RCU_INIT_POINTER(mm->exe_file, exe_file);
+ /*
+ * We depend on the oldmm having properly denied write access to the
+ * exe_file already.
+ */
+ if (exe_file && exe_file_deny_write_access(exe_file))
+ pr_warn_once("exe_file_deny_write_access() failed in %s\n", __func__);
}
#ifdef CONFIG_MMU
@@ -633,11 +639,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
LIST_HEAD(uf);
VMA_ITERATOR(vmi, mm, 0);
- uprobe_start_dup_mmap();
- if (mmap_write_lock_killable(oldmm)) {
- retval = -EINTR;
- goto fail_uprobe_end;
- }
+ if (mmap_write_lock_killable(oldmm))
+ return -EINTR;
flush_cache_dup_mm(oldmm);
uprobe_dup_mmap(oldmm, mm);
/*
@@ -653,11 +656,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
mm->exec_vm = oldmm->exec_vm;
mm->stack_vm = oldmm->stack_vm;
- retval = ksm_fork(mm, oldmm);
- if (retval)
- goto out;
- khugepaged_fork(mm, oldmm);
-
/* Use __mt_dup() to efficiently build an identical maple tree. */
retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL);
if (unlikely(retval))
@@ -760,7 +758,10 @@ loop_out:
vma_iter_free(&vmi);
if (!retval) {
mt_set_in_rcu(vmi.mas.tree);
- } else if (mpnt) {
+ ksm_fork(mm, oldmm);
+ khugepaged_fork(mm, oldmm);
+ } else {
+
/*
* The entire maple tree has already been duplicated. If the
* mmap duplication fails, mark the failure point with
@@ -768,16 +769,27 @@ loop_out:
* stop releasing VMAs that have not been duplicated after this
* point.
*/
- mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
- mas_store(&vmi.mas, XA_ZERO_ENTRY);
+ if (mpnt) {
+ mas_set_range(&vmi.mas, mpnt->vm_start, mpnt->vm_end - 1);
+ mas_store(&vmi.mas, XA_ZERO_ENTRY);
+ /* Avoid OOM iterating a broken tree */
+ set_bit(MMF_OOM_SKIP, &mm->flags);
+ }
+ /*
+ * The mm_struct is going to exit, but the locks will be dropped
+ * first. Setting the mm_struct as unstable is advisable as it is
+ * not fully initialised.
+ */
+ set_bit(MMF_UNSTABLE, &mm->flags);
}
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
mmap_write_unlock(oldmm);
- dup_userfaultfd_complete(&uf);
-fail_uprobe_end:
- uprobe_end_dup_mmap();
+ if (!retval)
+ dup_userfaultfd_complete(&uf);
+ else
+ dup_userfaultfd_fail(&uf);
return retval;
fail_nomem_anon_vma_fork:
@@ -1184,7 +1196,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->active_memcg = NULL;
#endif
-#ifdef CONFIG_CPU_SUP_INTEL
+#ifdef CONFIG_X86_BUS_LOCK_DETECT
tsk->reported_split_lock = 0;
#endif
@@ -1261,9 +1273,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
seqcount_init(&mm->write_protect_seq);
mmap_init_lock(mm);
INIT_LIST_HEAD(&mm->mmlist);
-#ifdef CONFIG_PER_VMA_LOCK
- mm->mm_lock_seq = 0;
-#endif
mm_pgtables_bytes_init(mm);
mm->map_count = 0;
mm->locked_vm = 0;
@@ -1298,7 +1307,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (init_new_context(p, mm))
goto fail_nocontext;
- if (mm_alloc_cid(mm))
+ if (mm_alloc_cid(mm, p))
goto fail_cid;
if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
@@ -1413,11 +1422,20 @@ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
*/
old_exe_file = rcu_dereference_raw(mm->exe_file);
- if (new_exe_file)
+ if (new_exe_file) {
+ /*
+ * We expect the caller (i.e., sys_execve) to have already denied
+ * write access, so this is unlikely to fail.
+ */
+ if (unlikely(exe_file_deny_write_access(new_exe_file)))
+ return -EACCES;
get_file(new_exe_file);
+ }
rcu_assign_pointer(mm->exe_file, new_exe_file);
- if (old_exe_file)
+ if (old_exe_file) {
+ exe_file_allow_write_access(old_exe_file);
fput(old_exe_file);
+ }
return 0;
}
@@ -1456,6 +1474,9 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
return ret;
}
+ ret = exe_file_deny_write_access(new_exe_file);
+ if (ret)
+ return -EACCES;
get_file(new_exe_file);
/* set the new file */
@@ -1464,8 +1485,10 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
rcu_assign_pointer(mm->exe_file, new_exe_file);
mmap_write_unlock(mm);
- if (old_exe_file)
+ if (old_exe_file) {
+ exe_file_allow_write_access(old_exe_file);
fput(old_exe_file);
+ }
return 0;
}
@@ -1499,12 +1522,13 @@ struct file *get_task_exe_file(struct task_struct *task)
struct file *exe_file = NULL;
struct mm_struct *mm;
+ if (task->flags & PF_KTHREAD)
+ return NULL;
+
task_lock(task);
mm = task->mm;
- if (mm) {
- if (!(task->flags & PF_KTHREAD))
- exe_file = get_mm_exe_file(mm);
- }
+ if (mm)
+ exe_file = get_mm_exe_file(mm);
task_unlock(task);
return exe_file;
}
@@ -1545,8 +1569,9 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
return ERR_PTR(err);
mm = get_task_mm(task);
- if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode)) {
+ if (!mm) {
+ mm = ERR_PTR(-ESRCH);
+ } else if (mm != current->mm && !ptrace_may_access(task, mode)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
@@ -1671,9 +1696,11 @@ static struct mm_struct *dup_mm(struct task_struct *tsk,
if (!mm_init(mm, tsk, mm->user_ns))
goto fail_nomem;
+ uprobe_start_dup_mmap();
err = dup_mmap(mm, oldmm);
if (err)
goto free_pt;
+ uprobe_end_dup_mmap();
mm->hiwater_rss = get_mm_rss(mm);
mm->hiwater_vm = mm->total_vm;
@@ -1688,6 +1715,8 @@ free_pt:
mm->binfmt = NULL;
mm_init_owner(mm, NULL);
mmput(mm);
+ if (err)
+ uprobe_end_dup_mmap();
fail_nomem:
return NULL;
@@ -1861,6 +1890,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
#ifdef CONFIG_POSIX_TIMERS
INIT_HLIST_HEAD(&sig->posix_timers);
+ INIT_HLIST_HEAD(&sig->ignored_posix_timers);
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sig->real_timer.function = it_real_fn;
#endif
@@ -2292,6 +2322,7 @@ __latent_entropy struct task_struct *copy_process(
acct_clear_integrals(p);
posix_cputimers_init(&p->posix_cputimers);
+ tick_dep_init_task(p);
p->io_context = NULL;
audit_set_context(p, NULL);