Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/hashtab.c                |  15
-rw-r--r--  kernel/bpf/inode.c                  |   4
-rw-r--r--  kernel/bpf/sysfs_btf.c              |   6
-rw-r--r--  kernel/bpf/verifier.c               |   8
-rw-r--r--  kernel/entry/common.c               |   8
-rw-r--r--  kernel/events/core.c                |   5
-rw-r--r--  kernel/fork.c                       |   3
-rw-r--r--  kernel/irq/chip.c                   |  27
-rw-r--r--  kernel/irq/debugfs.c                |   4
-rw-r--r--  kernel/irq/irqdomain.c              |  99
-rw-r--r--  kernel/irq/pm.c                     |  34
-rw-r--r--  kernel/irq/proc.c                   |   2
-rw-r--r--  kernel/irq/resend.c                 |  15
-rw-r--r--  kernel/irq/settings.h               |   7
-rw-r--r--  kernel/kprobes.c                    |  36
-rw-r--r--  kernel/locking/lockdep.c            |  35
-rw-r--r--  kernel/locking/lockdep_internals.h  |   2
-rw-r--r--  kernel/locking/percpu-rwsem.c       |   4
-rw-r--r--  kernel/rcu/rcu.h                    |   2
-rw-r--r--  kernel/rcu/tasks.h                  |   2
-rw-r--r--  kernel/rcu/tree.c                   |   2
-rw-r--r--  kernel/rcu/update.c                 |   2
-rw-r--r--  kernel/softirq.c                    |   1
-rw-r--r--  kernel/stackleak.c                  |   2
-rw-r--r--  kernel/stacktrace.c                 |   8
-rw-r--r--  kernel/sysctl.c                     |   8
-rw-r--r--  kernel/time/alarmtimer.c            |   2
-rw-r--r--  kernel/time/hrtimer.c               |   4
-rw-r--r--  kernel/time/timekeeping.c           | 109
-rw-r--r--  kernel/time/timer.c                 |   6
-rw-r--r--  kernel/trace/ftrace.c               |  12
-rw-r--r--  kernel/trace/trace.c                |  48
-rw-r--r--  kernel/trace/trace_events_hist.c    |   1
-rw-r--r--  kernel/trace/trace_output.c         |  12
-rw-r--r--  kernel/trace/trace_preemptirq.c     |   4
-rw-r--r--  kernel/umh.c                        |   9
-rw-r--r--  kernel/workqueue.c                  |   4
37 files changed, 432 insertions(+), 120 deletions(-)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 78dfff6a501b..7df28a45c66b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1622,7 +1622,6 @@ struct bpf_iter_seq_hash_map_info {
struct bpf_map *map;
struct bpf_htab *htab;
void *percpu_value_buf; // non-zero means percpu hash
- unsigned long flags;
u32 bucket_id;
u32 skip_elems;
};
@@ -1632,7 +1631,6 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
struct htab_elem *prev_elem)
{
const struct bpf_htab *htab = info->htab;
- unsigned long flags = info->flags;
u32 skip_elems = info->skip_elems;
u32 bucket_id = info->bucket_id;
struct hlist_nulls_head *head;
@@ -1656,19 +1654,18 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
/* not found, unlock and go to the next bucket */
b = &htab->buckets[bucket_id++];
- htab_unlock_bucket(htab, b, flags);
+ rcu_read_unlock();
skip_elems = 0;
}
for (i = bucket_id; i < htab->n_buckets; i++) {
b = &htab->buckets[i];
- flags = htab_lock_bucket(htab, b);
+ rcu_read_lock();
count = 0;
head = &b->head;
hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
if (count >= skip_elems) {
- info->flags = flags;
info->bucket_id = i;
info->skip_elems = count;
return elem;
@@ -1676,7 +1673,7 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
count++;
}
- htab_unlock_bucket(htab, b, flags);
+ rcu_read_unlock();
skip_elems = 0;
}
@@ -1754,14 +1751,10 @@ static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
{
- struct bpf_iter_seq_hash_map_info *info = seq->private;
-
if (!v)
(void)__bpf_hash_map_seq_show(seq, NULL);
else
- htab_unlock_bucket(info->htab,
- &info->htab->buckets[info->bucket_id],
- info->flags);
+ rcu_read_unlock();
}
static int bpf_iter_init_hash_map(void *priv_data,
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index fb878ba3f22f..18f4969552ac 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -226,10 +226,12 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
else
prev_key = key;
+ rcu_read_lock();
if (map->ops->map_get_next_key(map, prev_key, key)) {
map_iter(m)->done = true;
- return NULL;
+ key = NULL;
}
+ rcu_read_unlock();
return key;
}
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 3b495773de5a..11b3380887fa 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -30,15 +30,15 @@ static struct kobject *btf_kobj;
static int __init btf_vmlinux_init(void)
{
- if (!__start_BTF)
+ bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF;
+
+ if (!__start_BTF || bin_attr_btf_vmlinux.size == 0)
return 0;
btf_kobj = kobject_create_and_add("btf", kernel_kobj);
if (!btf_kobj)
return -ENOMEM;
- bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF;
-
return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 47e74f09fa37..fba52d9ec8fc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5667,8 +5667,8 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
bool src_known = tnum_subreg_is_const(src_reg->var_off);
bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
struct tnum var32_off = tnum_subreg(dst_reg->var_off);
- s32 smin_val = src_reg->smin_value;
- u32 umin_val = src_reg->umin_value;
+ s32 smin_val = src_reg->s32_min_value;
+ u32 umin_val = src_reg->u32_min_value;
/* Assuming scalar64_min_max_or will be called so it is safe
* to skip updating register for known case.
@@ -5691,8 +5691,8 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
/* ORing two positives gives a positive, so safe to
* cast result into s64.
*/
- dst_reg->s32_min_value = dst_reg->umin_value;
- dst_reg->s32_max_value = dst_reg->umax_value;
+ dst_reg->s32_min_value = dst_reg->u32_min_value;
+ dst_reg->s32_max_value = dst_reg->u32_max_value;
}
}
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 18683598edbc..145ab11b8318 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -60,13 +60,15 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
return ret;
}
+ /* Either of the above might have changed the syscall number */
+ syscall = syscall_get_nr(current, regs);
+
if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, syscall);
syscall_enter_audit(regs, syscall);
- /* The above might have changed the syscall number */
- return ret ? : syscall_get_nr(current, regs);
+ return ret ? : syscall;
}
static __always_inline long
@@ -206,7 +208,7 @@ static inline bool report_single_step(unsigned long ti_work)
/*
* If TIF_SYSCALL_EMU is set, then the only reason to report is when
* TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
- * instruction has been already reported in syscall_enter_from_usermode().
+ * instruction has been already reported in syscall_enter_from_user_mode().
*/
#define SYSEMU_STEP (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7ed5248f0445..e8bf92202542 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -99,7 +99,7 @@ static void remote_function(void *data)
* retry due to any failures in smp_call_function_single(), such as if the
* task_cpu() goes offline concurrently.
*
- * returns @func return value or -ESRCH when the process isn't running
+ * returns @func return value or -ESRCH or -ENXIO when the process isn't running
*/
static int
task_function_call(struct task_struct *p, remote_function_f func, void *info)
@@ -115,7 +115,8 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
for (;;) {
ret = smp_call_function_single(task_cpu(p), remote_function,
&data, 1);
- ret = !ret ? data.ret : -EAGAIN;
+ if (!ret)
+ ret = data.ret;
if (ret != -EAGAIN)
break;
diff --git a/kernel/fork.c b/kernel/fork.c
index 49677d668de4..da8d360fb032 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -589,7 +589,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
- retval = copy_page_range(mm, oldmm, mpnt);
+ retval = copy_page_range(mm, oldmm, mpnt, tmp);
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
@@ -1011,6 +1011,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_pgtables_bytes_init(mm);
mm->map_count = 0;
mm->locked_vm = 0;
+ atomic_set(&mm->has_pinned, 0);
atomic64_set(&mm->pinned_vm, 0);
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 0ae308efa604..b9b9618e1aca 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -945,6 +945,33 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
}
/**
+ * handle_percpu_devid_fasteoi_ipi - Per CPU local IPI handler with per cpu
+ * dev ids
+ * @desc: the interrupt description structure for this irq
+ *
+ * The biggest difference with the IRQ version is that the interrupt is
+ * EOIed early, as the IPI could result in a context switch, and we need to
+ * make sure the IPI can fire again. We also assume that the arch code has
+ * registered an action. If not, we are positively doomed.
+ */
+void handle_percpu_devid_fasteoi_ipi(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct irqaction *action = desc->action;
+ unsigned int irq = irq_desc_get_irq(desc);
+ irqreturn_t res;
+
+ __kstat_incr_irqs_this_cpu(desc);
+
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+
+ trace_irq_handler_entry(irq, action);
+ res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
+ trace_irq_handler_exit(irq, action, res);
+}
+
+/**
* handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu
* dev ids
* @desc: the interrupt description structure for this irq
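A minimal sketch, not part of this patch, of how an irqchip driver might wire an IPI to the new flow handler. The wrapper name is illustrative, and it is assumed that handle_percpu_devid_fasteoi_ipi() is declared alongside the other flow handlers in <linux/irq.h>:

#include <linux/irq.h>

static void example_setup_ipi_handler(unsigned int virq)
{
	/* Per-CPU dev-id IPIs use the percpu-devid infrastructure. */
	irq_set_percpu_devid(virq);
	/* Install the early-EOI IPI flow handler added above. */
	irq_set_handler(virq, handle_percpu_devid_fasteoi_ipi);
}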
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index b95ff5d5f4bd..e4cff358b437 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -57,6 +57,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI),
+ BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND),
};
static void
@@ -125,6 +126,8 @@ static const struct irq_bit_descr irqdata_states[] = {
BIT_MASK_DESCR(IRQD_DEFAULT_TRIGGER_SET),
BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX),
+
+ BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND),
};
static const struct irq_bit_descr irqdesc_states[] = {
@@ -136,6 +139,7 @@ static const struct irq_bit_descr irqdesc_states[] = {
BIT_MASK_DESCR(_IRQ_PER_CPU_DEVID),
BIT_MASK_DESCR(_IRQ_IS_POLLED),
BIT_MASK_DESCR(_IRQ_DISABLE_UNLAZY),
+ BIT_MASK_DESCR(_IRQ_HIDDEN),
};
static const struct irq_bit_descr irqdesc_istates[] = {
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 76cd7ebd1178..cf8b374b892d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1136,6 +1136,17 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain,
return irq_data;
}
+static void __irq_domain_free_hierarchy(struct irq_data *irq_data)
+{
+ struct irq_data *tmp;
+
+ while (irq_data) {
+ tmp = irq_data;
+ irq_data = irq_data->parent_data;
+ kfree(tmp);
+ }
+}
+
static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs)
{
struct irq_data *irq_data, *tmp;
@@ -1147,12 +1158,83 @@ static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs)
irq_data->parent_data = NULL;
irq_data->domain = NULL;
- while (tmp) {
- irq_data = tmp;
- tmp = tmp->parent_data;
- kfree(irq_data);
+ __irq_domain_free_hierarchy(tmp);
+ }
+}
+
+/**
+ * irq_domain_disconnect_hierarchy - Mark the first unused level of a hierarchy
+ * @domain: IRQ domain from which the hierarchy is to be disconnected
+ * @virq: IRQ number where the hierarchy is to be trimmed
+ *
+ * Marks the @virq level belonging to @domain as disconnected.
+ * Returns -EINVAL if @virq doesn't have a valid irq_data pointing
+ * to @domain.
+ *
+ * Its only use is to be able to trim levels of hierarchy that do not
+ * have any real meaning for this interrupt, and that the driver marks
+ * as such from its .alloc() callback.
+ */
+int irq_domain_disconnect_hierarchy(struct irq_domain *domain,
+ unsigned int virq)
+{
+ struct irq_data *irqd;
+
+ irqd = irq_domain_get_irq_data(domain, virq);
+ if (!irqd)
+ return -EINVAL;
+
+ irqd->chip = ERR_PTR(-ENOTCONN);
+ return 0;
+}
+
+static int irq_domain_trim_hierarchy(unsigned int virq)
+{
+ struct irq_data *tail, *irqd, *irq_data;
+
+ irq_data = irq_get_irq_data(virq);
+ tail = NULL;
+
+ /* The first entry must have a valid irqchip */
+ if (!irq_data->chip || IS_ERR(irq_data->chip))
+ return -EINVAL;
+
+ /*
+ * Validate that the irq_data chain is sane in the presence of
+ * a hierarchy trimming marker.
+ */
+ for (irqd = irq_data->parent_data; irqd; irq_data = irqd, irqd = irqd->parent_data) {
+ /* Can't have a valid irqchip after a trim marker */
+ if (irqd->chip && tail)
+ return -EINVAL;
+
+ /* Can't have an empty irqchip before a trim marker */
+ if (!irqd->chip && !tail)
+ return -EINVAL;
+
+ if (IS_ERR(irqd->chip)) {
+ /* Only -ENOTCONN is a valid trim marker */
+ if (PTR_ERR(irqd->chip) != -ENOTCONN)
+ return -EINVAL;
+
+ tail = irq_data;
}
}
+
+ /* No trim marker, nothing to do */
+ if (!tail)
+ return 0;
+
+ pr_info("IRQ%d: trimming hierarchy from %s\n",
+ virq, tail->parent_data->domain->name);
+
+ /* Sever the inner part of the hierarchy... */
+ irqd = tail;
+ tail = tail->parent_data;
+ irqd->parent_data = NULL;
+ __irq_domain_free_hierarchy(tail);
+
+ return 0;
}
static int irq_domain_alloc_irq_data(struct irq_domain *domain,
@@ -1362,6 +1444,15 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
mutex_unlock(&irq_domain_mutex);
goto out_free_irq_data;
}
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = irq_domain_trim_hierarchy(virq + i);
+ if (ret) {
+ mutex_unlock(&irq_domain_mutex);
+ goto out_free_irq_data;
+ }
+ }
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
mutex_unlock(&irq_domain_mutex);
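A sketch of the intended use of irq_domain_disconnect_hierarchy(), assuming a hierarchical domain whose parent level is meaningless for some interrupts; the callback and domain names are illustrative only:

#include <linux/irqdomain.h>

/* Hypothetical .alloc() callback of a child irq domain. */
static int example_domain_alloc(struct irq_domain *domain, unsigned int virq,
				unsigned int nr_irqs, void *arg)
{
	/* ... per-level irq_data/chip setup for this domain ... */

	/* This interrupt never reaches the parent: mark that level unused. */
	return irq_domain_disconnect_hierarchy(domain->parent, virq);
}

irq_domain_trim_hierarchy(), run for each freshly allocated virq in __irq_domain_alloc_irqs() above, then validates the chain and frees everything from the -ENOTCONN marker upwards.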
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index c6c7e187ae74..ce0adb22ee96 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -69,12 +69,26 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action)
static bool suspend_device_irq(struct irq_desc *desc)
{
+ unsigned long chipflags = irq_desc_get_chip(desc)->flags;
+ struct irq_data *irqd = &desc->irq_data;
+
if (!desc->action || irq_desc_is_chained(desc) ||
desc->no_suspend_depth)
return false;
- if (irqd_is_wakeup_set(&desc->irq_data)) {
- irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
+ if (irqd_is_wakeup_set(irqd)) {
+ irqd_set(irqd, IRQD_WAKEUP_ARMED);
+
+ if ((chipflags & IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND) &&
+ irqd_irq_disabled(irqd)) {
+ /*
+ * Interrupt marked for wakeup is in disabled state.
+ * Enable interrupt here to unmask/enable in irqchip
+ * to be able to resume with such interrupts.
+ */
+ __enable_irq(desc);
+ irqd_set(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND);
+ }
/*
* We return true here to force the caller to issue
* synchronize_irq(). We need to make sure that the
@@ -93,7 +107,7 @@ static bool suspend_device_irq(struct irq_desc *desc)
* chip level. The chip implementation indicates that with
* IRQCHIP_MASK_ON_SUSPEND.
*/
- if (irq_desc_get_chip(desc)->flags & IRQCHIP_MASK_ON_SUSPEND)
+ if (chipflags & IRQCHIP_MASK_ON_SUSPEND)
mask_irq(desc);
return true;
}
@@ -137,7 +151,19 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs);
static void resume_irq(struct irq_desc *desc)
{
- irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED);
+ struct irq_data *irqd = &desc->irq_data;
+
+ irqd_clear(irqd, IRQD_WAKEUP_ARMED);
+
+ if (irqd_is_enabled_on_suspend(irqd)) {
+ /*
+ * Interrupt marked for wakeup was enabled during suspend
+ * entry. Disable such interrupts to restore them back to
+ * original state.
+ */
+ __disable_irq(desc);
+ irqd_clear(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND);
+ }
if (desc->istate & IRQS_SUSPENDED)
goto resume;
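The new IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND flag is purely declarative from the irqchip's point of view; a minimal sketch of a chip opting in (all names except the flag are illustrative stubs):

#include <linux/irq.h>

static void example_irq_mask(struct irq_data *d) { }
static void example_irq_unmask(struct irq_data *d) { }

static struct irq_chip example_wakeup_chip = {
	.name		= "example-wakeup",
	.irq_mask	= example_irq_mask,
	.irq_unmask	= example_irq_unmask,
	/* Keep wake-armed but disabled interrupts enabled across suspend. */
	.flags		= IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND,
};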
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 32c071d7bc03..72513ed2a5fc 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -485,7 +485,7 @@ int show_interrupts(struct seq_file *p, void *v)
rcu_read_lock();
desc = irq_to_desc(i);
- if (!desc)
+ if (!desc || irq_settings_is_hidden(desc))
goto outsparse;
if (desc->kstat_irqs)
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index c48ce19a257f..8ccd32a0cc80 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -86,6 +86,18 @@ static int irq_sw_resend(struct irq_desc *desc)
}
#endif
+static int try_retrigger(struct irq_desc *desc)
+{
+ if (desc->irq_data.chip->irq_retrigger)
+ return desc->irq_data.chip->irq_retrigger(&desc->irq_data);
+
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ return irq_chip_retrigger_hierarchy(&desc->irq_data);
+#else
+ return 0;
+#endif
+}
+
/*
* IRQ resend
*
@@ -113,8 +125,7 @@ int check_irq_resend(struct irq_desc *desc, bool inject)
desc->istate &= ~IRQS_PENDING;
- if (!desc->irq_data.chip->irq_retrigger ||
- !desc->irq_data.chip->irq_retrigger(&desc->irq_data))
+ if (!try_retrigger(desc))
err = irq_sw_resend(desc);
/* If the retrigger was successfull, mark it with the REPLAY bit */
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index e43795cd2ccf..403378b9947b 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -17,6 +17,7 @@ enum {
_IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
_IRQ_IS_POLLED = IRQ_IS_POLLED,
_IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
+ _IRQ_HIDDEN = IRQ_HIDDEN,
_IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
};
@@ -31,6 +32,7 @@ enum {
#define IRQ_PER_CPU_DEVID GOT_YOU_MORON
#define IRQ_IS_POLLED GOT_YOU_MORON
#define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
+#define IRQ_HIDDEN GOT_YOU_MORON
#undef IRQF_MODIFY_MASK
#define IRQF_MODIFY_MASK GOT_YOU_MORON
@@ -167,3 +169,8 @@ static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc)
{
desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY;
}
+
+static inline bool irq_settings_is_hidden(struct irq_desc *desc)
+{
+ return desc->status_use_accessors & _IRQ_HIDDEN;
+}
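irq_settings_is_hidden() is only consulted by show_interrupts() (see the kernel/irq/proc.c hunk above), so hiding an interrupt from /proc/interrupts comes down to setting the status flag. A hedged sketch, with the helper name being illustrative:

#include <linux/irq.h>

static void example_hide_from_proc(unsigned int virq)
{
	/* e.g. an IPI that is accounted elsewhere and should not be listed */
	irq_set_status_flags(virq, IRQ_HIDDEN);
}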
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 287b263c9cb9..e995541d277d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2140,6 +2140,9 @@ static void kill_kprobe(struct kprobe *p)
lockdep_assert_held(&kprobe_mutex);
+ if (WARN_ON_ONCE(kprobe_gone(p)))
+ return;
+
p->flags |= KPROBE_FLAG_GONE;
if (kprobe_aggrprobe(p)) {
/*
@@ -2159,9 +2162,10 @@ static void kill_kprobe(struct kprobe *p)
/*
* The module is going away. We should disarm the kprobe which
- * is using ftrace.
+ * is using ftrace, because ftrace framework is still available at
+ * MODULE_STATE_GOING notification.
*/
- if (kprobe_ftrace(p))
+ if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
disarm_kprobe_ftrace(p);
}
@@ -2419,7 +2423,10 @@ static int kprobes_module_callback(struct notifier_block *nb,
mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry(p, head, hlist)
+ hlist_for_each_entry(p, head, hlist) {
+ if (kprobe_gone(p))
+ continue;
+
if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore &&
within_module_core((unsigned long)p->addr, mod))) {
@@ -2436,6 +2443,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
*/
kill_kprobe(p);
}
+ }
}
if (val == MODULE_STATE_GOING)
remove_module_kprobe_blacklist(mod);
@@ -2452,6 +2460,28 @@ static struct notifier_block kprobe_module_nb = {
extern unsigned long __start_kprobe_blacklist[];
extern unsigned long __stop_kprobe_blacklist[];
+void kprobe_free_init_mem(void)
+{
+ void *start = (void *)(&__init_begin);
+ void *end = (void *)(&__init_end);
+ struct hlist_head *head;
+ struct kprobe *p;
+ int i;
+
+ mutex_lock(&kprobe_mutex);
+
+ /* Kill all kprobes on initmem */
+ for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+ head = &kprobe_table[i];
+ hlist_for_each_entry(p, head, hlist) {
+ if (start <= (void *)p->addr && (void *)p->addr < end)
+ kill_kprobe(p);
+ }
+ }
+
+ mutex_unlock(&kprobe_mutex);
+}
+
static int __init init_kprobes(void)
{
int i, err = 0;
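kprobe_free_init_mem() has no caller in this file; the intent is that the init-memory freeing path runs it before __init text is released, so kprobes placed there cannot fire on recycled memory. A hedged sketch of such a call site (the function name is illustrative; the real hook would sit next to free_initmem()):

#include <linux/kprobes.h>

static void example_release_initmem(void)
{
	/* Mark kprobes on __init text as gone before the pages are freed. */
	kprobe_free_init_mem();
	/* ... hand the init sections back to the page allocator ... */
}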
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 54b74fabf40c..2facbbd146ec 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3969,13 +3969,18 @@ static int separate_irq_context(struct task_struct *curr,
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit)
{
- unsigned int new_mask = 1 << new_bit, ret = 1;
+ unsigned int old_mask, new_mask, ret = 1;
if (new_bit >= LOCK_USAGE_STATES) {
DEBUG_LOCKS_WARN_ON(1);
return 0;
}
+ if (new_bit == LOCK_USED && this->read)
+ new_bit = LOCK_USED_READ;
+
+ new_mask = 1 << new_bit;
+
/*
* If already set then do not dirty the cacheline,
* nor do any checks:
@@ -3988,13 +3993,22 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
/*
* Make sure we didn't race:
*/
- if (unlikely(hlock_class(this)->usage_mask & new_mask)) {
- graph_unlock();
- return 1;
- }
+ if (unlikely(hlock_class(this)->usage_mask & new_mask))
+ goto unlock;
+ old_mask = hlock_class(this)->usage_mask;
hlock_class(this)->usage_mask |= new_mask;
+ /*
+ * Save one usage_traces[] entry and map both LOCK_USED and
+ * LOCK_USED_READ onto the same entry.
+ */
+ if (new_bit == LOCK_USED || new_bit == LOCK_USED_READ) {
+ if (old_mask & (LOCKF_USED | LOCKF_USED_READ))
+ goto unlock;
+ new_bit = LOCK_USED;
+ }
+
if (!(hlock_class(this)->usage_traces[new_bit] = save_trace()))
return 0;
@@ -4008,6 +4022,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
}
+unlock:
graph_unlock();
/*
@@ -4942,12 +4957,20 @@ static void verify_lock_unused(struct lockdep_map *lock, struct held_lock *hlock
{
#ifdef CONFIG_PROVE_LOCKING
struct lock_class *class = look_up_lock_class(lock, subclass);
+ unsigned long mask = LOCKF_USED;
/* if it doesn't have a class (yet), it certainly hasn't been used yet */
if (!class)
return;
- if (!(class->usage_mask & LOCK_USED))
+ /*
+ * READ locks only conflict with USED, such that if we only ever use
+ * READ locks, there is no deadlock possible -- RCU.
+ */
+ if (!hlock->read)
+ mask |= LOCKF_USED_READ;
+
+ if (!(class->usage_mask & mask))
return;
hlock->class_idx = class - lock_classes;
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index baca699b94e9..b0be1560ed17 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -19,6 +19,7 @@ enum lock_usage_bit {
#include "lockdep_states.h"
#undef LOCKDEP_STATE
LOCK_USED,
+ LOCK_USED_READ,
LOCK_USAGE_STATES
};
@@ -40,6 +41,7 @@ enum {
#include "lockdep_states.h"
#undef LOCKDEP_STATE
__LOCKF(USED)
+ __LOCKF(USED_READ)
};
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE |
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 8bbafe3e5203..70a32a576f3f 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem);
static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
- __this_cpu_inc(*sem->read_count);
+ this_cpu_inc(*sem->read_count);
/*
* Due to having preemption disabled the decrement happens on
@@ -71,7 +71,7 @@ static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
if (likely(!atomic_read_acquire(&sem->block)))
return true;
- __this_cpu_dec(*sem->read_count);
+ this_cpu_dec(*sem->read_count);
/* Prod writer to re-evaluate readers_active_check() */
rcuwait_wake_up(&sem->writer);
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index cf66a3ccd757..e01cba5e4b52 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -167,7 +167,7 @@ static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
# define STATE_RCU_HEAD_READY 0
# define STATE_RCU_HEAD_QUEUED 1
-extern struct debug_obj_descr rcuhead_debug_descr;
+extern const struct debug_obj_descr rcuhead_debug_descr;
static inline int debug_rcu_head_queue(struct rcu_head *head)
{
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 835e2df8590a..05d3e1375e4c 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -590,7 +590,7 @@ void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
}
#else /* #ifdef CONFIG_TASKS_RCU */
-static void show_rcu_tasks_classic_gp_kthread(void) { }
+static inline void show_rcu_tasks_classic_gp_kthread(void) { }
void exit_tasks_rcu_start(void) { }
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8ce77d9ac716..f78ee759af9c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -673,6 +673,7 @@ void rcu_idle_enter(void)
lockdep_assert_irqs_disabled();
rcu_eqs_enter(false);
}
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
#ifdef CONFIG_NO_HZ_FULL
/**
@@ -886,6 +887,7 @@ void rcu_idle_exit(void)
rcu_eqs_exit(false);
local_irq_restore(flags);
}
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
#ifdef CONFIG_NO_HZ_FULL
/**
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 2de49b5d8dd2..3e0f4bcb558f 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -469,7 +469,7 @@ void destroy_rcu_head_on_stack(struct rcu_head *head)
}
EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
-struct debug_obj_descr rcuhead_debug_descr = {
+const struct debug_obj_descr rcuhead_debug_descr = {
.name = "rcu_head",
.is_static_object = rcuhead_is_static_object,
};
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bf88d7f62433..09229ad82209 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -481,6 +481,7 @@ void raise_softirq(unsigned int nr)
void __raise_softirq_irqoff(unsigned int nr)
{
+ lockdep_assert_irqs_disabled();
trace_softirq_raise(nr);
or_softirq_pending(1UL << nr);
}
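The added lockdep_assert_irqs_disabled() only documents the existing contract: __raise_softirq_irqoff() must be called with hardirqs off. A minimal sketch of a conforming caller (the softirq number is just an example):

#include <linux/interrupt.h>

static void example_raise_softirq(void)
{
	unsigned long flags;

	local_irq_save(flags);
	__raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}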
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index a8fc9ae1d03d..ce161a8e8d97 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -20,7 +20,7 @@
static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
int stack_erasing_sysctl(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = 0;
int state = !static_branch_unlikely(&stack_erasing_bypass);
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 946f44a9e86a..9f8117c7cfdd 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -78,8 +78,7 @@ struct stacktrace_cookie {
unsigned int len;
};
-static bool stack_trace_consume_entry(void *cookie, unsigned long addr,
- bool reliable)
+static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
{
struct stacktrace_cookie *c = cookie;
@@ -94,12 +93,11 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr,
return c->len < c->size;
}
-static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr,
- bool reliable)
+static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr)
{
if (in_sched_functions(addr))
return true;
- return stack_trace_consume_entry(cookie, addr, reliable);
+ return stack_trace_consume_entry(cookie, addr);
}
/**
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 09e70ee2332e..afad085960b8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2912,6 +2912,14 @@ static struct ctl_table vm_table[] = {
.proc_handler = percpu_pagelist_fraction_sysctl_handler,
.extra1 = SYSCTL_ZERO,
},
+ {
+ .procname = "page_lock_unfairness",
+ .data = &sysctl_page_lock_unfairness,
+ .maxlen = sizeof(sysctl_page_lock_unfairness),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
#ifdef CONFIG_MMU
{
.procname = "max_map_count",
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index ca223a89530a..f4ace1bf8382 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -908,7 +908,7 @@ static int __init alarmtimer_init(void)
/* Initialize alarm bases */
alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
alarm_bases[ALARM_REALTIME].get_ktime = &ktime_get_real;
- alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64,
+ alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64;
alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME;
alarm_bases[ALARM_BOOTTIME].get_ktime = &ktime_get_boottime;
alarm_bases[ALARM_BOOTTIME].get_timespec = get_boottime_timespec;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 95b6a708b040..3624b9b5835d 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-static struct debug_obj_descr hrtimer_debug_descr;
+static const struct debug_obj_descr hrtimer_debug_descr;
static void *hrtimer_debug_hint(void *addr)
{
@@ -401,7 +401,7 @@ static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
}
}
-static struct debug_obj_descr hrtimer_debug_descr = {
+static const struct debug_obj_descr hrtimer_debug_descr = {
.name = "hrtimer",
.debug_hint = hrtimer_debug_hint,
.fixup_init = hrtimer_fixup_init,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4c47f388a83f..ba7657685e22 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -54,6 +54,9 @@ static struct {
static struct timekeeper shadow_timekeeper;
+/* flag for if timekeeping is suspended */
+int __read_mostly timekeeping_suspended;
+
/**
* struct tk_fast - NMI safe timekeeper
* @seq: Sequence counter for protecting updates. The lowest bit
@@ -73,28 +76,42 @@ static u64 cycles_at_suspend;
static u64 dummy_clock_read(struct clocksource *cs)
{
- return cycles_at_suspend;
+ if (timekeeping_suspended)
+ return cycles_at_suspend;
+ return local_clock();
}
static struct clocksource dummy_clock = {
.read = dummy_clock_read,
};
+/*
+ * Boot time initialization which allows local_clock() to be utilized
+ * during early boot when clocksources are not available. local_clock()
+ * returns nanoseconds already so no conversion is required, hence mult=1
+ * and shift=0. When the first proper clocksource is installed then
+ * the fast time keepers are updated with the correct values.
+ */
+#define FAST_TK_INIT \
+ { \
+ .clock = &dummy_clock, \
+ .mask = CLOCKSOURCE_MASK(64), \
+ .mult = 1, \
+ .shift = 0, \
+ }
+
static struct tk_fast tk_fast_mono ____cacheline_aligned = {
.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
- .base[0] = { .clock = &dummy_clock, },
- .base[1] = { .clock = &dummy_clock, },
+ .base[0] = FAST_TK_INIT,
+ .base[1] = FAST_TK_INIT,
};
static struct tk_fast tk_fast_raw ____cacheline_aligned = {
.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
- .base[0] = { .clock = &dummy_clock, },
- .base[1] = { .clock = &dummy_clock, },
+ .base[0] = FAST_TK_INIT,
+ .base[1] = FAST_TK_INIT,
};
-/* flag for if timekeeping is suspended */
-int __read_mostly timekeeping_suspended;
-
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
@@ -513,29 +530,29 @@ u64 notrace ktime_get_boot_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
-
/*
* See comment for __ktime_get_fast_ns() vs. timestamp ordering
*/
-static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
+static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
{
struct tk_read_base *tkr;
+ u64 basem, baser, delta;
unsigned int seq;
- u64 now;
do {
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
- now = ktime_to_ns(tkr->base_real);
+ basem = ktime_to_ns(tkr->base);
+ baser = ktime_to_ns(tkr->base_real);
- now += timekeeping_delta_to_ns(tkr,
- clocksource_delta(
- tk_clock_read(tkr),
- tkr->cycle_last,
- tkr->mask));
+ delta = timekeeping_delta_to_ns(tkr,
+ clocksource_delta(tk_clock_read(tkr),
+ tkr->cycle_last, tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
- return now;
+ if (mono)
+ *mono = basem + delta;
+ return baser + delta;
}
/**
@@ -543,11 +560,65 @@ static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
*/
u64 ktime_get_real_fast_ns(void)
{
- return __ktime_get_real_fast_ns(&tk_fast_mono);
+ return __ktime_get_real_fast(&tk_fast_mono, NULL);
}
EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
/**
+ * ktime_get_fast_timestamps: - NMI safe timestamps
+ * @snapshot: Pointer to timestamp storage
+ *
+ * Stores clock monotonic, boottime and realtime timestamps.
+ *
+ * Boot time is a racy access on 32bit systems if the sleep time injection
+ * happens late during resume and not in timekeeping_resume(). That could
+ * be avoided by expanding struct tk_read_base with boot offset for 32bit
+ * and adding more overhead to the update. As this is a hard to observe
+ * once per resume event which can be filtered with reasonable effort using
+ * the accurate mono/real timestamps, it's probably not worth the trouble.
+ *
+ * Aside of that it might be possible on 32 and 64 bit to observe the
+ * following when the sleep time injection happens late:
+ *
+ * CPU 0 CPU 1
+ * timekeeping_resume()
+ * ktime_get_fast_timestamps()
+ * mono, real = __ktime_get_real_fast()
+ * inject_sleep_time()
+ * update boot offset
+ * boot = mono + bootoffset;
+ *
+ * That means that boot time already has the sleep time adjustment, but
+ * real time does not. On the next readout both are in sync again.
+ *
+ * Preventing this for 64bit is not really feasible without destroying the
+ * careful cache layout of the timekeeper because the sequence count and
+ * struct tk_read_base would then need two cache lines instead of one.
+ *
+ * Access to the time keeper clock source is disabled across the innermost
+ * steps of suspend/resume. The accessors still work, but the timestamps
+ * are frozen until time keeping is resumed which happens very early.
+ *
+ * For regular suspend/resume there is no observable difference vs. sched
+ * clock, but it might affect some of the nasty low level debug printks.
+ *
+ * OTOH, access to sched clock is not guaranteed across suspend/resume on
+ * all systems either so it depends on the hardware in use.
+ *
+ * If that turns out to be a real problem then this could be mitigated by
+ * using sched clock in a similar way as during early boot. But it's not as
+ * trivial as on early boot because it needs some careful protection
+ * against the clock monotonic timestamp jumping backwards on resume.
+ */
+void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+
+ snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono);
+ snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot));
+}
+
+/**
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
* @tk: Timekeeper to snapshot.
*
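A hedged usage sketch for ktime_get_fast_timestamps(): struct ktime_timestamps and its mono/boot/real fields are assumed from the companion header change, and the printing is merely illustrative of a debug path:

#include <linux/timekeeping.h>
#include <linux/printk.h>

static void example_dump_timestamps(void)
{
	struct ktime_timestamps ts;

	ktime_get_fast_timestamps(&ts);
	pr_info("mono=%llu boot=%llu real=%llu ns\n", ts.mono, ts.boot, ts.real);
}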
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a50364df1054..dda05f4b7a1f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -611,7 +611,7 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-static struct debug_obj_descr timer_debug_descr;
+static const struct debug_obj_descr timer_debug_descr;
static void *timer_debug_hint(void *addr)
{
@@ -707,7 +707,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
}
}
-static struct debug_obj_descr timer_debug_descr = {
+static const struct debug_obj_descr timer_debug_descr = {
.name = "timer_list",
.debug_hint = timer_debug_hint,
.is_static_object = timer_is_static_object,
@@ -794,6 +794,8 @@ static void do_init_timer(struct timer_list *timer,
{
timer->entry.pprev = NULL;
timer->function = func;
+ if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS))
+ flags &= TIMER_INIT_FLAGS;
timer->flags = flags | raw_smp_processor_id();
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 275441254bb5..541453927c82 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2782,6 +2782,7 @@ static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops)
{
lockdep_assert_held(&ftrace_lock);
list_del_rcu(&ops->list);
+ synchronize_rcu();
}
/*
@@ -2862,6 +2863,8 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
__unregister_ftrace_function(ops);
ftrace_start_up--;
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+ if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
+ ftrace_trampoline_free(ops);
return ret;
}
@@ -6990,16 +6993,14 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
{
int bit;
- if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching())
- return;
-
bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;
preempt_disable_notrace();
- op->func(ip, parent_ip, op, regs);
+ if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
+ op->func(ip, parent_ip, op, regs);
preempt_enable_notrace();
trace_clear_recursion(bit);
@@ -7531,8 +7532,7 @@ static bool is_permanent_ops_registered(void)
int
ftrace_enable_sysctl(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret = -ENODEV;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f40d850ebabc..d3e5de717df2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3546,13 +3546,15 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
if (iter->ent && iter->ent != iter->temp) {
if ((!iter->temp || iter->temp_size < iter->ent_size) &&
!WARN_ON_ONCE(iter->temp == static_temp_buf)) {
- kfree(iter->temp);
- iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
- if (!iter->temp)
+ void *temp;
+ temp = kmalloc(iter->ent_size, GFP_KERNEL);
+ if (!temp)
return NULL;
+ kfree(iter->temp);
+ iter->temp = temp;
+ iter->temp_size = iter->ent_size;
}
memcpy(iter->temp, iter->ent, iter->ent_size);
- iter->temp_size = iter->ent_size;
iter->ent = iter->temp;
}
entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
@@ -3782,14 +3784,14 @@ unsigned long trace_total_entries(struct trace_array *tr)
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off \n"
- "# | / _----=> need-resched \n"
- "# || / _---=> hardirq/softirq \n"
- "# ||| / _--=> preempt-depth \n"
- "# |||| / delay \n"
- "# cmd pid ||||| time | caller \n"
- "# \\ / ||||| \\ | / \n");
+ seq_puts(m, "# _------=> CPU# \n"
+ "# / _-----=> irqs-off \n"
+ "# | / _----=> need-resched \n"
+ "# || / _---=> hardirq/softirq \n"
+ "# ||| / _--=> preempt-depth \n"
+ "# |||| / delay \n"
+ "# cmd pid ||||| time | caller \n"
+ "# \\ / ||||| \\ | / \n");
}
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -3810,26 +3812,26 @@ static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
print_event_info(buf, m);
- seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
- seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
+ seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
+ seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
}
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
unsigned int flags)
{
bool tgid = flags & TRACE_ITER_RECORD_TGID;
- const char *space = " ";
- int prec = tgid ? 10 : 2;
+ const char *space = " ";
+ int prec = tgid ? 12 : 2;
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
- seq_printf(m, "# %.*s||| / delay\n", prec, space);
- seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
- seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
+ seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
+ seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
+ seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
+ seq_printf(m, "# %.*s||| / delay\n", prec, space);
+ seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+ seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
}
void
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 0b933546142e..1b2ef6490229 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3865,7 +3865,6 @@ static int parse_var_defs(struct hist_trigger_data *hist_data)
s = kstrdup(field_str, GFP_KERNEL);
if (!s) {
- kfree(hist_data->attrs->var_defs.name[n_vars]);
ret = -ENOMEM;
goto free;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 4d1893564912..000e9dc224c6 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -497,7 +497,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
trace_find_cmdline(entry->pid, comm);
- trace_seq_printf(s, "%8.8s-%-5d %3d",
+ trace_seq_printf(s, "%8.8s-%-7d %3d",
comm, entry->pid, cpu);
return trace_print_lat_fmt(s, entry);
@@ -588,15 +588,15 @@ int trace_print_context(struct trace_iterator *iter)
trace_find_cmdline(entry->pid, comm);
- trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
+ trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);
if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
unsigned int tgid = trace_find_tgid(entry->pid);
if (!tgid)
- trace_seq_printf(s, "(-----) ");
+ trace_seq_printf(s, "(-------) ");
else
- trace_seq_printf(s, "(%5d) ", tgid);
+ trace_seq_printf(s, "(%7d) ", tgid);
}
trace_seq_printf(s, "[%03d] ", iter->cpu);
@@ -636,7 +636,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
trace_find_cmdline(entry->pid, comm);
trace_seq_printf(
- s, "%16s %5d %3d %d %08x %08lx ",
+ s, "%16s %7d %3d %d %08x %08lx ",
comm, entry->pid, iter->cpu, entry->flags,
entry->preempt_count, iter->idx);
} else {
@@ -917,7 +917,7 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
S = task_index_to_char(field->prev_state);
trace_find_cmdline(field->next_pid, comm);
trace_seq_printf(&iter->seq,
- " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+ " %7d:%3d:%c %s [%03d] %7d:%3d:%c %s\n",
field->prev_pid,
field->prev_prio,
S, delim,
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index f10073e62603..f4938040c228 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -102,14 +102,14 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
+ lockdep_hardirqs_off(CALLER_ADDR0);
+
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, caller_addr);
if (!in_nmi())
trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
}
-
- lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
NOKPROBE_SYMBOL(trace_hardirqs_off_caller);
diff --git a/kernel/umh.c b/kernel/umh.c
index fcf3ee803630..3f646613a9d3 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -14,6 +14,7 @@
#include <linux/cred.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/fs_struct.h>
#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/mount.h>
@@ -72,6 +73,14 @@ static int call_usermodehelper_exec_async(void *data)
spin_unlock_irq(&current->sighand->siglock);
/*
+ * Initial kernel threads share their FS with init, in order to
+ * get the init root directory. But we've now created a new
+ * thread that is going to execve a user process and has its own
+ * 'struct fs_struct'. Reset umask to the default.
+ */
+ current->fs->umask = 0022;
+
+ /*
* Our parent (unbound workqueue) runs with elevated scheduling
* priority. Avoid propagating that into the userspace child.
*/
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c41c3c17b86a..ac088ce6059b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -427,7 +427,7 @@ static void show_pwq(struct pool_workqueue *pwq);
#ifdef CONFIG_DEBUG_OBJECTS_WORK
-static struct debug_obj_descr work_debug_descr;
+static const struct debug_obj_descr work_debug_descr;
static void *work_debug_hint(void *addr)
{
@@ -477,7 +477,7 @@ static bool work_fixup_free(void *addr, enum debug_obj_state state)
}
}
-static struct debug_obj_descr work_debug_descr = {
+static const struct debug_obj_descr work_debug_descr = {
.name = "work_struct",
.debug_hint = work_debug_hint,
.is_static_object = work_is_static_object,