diff options
Diffstat (limited to 'kernel')
37 files changed, 432 insertions, 120 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 78dfff6a501b..7df28a45c66b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1622,7 +1622,6 @@ struct bpf_iter_seq_hash_map_info { struct bpf_map *map; struct bpf_htab *htab; void *percpu_value_buf; // non-zero means percpu hash - unsigned long flags; u32 bucket_id; u32 skip_elems; }; @@ -1632,7 +1631,6 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, struct htab_elem *prev_elem) { const struct bpf_htab *htab = info->htab; - unsigned long flags = info->flags; u32 skip_elems = info->skip_elems; u32 bucket_id = info->bucket_id; struct hlist_nulls_head *head; @@ -1656,19 +1654,18 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, /* not found, unlock and go to the next bucket */ b = &htab->buckets[bucket_id++]; - htab_unlock_bucket(htab, b, flags); + rcu_read_unlock(); skip_elems = 0; } for (i = bucket_id; i < htab->n_buckets; i++) { b = &htab->buckets[i]; - flags = htab_lock_bucket(htab, b); + rcu_read_lock(); count = 0; head = &b->head; hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { if (count >= skip_elems) { - info->flags = flags; info->bucket_id = i; info->skip_elems = count; return elem; @@ -1676,7 +1673,7 @@ bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, count++; } - htab_unlock_bucket(htab, b, flags); + rcu_read_unlock(); skip_elems = 0; } @@ -1754,14 +1751,10 @@ static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) { - struct bpf_iter_seq_hash_map_info *info = seq->private; - if (!v) (void)__bpf_hash_map_seq_show(seq, NULL); else - htab_unlock_bucket(info->htab, - &info->htab->buckets[info->bucket_id], - info->flags); + rcu_read_unlock(); } static int bpf_iter_init_hash_map(void *priv_data, diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fb878ba3f22f..18f4969552ac 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -226,10 +226,12 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) else prev_key = key; + rcu_read_lock(); if (map->ops->map_get_next_key(map, prev_key, key)) { map_iter(m)->done = true; - return NULL; + key = NULL; } + rcu_read_unlock(); return key; } diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 3b495773de5a..11b3380887fa 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -30,15 +30,15 @@ static struct kobject *btf_kobj; static int __init btf_vmlinux_init(void) { - if (!__start_BTF) + bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; + + if (!__start_BTF || bin_attr_btf_vmlinux.size == 0) return 0; btf_kobj = kobject_create_and_add("btf", kernel_kobj); if (!btf_kobj) return -ENOMEM; - bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; - return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47e74f09fa37..fba52d9ec8fc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5667,8 +5667,8 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, bool src_known = tnum_subreg_is_const(src_reg->var_off); bool dst_known = tnum_subreg_is_const(dst_reg->var_off); struct tnum var32_off = tnum_subreg(dst_reg->var_off); - s32 smin_val = src_reg->smin_value; - u32 umin_val = src_reg->umin_value; + s32 smin_val = src_reg->s32_min_value; + u32 umin_val = src_reg->u32_min_value; /* Assuming scalar64_min_max_or will be called so it is safe * to skip updating register for known case. @@ -5691,8 +5691,8 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, /* ORing two positives gives a positive, so safe to * cast result into s64. */ - dst_reg->s32_min_value = dst_reg->umin_value; - dst_reg->s32_max_value = dst_reg->umax_value; + dst_reg->s32_min_value = dst_reg->u32_min_value; + dst_reg->s32_max_value = dst_reg->u32_max_value; } } diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 18683598edbc..145ab11b8318 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -60,13 +60,15 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, return ret; } + /* Either of the above might have changed the syscall number */ + syscall = syscall_get_nr(current, regs); + if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall); syscall_enter_audit(regs, syscall); - /* The above might have changed the syscall number */ - return ret ? : syscall_get_nr(current, regs); + return ret ? : syscall; } static __always_inline long @@ -206,7 +208,7 @@ static inline bool report_single_step(unsigned long ti_work) /* * If TIF_SYSCALL_EMU is set, then the only reason to report is when * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall - * instruction has been already reported in syscall_enter_from_usermode(). + * instruction has been already reported in syscall_enter_from_user_mode(). */ #define SYSEMU_STEP (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7ed5248f0445..e8bf92202542 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -99,7 +99,7 @@ static void remote_function(void *data) * retry due to any failures in smp_call_function_single(), such as if the * task_cpu() goes offline concurrently. * - * returns @func return value or -ESRCH when the process isn't running + * returns @func return value or -ESRCH or -ENXIO when the process isn't running */ static int task_function_call(struct task_struct *p, remote_function_f func, void *info) @@ -115,7 +115,8 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info) for (;;) { ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1); - ret = !ret ? data.ret : -EAGAIN; + if (!ret) + ret = data.ret; if (ret != -EAGAIN) break; diff --git a/kernel/fork.c b/kernel/fork.c index 49677d668de4..da8d360fb032 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -589,7 +589,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, mm->map_count++; if (!(tmp->vm_flags & VM_WIPEONFORK)) - retval = copy_page_range(mm, oldmm, mpnt); + retval = copy_page_range(mm, oldmm, mpnt, tmp); if (tmp->vm_ops && tmp->vm_ops->open) tmp->vm_ops->open(tmp); @@ -1011,6 +1011,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_pgtables_bytes_init(mm); mm->map_count = 0; mm->locked_vm = 0; + atomic_set(&mm->has_pinned, 0); atomic64_set(&mm->pinned_vm, 0); memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 0ae308efa604..b9b9618e1aca 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -945,6 +945,33 @@ void handle_percpu_devid_irq(struct irq_desc *desc) } /** + * handle_percpu_devid_fasteoi_ipi - Per CPU local IPI handler with per cpu + * dev ids + * @desc: the interrupt description structure for this irq + * + * The biggest difference with the IRQ version is that the interrupt is + * EOIed early, as the IPI could result in a context switch, and we need to + * make sure the IPI can fire again. We also assume that the arch code has + * registered an action. If not, we are positively doomed. + */ +void handle_percpu_devid_fasteoi_ipi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + __kstat_incr_irqs_this_cpu(desc); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); +} + +/** * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu * dev ids * @desc: the interrupt description structure for this irq diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index b95ff5d5f4bd..e4cff358b437 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -57,6 +57,7 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), + BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND), }; static void @@ -125,6 +126,8 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_DEFAULT_TRIGGER_SET), BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX), + + BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND), }; static const struct irq_bit_descr irqdesc_states[] = { @@ -136,6 +139,7 @@ static const struct irq_bit_descr irqdesc_states[] = { BIT_MASK_DESCR(_IRQ_PER_CPU_DEVID), BIT_MASK_DESCR(_IRQ_IS_POLLED), BIT_MASK_DESCR(_IRQ_DISABLE_UNLAZY), + BIT_MASK_DESCR(_IRQ_HIDDEN), }; static const struct irq_bit_descr irqdesc_istates[] = { diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 76cd7ebd1178..cf8b374b892d 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1136,6 +1136,17 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, return irq_data; } +static void __irq_domain_free_hierarchy(struct irq_data *irq_data) +{ + struct irq_data *tmp; + + while (irq_data) { + tmp = irq_data; + irq_data = irq_data->parent_data; + kfree(tmp); + } +} + static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs) { struct irq_data *irq_data, *tmp; @@ -1147,12 +1158,83 @@ static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs) irq_data->parent_data = NULL; irq_data->domain = NULL; - while (tmp) { - irq_data = tmp; - tmp = tmp->parent_data; - kfree(irq_data); + __irq_domain_free_hierarchy(tmp); + } +} + +/** + * irq_domain_disconnect_hierarchy - Mark the first unused level of a hierarchy + * @domain: IRQ domain from which the hierarchy is to be disconnected + * @virq: IRQ number where the hierarchy is to be trimmed + * + * Marks the @virq level belonging to @domain as disconnected. + * Returns -EINVAL if @virq doesn't have a valid irq_data pointing + * to @domain. + * + * Its only use is to be able to trim levels of hierarchy that do not + * have any real meaning for this interrupt, and that the driver marks + * as such from its .alloc() callback. + */ +int irq_domain_disconnect_hierarchy(struct irq_domain *domain, + unsigned int virq) +{ + struct irq_data *irqd; + + irqd = irq_domain_get_irq_data(domain, virq); + if (!irqd) + return -EINVAL; + + irqd->chip = ERR_PTR(-ENOTCONN); + return 0; +} + +static int irq_domain_trim_hierarchy(unsigned int virq) +{ + struct irq_data *tail, *irqd, *irq_data; + + irq_data = irq_get_irq_data(virq); + tail = NULL; + + /* The first entry must have a valid irqchip */ + if (!irq_data->chip || IS_ERR(irq_data->chip)) + return -EINVAL; + + /* + * Validate that the irq_data chain is sane in the presence of + * a hierarchy trimming marker. + */ + for (irqd = irq_data->parent_data; irqd; irq_data = irqd, irqd = irqd->parent_data) { + /* Can't have a valid irqchip after a trim marker */ + if (irqd->chip && tail) + return -EINVAL; + + /* Can't have an empty irqchip before a trim marker */ + if (!irqd->chip && !tail) + return -EINVAL; + + if (IS_ERR(irqd->chip)) { + /* Only -ENOTCONN is a valid trim marker */ + if (PTR_ERR(irqd->chip) != -ENOTCONN) + return -EINVAL; + + tail = irq_data; } } + + /* No trim marker, nothing to do */ + if (!tail) + return 0; + + pr_info("IRQ%d: trimming hierarchy from %s\n", + virq, tail->parent_data->domain->name); + + /* Sever the inner part of the hierarchy... */ + irqd = tail; + tail = tail->parent_data; + irqd->parent_data = NULL; + __irq_domain_free_hierarchy(tail); + + return 0; } static int irq_domain_alloc_irq_data(struct irq_domain *domain, @@ -1362,6 +1444,15 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, mutex_unlock(&irq_domain_mutex); goto out_free_irq_data; } + + for (i = 0; i < nr_irqs; i++) { + ret = irq_domain_trim_hierarchy(virq + i); + if (ret) { + mutex_unlock(&irq_domain_mutex); + goto out_free_irq_data; + } + } + for (i = 0; i < nr_irqs; i++) irq_domain_insert_irq(virq + i); mutex_unlock(&irq_domain_mutex); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index c6c7e187ae74..ce0adb22ee96 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -69,12 +69,26 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) static bool suspend_device_irq(struct irq_desc *desc) { + unsigned long chipflags = irq_desc_get_chip(desc)->flags; + struct irq_data *irqd = &desc->irq_data; + if (!desc->action || irq_desc_is_chained(desc) || desc->no_suspend_depth) return false; - if (irqd_is_wakeup_set(&desc->irq_data)) { - irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + if (irqd_is_wakeup_set(irqd)) { + irqd_set(irqd, IRQD_WAKEUP_ARMED); + + if ((chipflags & IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND) && + irqd_irq_disabled(irqd)) { + /* + * Interrupt marked for wakeup is in disabled state. + * Enable interrupt here to unmask/enable in irqchip + * to be able to resume with such interrupts. + */ + __enable_irq(desc); + irqd_set(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); + } /* * We return true here to force the caller to issue * synchronize_irq(). We need to make sure that the @@ -93,7 +107,7 @@ static bool suspend_device_irq(struct irq_desc *desc) * chip level. The chip implementation indicates that with * IRQCHIP_MASK_ON_SUSPEND. */ - if (irq_desc_get_chip(desc)->flags & IRQCHIP_MASK_ON_SUSPEND) + if (chipflags & IRQCHIP_MASK_ON_SUSPEND) mask_irq(desc); return true; } @@ -137,7 +151,19 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs); static void resume_irq(struct irq_desc *desc) { - irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + struct irq_data *irqd = &desc->irq_data; + + irqd_clear(irqd, IRQD_WAKEUP_ARMED); + + if (irqd_is_enabled_on_suspend(irqd)) { + /* + * Interrupt marked for wakeup was enabled during suspend + * entry. Disable such interrupts to restore them back to + * original state. + */ + __disable_irq(desc); + irqd_clear(irqd, IRQD_IRQ_ENABLED_ON_SUSPEND); + } if (desc->istate & IRQS_SUSPENDED) goto resume; diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 32c071d7bc03..72513ed2a5fc 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -485,7 +485,7 @@ int show_interrupts(struct seq_file *p, void *v) rcu_read_lock(); desc = irq_to_desc(i); - if (!desc) + if (!desc || irq_settings_is_hidden(desc)) goto outsparse; if (desc->kstat_irqs) diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index c48ce19a257f..8ccd32a0cc80 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -86,6 +86,18 @@ static int irq_sw_resend(struct irq_desc *desc) } #endif +static int try_retrigger(struct irq_desc *desc) +{ + if (desc->irq_data.chip->irq_retrigger) + return desc->irq_data.chip->irq_retrigger(&desc->irq_data); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + return irq_chip_retrigger_hierarchy(&desc->irq_data); +#else + return 0; +#endif +} + /* * IRQ resend * @@ -113,8 +125,7 @@ int check_irq_resend(struct irq_desc *desc, bool inject) desc->istate &= ~IRQS_PENDING; - if (!desc->irq_data.chip->irq_retrigger || - !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) + if (!try_retrigger(desc)) err = irq_sw_resend(desc); /* If the retrigger was successfull, mark it with the REPLAY bit */ diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index e43795cd2ccf..403378b9947b 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h @@ -17,6 +17,7 @@ enum { _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, + _IRQ_HIDDEN = IRQ_HIDDEN, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; @@ -31,6 +32,7 @@ enum { #define IRQ_PER_CPU_DEVID GOT_YOU_MORON #define IRQ_IS_POLLED GOT_YOU_MORON #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON +#define IRQ_HIDDEN GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON @@ -167,3 +169,8 @@ static inline void irq_settings_clr_disable_unlazy(struct irq_desc *desc) { desc->status_use_accessors &= ~_IRQ_DISABLE_UNLAZY; } + +static inline bool irq_settings_is_hidden(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_HIDDEN; +} diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 287b263c9cb9..e995541d277d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2140,6 +2140,9 @@ static void kill_kprobe(struct kprobe *p) lockdep_assert_held(&kprobe_mutex); + if (WARN_ON_ONCE(kprobe_gone(p))) + return; + p->flags |= KPROBE_FLAG_GONE; if (kprobe_aggrprobe(p)) { /* @@ -2159,9 +2162,10 @@ static void kill_kprobe(struct kprobe *p) /* * The module is going away. We should disarm the kprobe which - * is using ftrace. + * is using ftrace, because ftrace framework is still available at + * MODULE_STATE_GOING notification. */ - if (kprobe_ftrace(p)) + if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) disarm_kprobe_ftrace(p); } @@ -2419,7 +2423,10 @@ static int kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry(p, head, hlist) + hlist_for_each_entry(p, head, hlist) { + if (kprobe_gone(p)) + continue; + if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2436,6 +2443,7 @@ static int kprobes_module_callback(struct notifier_block *nb, */ kill_kprobe(p); } + } } if (val == MODULE_STATE_GOING) remove_module_kprobe_blacklist(mod); @@ -2452,6 +2460,28 @@ static struct notifier_block kprobe_module_nb = { extern unsigned long __start_kprobe_blacklist[]; extern unsigned long __stop_kprobe_blacklist[]; +void kprobe_free_init_mem(void) +{ + void *start = (void *)(&__init_begin); + void *end = (void *)(&__init_end); + struct hlist_head *head; + struct kprobe *p; + int i; + + mutex_lock(&kprobe_mutex); + + /* Kill all kprobes on initmem */ + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry(p, head, hlist) { + if (start <= (void *)p->addr && (void *)p->addr < end) + kill_kprobe(p); + } + } + + mutex_unlock(&kprobe_mutex); +} + static int __init init_kprobes(void) { int i, err = 0; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 54b74fabf40c..2facbbd146ec 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3969,13 +3969,18 @@ static int separate_irq_context(struct task_struct *curr, static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit) { - unsigned int new_mask = 1 << new_bit, ret = 1; + unsigned int old_mask, new_mask, ret = 1; if (new_bit >= LOCK_USAGE_STATES) { DEBUG_LOCKS_WARN_ON(1); return 0; } + if (new_bit == LOCK_USED && this->read) + new_bit = LOCK_USED_READ; + + new_mask = 1 << new_bit; + /* * If already set then do not dirty the cacheline, * nor do any checks: @@ -3988,13 +3993,22 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, /* * Make sure we didn't race: */ - if (unlikely(hlock_class(this)->usage_mask & new_mask)) { - graph_unlock(); - return 1; - } + if (unlikely(hlock_class(this)->usage_mask & new_mask)) + goto unlock; + old_mask = hlock_class(this)->usage_mask; hlock_class(this)->usage_mask |= new_mask; + /* + * Save one usage_traces[] entry and map both LOCK_USED and + * LOCK_USED_READ onto the same entry. + */ + if (new_bit == LOCK_USED || new_bit == LOCK_USED_READ) { + if (old_mask & (LOCKF_USED | LOCKF_USED_READ)) + goto unlock; + new_bit = LOCK_USED; + } + if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) return 0; @@ -4008,6 +4022,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, return 0; } +unlock: graph_unlock(); /* @@ -4942,12 +4957,20 @@ static void verify_lock_unused(struct lockdep_map *lock, struct held_lock *hlock { #ifdef CONFIG_PROVE_LOCKING struct lock_class *class = look_up_lock_class(lock, subclass); + unsigned long mask = LOCKF_USED; /* if it doesn't have a class (yet), it certainly hasn't been used yet */ if (!class) return; - if (!(class->usage_mask & LOCK_USED)) + /* + * READ locks only conflict with USED, such that if we only ever use + * READ locks, there is no deadlock possible -- RCU. + */ + if (!hlock->read) + mask |= LOCKF_USED_READ; + + if (!(class->usage_mask & mask)) return; hlock->class_idx = class - lock_classes; diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index baca699b94e9..b0be1560ed17 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -19,6 +19,7 @@ enum lock_usage_bit { #include "lockdep_states.h" #undef LOCKDEP_STATE LOCK_USED, + LOCK_USED_READ, LOCK_USAGE_STATES }; @@ -40,6 +41,7 @@ enum { #include "lockdep_states.h" #undef LOCKDEP_STATE __LOCKF(USED) + __LOCKF(USED_READ) }; #define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE | diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 8bbafe3e5203..70a32a576f3f 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem); static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { - __this_cpu_inc(*sem->read_count); + this_cpu_inc(*sem->read_count); /* * Due to having preemption disabled the decrement happens on @@ -71,7 +71,7 @@ static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) if (likely(!atomic_read_acquire(&sem->block))) return true; - __this_cpu_dec(*sem->read_count); + this_cpu_dec(*sem->read_count); /* Prod writer to re-evaluate readers_active_check() */ rcuwait_wake_up(&sem->writer); diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index cf66a3ccd757..e01cba5e4b52 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -167,7 +167,7 @@ static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old) # define STATE_RCU_HEAD_READY 0 # define STATE_RCU_HEAD_QUEUED 1 -extern struct debug_obj_descr rcuhead_debug_descr; +extern const struct debug_obj_descr rcuhead_debug_descr; static inline int debug_rcu_head_queue(struct rcu_head *head) { diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 835e2df8590a..05d3e1375e4c 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -590,7 +590,7 @@ void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu) } #else /* #ifdef CONFIG_TASKS_RCU */ -static void show_rcu_tasks_classic_gp_kthread(void) { } +static inline void show_rcu_tasks_classic_gp_kthread(void) { } void exit_tasks_rcu_start(void) { } void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } #endif /* #else #ifdef CONFIG_TASKS_RCU */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8ce77d9ac716..f78ee759af9c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -673,6 +673,7 @@ void rcu_idle_enter(void) lockdep_assert_irqs_disabled(); rcu_eqs_enter(false); } +EXPORT_SYMBOL_GPL(rcu_idle_enter); #ifdef CONFIG_NO_HZ_FULL /** @@ -886,6 +887,7 @@ void rcu_idle_exit(void) rcu_eqs_exit(false); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_exit); #ifdef CONFIG_NO_HZ_FULL /** diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 2de49b5d8dd2..3e0f4bcb558f 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -469,7 +469,7 @@ void destroy_rcu_head_on_stack(struct rcu_head *head) } EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack); -struct debug_obj_descr rcuhead_debug_descr = { +const struct debug_obj_descr rcuhead_debug_descr = { .name = "rcu_head", .is_static_object = rcuhead_is_static_object, }; diff --git a/kernel/softirq.c b/kernel/softirq.c index bf88d7f62433..09229ad82209 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -481,6 +481,7 @@ void raise_softirq(unsigned int nr) void __raise_softirq_irqoff(unsigned int nr) { + lockdep_assert_irqs_disabled(); trace_softirq_raise(nr); or_softirq_pending(1UL << nr); } diff --git a/kernel/stackleak.c b/kernel/stackleak.c index a8fc9ae1d03d..ce161a8e8d97 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -20,7 +20,7 @@ static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); int stack_erasing_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; int state = !static_branch_unlikely(&stack_erasing_bypass); diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 946f44a9e86a..9f8117c7cfdd 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -78,8 +78,7 @@ struct stacktrace_cookie { unsigned int len; }; -static bool stack_trace_consume_entry(void *cookie, unsigned long addr, - bool reliable) +static bool stack_trace_consume_entry(void *cookie, unsigned long addr) { struct stacktrace_cookie *c = cookie; @@ -94,12 +93,11 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr, return c->len < c->size; } -static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr, - bool reliable) +static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr) { if (in_sched_functions(addr)) return true; - return stack_trace_consume_entry(cookie, addr, reliable); + return stack_trace_consume_entry(cookie, addr); } /** diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e70ee2332e..afad085960b8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2912,6 +2912,14 @@ static struct ctl_table vm_table[] = { .proc_handler = percpu_pagelist_fraction_sysctl_handler, .extra1 = SYSCTL_ZERO, }, + { + .procname = "page_lock_unfairness", + .data = &sysctl_page_lock_unfairness, + .maxlen = sizeof(sysctl_page_lock_unfairness), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, #ifdef CONFIG_MMU { .procname = "max_map_count", diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index ca223a89530a..f4ace1bf8382 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -908,7 +908,7 @@ static int __init alarmtimer_init(void) /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; alarm_bases[ALARM_REALTIME].get_ktime = &ktime_get_real; - alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64, + alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64; alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; alarm_bases[ALARM_BOOTTIME].get_ktime = &ktime_get_boottime; alarm_bases[ALARM_BOOTTIME].get_timespec = get_boottime_timespec; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 95b6a708b040..3624b9b5835d 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(ktime_add_safe); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -static struct debug_obj_descr hrtimer_debug_descr; +static const struct debug_obj_descr hrtimer_debug_descr; static void *hrtimer_debug_hint(void *addr) { @@ -401,7 +401,7 @@ static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) } } -static struct debug_obj_descr hrtimer_debug_descr = { +static const struct debug_obj_descr hrtimer_debug_descr = { .name = "hrtimer", .debug_hint = hrtimer_debug_hint, .fixup_init = hrtimer_fixup_init, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4c47f388a83f..ba7657685e22 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -54,6 +54,9 @@ static struct { static struct timekeeper shadow_timekeeper; +/* flag for if timekeeping is suspended */ +int __read_mostly timekeeping_suspended; + /** * struct tk_fast - NMI safe timekeeper * @seq: Sequence counter for protecting updates. The lowest bit @@ -73,28 +76,42 @@ static u64 cycles_at_suspend; static u64 dummy_clock_read(struct clocksource *cs) { - return cycles_at_suspend; + if (timekeeping_suspended) + return cycles_at_suspend; + return local_clock(); } static struct clocksource dummy_clock = { .read = dummy_clock_read, }; +/* + * Boot time initialization which allows local_clock() to be utilized + * during early boot when clocksources are not available. local_clock() + * returns nanoseconds already so no conversion is required, hence mult=1 + * and shift=0. When the first proper clocksource is installed then + * the fast time keepers are updated with the correct values. + */ +#define FAST_TK_INIT \ + { \ + .clock = &dummy_clock, \ + .mask = CLOCKSOURCE_MASK(64), \ + .mult = 1, \ + .shift = 0, \ + } + static struct tk_fast tk_fast_mono ____cacheline_aligned = { .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock), - .base[0] = { .clock = &dummy_clock, }, - .base[1] = { .clock = &dummy_clock, }, + .base[0] = FAST_TK_INIT, + .base[1] = FAST_TK_INIT, }; static struct tk_fast tk_fast_raw ____cacheline_aligned = { .seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock), - .base[0] = { .clock = &dummy_clock, }, - .base[1] = { .clock = &dummy_clock, }, + .base[0] = FAST_TK_INIT, + .base[1] = FAST_TK_INIT, }; -/* flag for if timekeeping is suspended */ -int __read_mostly timekeeping_suspended; - static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { @@ -513,29 +530,29 @@ u64 notrace ktime_get_boot_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); - /* * See comment for __ktime_get_fast_ns() vs. timestamp ordering */ -static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf) +static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono) { struct tk_read_base *tkr; + u64 basem, baser, delta; unsigned int seq; - u64 now; do { seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); - now = ktime_to_ns(tkr->base_real); + basem = ktime_to_ns(tkr->base); + baser = ktime_to_ns(tkr->base_real); - now += timekeeping_delta_to_ns(tkr, - clocksource_delta( - tk_clock_read(tkr), - tkr->cycle_last, - tkr->mask)); + delta = timekeeping_delta_to_ns(tkr, + clocksource_delta(tk_clock_read(tkr), + tkr->cycle_last, tkr->mask)); } while (read_seqcount_retry(&tkf->seq, seq)); - return now; + if (mono) + *mono = basem + delta; + return baser + delta; } /** @@ -543,11 +560,65 @@ static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf) */ u64 ktime_get_real_fast_ns(void) { - return __ktime_get_real_fast_ns(&tk_fast_mono); + return __ktime_get_real_fast(&tk_fast_mono, NULL); } EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns); /** + * ktime_get_fast_timestamps: - NMI safe timestamps + * @snapshot: Pointer to timestamp storage + * + * Stores clock monotonic, boottime and realtime timestamps. + * + * Boot time is a racy access on 32bit systems if the sleep time injection + * happens late during resume and not in timekeeping_resume(). That could + * be avoided by expanding struct tk_read_base with boot offset for 32bit + * and adding more overhead to the update. As this is a hard to observe + * once per resume event which can be filtered with reasonable effort using + * the accurate mono/real timestamps, it's probably not worth the trouble. + * + * Aside of that it might be possible on 32 and 64 bit to observe the + * following when the sleep time injection happens late: + * + * CPU 0 CPU 1 + * timekeeping_resume() + * ktime_get_fast_timestamps() + * mono, real = __ktime_get_real_fast() + * inject_sleep_time() + * update boot offset + * boot = mono + bootoffset; + * + * That means that boot time already has the sleep time adjustment, but + * real time does not. On the next readout both are in sync again. + * + * Preventing this for 64bit is not really feasible without destroying the + * careful cache layout of the timekeeper because the sequence count and + * struct tk_read_base would then need two cache lines instead of one. + * + * Access to the time keeper clock source is disabled accross the innermost + * steps of suspend/resume. The accessors still work, but the timestamps + * are frozen until time keeping is resumed which happens very early. + * + * For regular suspend/resume there is no observable difference vs. sched + * clock, but it might affect some of the nasty low level debug printks. + * + * OTOH, access to sched clock is not guaranteed accross suspend/resume on + * all systems either so it depends on the hardware in use. + * + * If that turns out to be a real problem then this could be mitigated by + * using sched clock in a similar way as during early boot. But it's not as + * trivial as on early boot because it needs some careful protection + * against the clock monotonic timestamp jumping backwards on resume. + */ +void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono); + snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot)); +} + +/** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. * diff --git a/kernel/time/timer.c b/kernel/time/timer.c index a50364df1054..dda05f4b7a1f 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -611,7 +611,7 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -static struct debug_obj_descr timer_debug_descr; +static const struct debug_obj_descr timer_debug_descr; static void *timer_debug_hint(void *addr) { @@ -707,7 +707,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) } } -static struct debug_obj_descr timer_debug_descr = { +static const struct debug_obj_descr timer_debug_descr = { .name = "timer_list", .debug_hint = timer_debug_hint, .is_static_object = timer_is_static_object, @@ -794,6 +794,8 @@ static void do_init_timer(struct timer_list *timer, { timer->entry.pprev = NULL; timer->function = func; + if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS)) + flags &= TIMER_INIT_FLAGS; timer->flags = flags | raw_smp_processor_id(); lockdep_init_map(&timer->lockdep_map, name, key, 0); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 275441254bb5..541453927c82 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2782,6 +2782,7 @@ static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops) { lockdep_assert_held(&ftrace_lock); list_del_rcu(&ops->list); + synchronize_rcu(); } /* @@ -2862,6 +2863,8 @@ int ftrace_startup(struct ftrace_ops *ops, int command) __unregister_ftrace_function(ops); ftrace_start_up--; ops->flags &= ~FTRACE_OPS_FL_ENABLED; + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + ftrace_trampoline_free(ops); return ret; } @@ -6990,16 +6993,14 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching()) - return; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; preempt_disable_notrace(); - op->func(ip, parent_ip, op, regs); + if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) + op->func(ip, parent_ip, op, regs); preempt_enable_notrace(); trace_clear_recursion(bit); @@ -7531,8 +7532,7 @@ static bool is_permanent_ops_registered(void) int ftrace_enable_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = -ENODEV; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f40d850ebabc..d3e5de717df2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3546,13 +3546,15 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, if (iter->ent && iter->ent != iter->temp) { if ((!iter->temp || iter->temp_size < iter->ent_size) && !WARN_ON_ONCE(iter->temp == static_temp_buf)) { - kfree(iter->temp); - iter->temp = kmalloc(iter->ent_size, GFP_KERNEL); - if (!iter->temp) + void *temp; + temp = kmalloc(iter->ent_size, GFP_KERNEL); + if (!temp) return NULL; + kfree(iter->temp); + iter->temp = temp; + iter->temp_size = iter->ent_size; } memcpy(iter->temp, iter->ent, iter->ent_size); - iter->temp_size = iter->ent_size; iter->ent = iter->temp; } entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); @@ -3782,14 +3784,14 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off \n" - "# | / _----=> need-resched \n" - "# || / _---=> hardirq/softirq \n" - "# ||| / _--=> preempt-depth \n" - "# |||| / delay \n" - "# cmd pid ||||| time | caller \n" - "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# _------=> CPU# \n" + "# / _-----=> irqs-off \n" + "# | / _----=> need-resched \n" + "# || / _---=> hardirq/softirq \n" + "# ||| / _--=> preempt-depth \n" + "# |||| / delay \n" + "# cmd pid ||||| time | caller \n" + "# \\ / ||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) @@ -3810,26 +3812,26 @@ static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, print_event_info(buf, m); - seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); - seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); + seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); + seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char *space = " "; - int prec = tgid ? 10 : 2; + const char *space = " "; + int prec = tgid ? 12 : 2; print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); } void diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0b933546142e..1b2ef6490229 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3865,7 +3865,6 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { - kfree(hist_data->attrs->var_defs.name[n_vars]); ret = -ENOMEM; goto free; } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 4d1893564912..000e9dc224c6 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -497,7 +497,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%8.8s-%-5d %3d", + trace_seq_printf(s, "%8.8s-%-7d %3d", comm, entry->pid, cpu); return trace_print_lat_fmt(s, entry); @@ -588,15 +588,15 @@ int trace_print_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid); if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { unsigned int tgid = trace_find_tgid(entry->pid); if (!tgid) - trace_seq_printf(s, "(-----) "); + trace_seq_printf(s, "(-------) "); else - trace_seq_printf(s, "(%5d) ", tgid); + trace_seq_printf(s, "(%7d) ", tgid); } trace_seq_printf(s, "[%03d] ", iter->cpu); @@ -636,7 +636,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); trace_seq_printf( - s, "%16s %5d %3d %d %08x %08lx ", + s, "%16s %7d %3d %d %08x %08lx ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx); } else { @@ -917,7 +917,7 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, S = task_index_to_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); trace_seq_printf(&iter->seq, - " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", + " %7d:%3d:%c %s [%03d] %7d:%3d:%c %s\n", field->prev_pid, field->prev_prio, S, delim, diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index f10073e62603..f4938040c228 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -102,14 +102,14 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { + lockdep_hardirqs_off(CALLER_ADDR0); + if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, caller_addr); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); } - - lockdep_hardirqs_off(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_off_caller); NOKPROBE_SYMBOL(trace_hardirqs_off_caller); diff --git a/kernel/umh.c b/kernel/umh.c index fcf3ee803630..3f646613a9d3 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -14,6 +14,7 @@ #include <linux/cred.h> #include <linux/file.h> #include <linux/fdtable.h> +#include <linux/fs_struct.h> #include <linux/workqueue.h> #include <linux/security.h> #include <linux/mount.h> @@ -72,6 +73,14 @@ static int call_usermodehelper_exec_async(void *data) spin_unlock_irq(¤t->sighand->siglock); /* + * Initial kernel threads share ther FS with init, in order to + * get the init root directory. But we've now created a new + * thread that is going to execve a user process and has its own + * 'struct fs_struct'. Reset umask to the default. + */ + current->fs->umask = 0022; + + /* * Our parent (unbound workqueue) runs with elevated scheduling * priority. Avoid propagating that into the userspace child. */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c41c3c17b86a..ac088ce6059b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -427,7 +427,7 @@ static void show_pwq(struct pool_workqueue *pwq); #ifdef CONFIG_DEBUG_OBJECTS_WORK -static struct debug_obj_descr work_debug_descr; +static const struct debug_obj_descr work_debug_descr; static void *work_debug_hint(void *addr) { @@ -477,7 +477,7 @@ static bool work_fixup_free(void *addr, enum debug_obj_state state) } } -static struct debug_obj_descr work_debug_descr = { +static const struct debug_obj_descr work_debug_descr = { .name = "work_struct", .debug_hint = work_debug_hint, .is_static_object = work_is_static_object, |
