Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                 |  27
-rw-r--r--  kernel/trace/blktrace.c              |  46
-rw-r--r--  kernel/trace/fgraph.c                |   4
-rw-r--r--  kernel/trace/ftrace.c                |  43
-rw-r--r--  kernel/trace/ring_buffer.c           |   7
-rw-r--r--  kernel/trace/trace.c                 | 109
-rw-r--r--  kernel/trace/trace.h                 |  84
-rw-r--r--  kernel/trace/trace_eprobe.c          |  54
-rw-r--r--  kernel/trace/trace_events.c          |  26
-rw-r--r--  kernel/trace/trace_events_filter.c   | 139
-rw-r--r--  kernel/trace/trace_events_hist.c     |  85
-rw-r--r--  kernel/trace/trace_events_inject.c   |  11
-rw-r--r--  kernel/trace/trace_events_synth.c    |  15
-rw-r--r--  kernel/trace/trace_events_trigger.c  | 497
-rw-r--r--  kernel/trace/trace_hwlat.c           |   6
-rw-r--r--  kernel/trace/trace_kprobe.c          |  45
-rw-r--r--  kernel/trace/trace_osnoise.c         | 114
-rw-r--r--  kernel/trace/trace_output.c          |   4
-rw-r--r--  kernel/trace/trace_probe.c           |  15
-rw-r--r--  kernel/trace/trace_probe.h           |   1
-rw-r--r--  kernel/trace/trace_sched_switch.c    |   1
-rw-r--r--  kernel/trace/trace_sched_wakeup.c    |   1
-rw-r--r--  kernel/trace/trace_selftest.c        |   6
-rw-r--r--  kernel/trace/trace_syscalls.c        |   6
-rw-r--r--  kernel/trace/trace_uprobe.c          |  39
25 files changed, 1039 insertions(+), 346 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 420ff4bc67fd..a5eb5e7fd624 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -70,6 +70,19 @@ config HAVE_C_RECORDMCOUNT
help
C version of recordmcount available?
+config HAVE_BUILDTIME_MCOUNT_SORT
+ bool
+ help
+ An architecture selects this if it sorts the mcount_loc section
+ at build time.
+
+config BUILDTIME_MCOUNT_SORT
+ bool
+ default y
+ depends on HAVE_BUILDTIME_MCOUNT_SORT && DYNAMIC_FTRACE
+ help
+ Sort the mcount_loc section at build time.
+
config TRACER_MAX_TRACE
bool
@@ -915,6 +928,20 @@ config EVENT_TRACE_TEST_SYSCALLS
TBD - enable a way to actually call the syscalls as we test their
events
+config FTRACE_SORT_STARTUP_TEST
+ bool "Verify compile time sorting of ftrace functions"
+ depends on DYNAMIC_FTRACE
+ depends on BUILDTIME_MCOUNT_SORT
+ help
+ Sorting of the mcount_loc sections, which ftrace uses to find
+ where to patch functions for tracing and other callbacks, is
+ done at compile time. But if the sort is not done correctly, it
+ will cause non-deterministic failures. When this is set, the
+ sorted sections will be verified that they are indeed sorted,
+ and a warning will be issued if they are not.
+
+ If unsure, say N
+
config RING_BUFFER_STARTUP_TEST
bool "Ring buffer startup self test"
depends on RING_BUFFER
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1183c88634aa..21dea90eaa93 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -34,7 +34,7 @@ static struct trace_array *blk_tr;
static bool blk_tracer_enabled __read_mostly;
static LIST_HEAD(running_trace_list);
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(running_trace_lock);
+static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(running_trace_lock);
/* Select an alternative, minimalistic output rather than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -121,12 +121,12 @@ static void trace_note_tsk(struct task_struct *tsk)
struct blk_trace *bt;
tsk->btrace_seq = blktrace_seq;
- spin_lock_irqsave(&running_trace_lock, flags);
+ raw_spin_lock_irqsave(&running_trace_lock, flags);
list_for_each_entry(bt, &running_trace_list, running_list) {
trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
sizeof(tsk->comm), 0);
}
- spin_unlock_irqrestore(&running_trace_lock, flags);
+ raw_spin_unlock_irqrestore(&running_trace_lock, flags);
}
static void trace_note_time(struct blk_trace *bt)
@@ -310,10 +310,20 @@ record_it:
local_irq_restore(flags);
}
-static void blk_trace_free(struct blk_trace *bt)
+static void blk_trace_free(struct request_queue *q, struct blk_trace *bt)
{
relay_close(bt->rchan);
- debugfs_remove(bt->dir);
+
+ /*
+ * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created
+ * under 'q->debugfs_dir', so look them up there and remove them.
+ */
+ if (!bt->dir) {
+ debugfs_remove(debugfs_lookup("dropped", q->debugfs_dir));
+ debugfs_remove(debugfs_lookup("msg", q->debugfs_dir));
+ } else {
+ debugfs_remove(bt->dir);
+ }
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
@@ -335,10 +345,10 @@ static void put_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
-static void blk_trace_cleanup(struct blk_trace *bt)
+static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
put_probe_ref();
}
@@ -352,7 +362,7 @@ static int __blk_trace_remove(struct request_queue *q)
return -EINVAL;
if (bt->trace_state != Blktrace_running)
- blk_trace_cleanup(bt);
+ blk_trace_cleanup(q, bt);
return 0;
}
@@ -572,7 +582,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
ret = 0;
err:
if (ret)
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
@@ -666,9 +676,9 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
blktrace_seq++;
smp_mb();
bt->trace_state = Blktrace_running;
- spin_lock_irq(&running_trace_lock);
+ raw_spin_lock_irq(&running_trace_lock);
list_add(&bt->running_list, &running_trace_list);
- spin_unlock_irq(&running_trace_lock);
+ raw_spin_unlock_irq(&running_trace_lock);
trace_note_time(bt);
ret = 0;
@@ -676,9 +686,9 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
} else {
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
- spin_lock_irq(&running_trace_lock);
+ raw_spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
- spin_unlock_irq(&running_trace_lock);
+ raw_spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
ret = 0;
}
@@ -1045,7 +1055,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
}
r.device_from = cpu_to_be32(dev);
- r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
+ r.device_to = cpu_to_be32(disk_devt(rq->q->disk));
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
@@ -1608,15 +1618,15 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt->trace_state == Blktrace_running) {
bt->trace_state = Blktrace_stopped;
- spin_lock_irq(&running_trace_lock);
+ raw_spin_lock_irq(&running_trace_lock);
list_del_init(&bt->running_list);
- spin_unlock_irq(&running_trace_lock);
+ raw_spin_unlock_irq(&running_trace_lock);
relay_flush(bt->rchan);
}
put_probe_ref();
synchronize_rcu();
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return 0;
}
@@ -1647,7 +1657,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
return 0;
free_bt:
- blk_trace_free(bt);
+ blk_trace_free(q, bt);
return ret;
}
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 22061d38fc00..19028e072cdb 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -415,7 +415,9 @@ free:
static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
unsigned long long timestamp;
int index;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index be5f6b32a012..8b568f57cc24 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6394,6 +6394,27 @@ static int ftrace_cmp_ips(const void *a, const void *b)
return 0;
}
+#ifdef CONFIG_FTRACE_SORT_STARTUP_TEST
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+ int i;
+
+ for (i = 1; i < count; i++) {
+ if (WARN(start[i - 1] > start[i],
+ "[%d] %pS at %lx is not sorted with %pS at %lx\n", i,
+ (void *)start[i - 1], start[i - 1],
+ (void *)start[i], start[i]))
+ break;
+ }
+ if (i == count)
+ pr_info("ftrace section at %px sorted properly\n", start);
+}
+#else
+static void test_is_sorted(unsigned long *start, unsigned long count)
+{
+}
+#endif
+
static int ftrace_process_locs(struct module *mod,
unsigned long *start,
unsigned long *end)
@@ -6412,8 +6433,17 @@ static int ftrace_process_locs(struct module *mod,
if (!count)
return 0;
- sort(start, count, sizeof(*start),
- ftrace_cmp_ips, NULL);
+ /*
+ * Sorting of the mcount entries in vmlinux at build time depends on
+ * CONFIG_BUILDTIME_MCOUNT_SORT, while the mcount_loc entries in
+ * modules cannot be sorted at build time.
+ */
+ if (!IS_ENABLED(CONFIG_BUILDTIME_MCOUNT_SORT) || mod) {
+ sort(start, count, sizeof(*start),
+ ftrace_cmp_ips, NULL);
+ } else {
+ test_is_sorted(start, count);
+ }
start_pg = ftrace_allocate_pages(count);
if (!start_pg)
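The net effect: the vmlinux mcount_loc table is trusted to be pre-sorted when CONFIG_BUILDTIME_MCOUNT_SORT is enabled, while module tables are always sorted at load time. A minimal user-space sketch of the sortedness check that test_is_sorted() performs (names and values here are illustrative, not kernel code):

#include <stdio.h>

/* Illustrative stand-in for test_is_sorted(): report the first
 * out-of-order pair in an array of addresses. */
static int is_sorted(const unsigned long *addrs, unsigned long count)
{
	unsigned long i;

	for (i = 1; i < count; i++) {
		if (addrs[i - 1] > addrs[i]) {
			fprintf(stderr, "[%lu] %lx is not sorted with %lx\n",
				i, addrs[i - 1], addrs[i]);
			return 0;
		}
	}
	return 1;
}

int main(void)
{
	/* hypothetical mcount_loc entries; 0x10c0 > 0x1080 is the defect */
	unsigned long mcount_loc[] = { 0x1000, 0x1040, 0x10c0, 0x1080 };

	return is_sorted(mcount_loc, 4) ? 0 : 1;
}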
@@ -7161,7 +7191,6 @@ static int __init ftrace_nodyn_init(void)
core_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; }
-static inline void ftrace_startup_enable(int command) { }
static inline void ftrace_startup_all(int command) { }
# define ftrace_startup_sysctl() do { } while (0)
@@ -7317,7 +7346,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
static void
ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
@@ -7761,7 +7792,7 @@ int ftrace_is_dead(void)
/**
* register_ftrace_function - register a function for profiling
- * @ops - ops structure that holds the function for profiling.
+ * @ops: ops structure that holds the function for profiling.
*
* Register a function to be called by all functions in the
* kernel.
@@ -7788,7 +7819,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_function);
/**
* unregister_ftrace_function - unregister a function for profiling.
- * @ops - ops structure that holds the function to unregister
+ * @ops: ops structure that holds the function to unregister
*
* Unregister a function that was added to be called by ftrace profiling.
*/
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2699e9e562b1..05dfc7a12d3d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5898,16 +5898,13 @@ static __init int test_ringbuffer(void)
rb_data[cpu].buffer = buffer;
rb_data[cpu].cpu = cpu;
rb_data[cpu].cnt = cpu;
- rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
- "rbtester/%d", cpu);
+ rb_threads[cpu] = kthread_run_on_cpu(rb_test, &rb_data[cpu],
+ cpu, "rbtester/%u");
if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
pr_cont("FAILED\n");
ret = PTR_ERR(rb_threads[cpu]);
goto out_free;
}
-
- kthread_bind(rb_threads[cpu], cpu);
- wake_up_process(rb_threads[cpu]);
}
/* Now create the rb hammer! */
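kthread_run_on_cpu() bundles the create/bind/wake sequence that the removed lines spelled out. A hedged user-space analogue of the same pattern, pinning a thread to a CPU before it runs (pthreads, not the kernel API):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static void *worker(void *arg)
{
	printf("worker on CPU %d\n", sched_getcpu());
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_attr_t attr;
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(0, &set);

	/* Set affinity before the thread runs, mirroring how
	 * kthread_bind() precedes wake_up_process() in the old code. */
	pthread_attr_init(&attr);
	pthread_attr_setaffinity_np(&attr, sizeof(set), &set);
	pthread_create(&t, &attr, worker, NULL);
	pthread_join(t, NULL);
	return 0;
}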
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 78ea542ce3bc..eb44418574f9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -235,7 +235,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
static int __init set_trace_boot_options(char *str)
{
strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
- return 0;
+ return 1;
}
__setup("trace_options=", set_trace_boot_options);
@@ -246,12 +246,16 @@ static int __init set_trace_boot_clock(char *str)
{
strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
trace_boot_clock = trace_boot_clock_buf;
- return 0;
+ return 1;
}
__setup("trace_clock=", set_trace_boot_clock);
static int __init set_tracepoint_printk(char *str)
{
+ /* Ignore the "tp_printk_stop_on_boot" param */
+ if (*str == '_')
+ return 0;
+
if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
tracepoint_printk = 1;
return 1;
@@ -980,6 +984,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev
ring_buffer_write(buffer, event->array[0], &event->array[1]);
/* Release the temp buffer */
this_cpu_dec(trace_buffered_event_cnt);
+ /* ring_buffer_unlock_commit() enables preemption */
+ preempt_enable_notrace();
} else
ring_buffer_unlock_commit(buffer, event);
}
@@ -1468,10 +1474,12 @@ static int __init set_buf_size(char *str)
if (!str)
return 0;
buf_size = memparse(str, &str);
- /* nr_entries can not be zero */
- if (buf_size == 0)
- return 0;
- trace_buf_size = buf_size;
+ /*
+ * nr_entries can not be zero and the startup
+ * tests require some buffer space. Therefore
+ * ensure we have at least 4096 bytes of buffer.
+ */
+ trace_buf_size = max(4096UL, buf_size);
return 1;
}
__setup("trace_buf_size=", set_buf_size);
@@ -2601,6 +2609,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
trace_flags |= TRACE_FLAG_HARDIRQ;
if (in_serving_softirq())
trace_flags |= TRACE_FLAG_SOFTIRQ;
+ if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
+ trace_flags |= TRACE_FLAG_BH_OFF;
if (tif_need_resched())
trace_flags |= TRACE_FLAG_NEED_RESCHED;
@@ -2745,8 +2755,8 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
*current_rb = tr->array_buffer.buffer;
if (!tr->no_filter_buffering_ref &&
- (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
- (entry = this_cpu_read(trace_buffered_event))) {
+ (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
+ preempt_disable_notrace();
/*
* Filtering is on, so try to use the per cpu buffer first.
* This buffer will simulate a ring_buffer_event,
@@ -2764,33 +2774,38 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
* is still quicker than no copy on match, but having
* to discard out of the ring buffer on a failed match.
*/
- int max_len = PAGE_SIZE - struct_size(entry, array, 1);
+ if ((entry = __this_cpu_read(trace_buffered_event))) {
+ int max_len = PAGE_SIZE - struct_size(entry, array, 1);
- val = this_cpu_inc_return(trace_buffered_event_cnt);
+ val = this_cpu_inc_return(trace_buffered_event_cnt);
- /*
- * Preemption is disabled, but interrupts and NMIs
- * can still come in now. If that happens after
- * the above increment, then it will have to go
- * back to the old method of allocating the event
- * on the ring buffer, and if the filter fails, it
- * will have to call ring_buffer_discard_commit()
- * to remove it.
- *
- * Need to also check the unlikely case that the
- * length is bigger than the temp buffer size.
- * If that happens, then the reserve is pretty much
- * guaranteed to fail, as the ring buffer currently
- * only allows events less than a page. But that may
- * change in the future, so let the ring buffer reserve
- * handle the failure in that case.
- */
- if (val == 1 && likely(len <= max_len)) {
- trace_event_setup(entry, type, trace_ctx);
- entry->array[0] = len;
- return entry;
+ /*
+ * Preemption is disabled, but interrupts and NMIs
+ * can still come in now. If that happens after
+ * the above increment, then it will have to go
+ * back to the old method of allocating the event
+ * on the ring buffer, and if the filter fails, it
+ * will have to call ring_buffer_discard_commit()
+ * to remove it.
+ *
+ * Need to also check the unlikely case that the
+ * length is bigger than the temp buffer size.
+ * If that happens, then the reserve is pretty much
+ * guaranteed to fail, as the ring buffer currently
+ * only allows events less than a page. But that may
+ * change in the future, so let the ring buffer reserve
+ * handle the failure in that case.
+ */
+ if (val == 1 && likely(len <= max_len)) {
+ trace_event_setup(entry, type, trace_ctx);
+ entry->array[0] = len;
+ /* Return with preemption disabled */
+ return entry;
+ }
+ this_cpu_dec(trace_buffered_event_cnt);
}
- this_cpu_dec(trace_buffered_event_cnt);
+ /* __trace_buffer_lock_reserve() disables preemption */
+ preempt_enable_notrace();
}
entry = __trace_buffer_lock_reserve(*current_rb, type, len,
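The reserve/commit paths now form a preemption-disable pair: trace_event_buffer_lock_reserve() can return with preemption disabled (when the per-CPU buffered event is handed out), and __buffer_unlock_commit() or the discard path re-enables it. A hedged user-space model of that pairing, with a plain counter standing in for the preempt count:

#include <stdio.h>

static int preempt_count;

static void preempt_disable(void) { preempt_count++; }
static void preempt_enable(void)  { preempt_count--; }

/* Model of the reserve path: hand out a per-CPU slot with
 * "preemption" left disabled, or back off and re-enable. */
static int *reserve(int use_percpu_buf)
{
	static int slot;

	preempt_disable();
	if (use_percpu_buf)
		return &slot;	/* caller must commit or discard */
	preempt_enable();	/* fall back to the ring buffer path */
	return NULL;
}

/* Model of the commit path: pairs with a successful reserve(). */
static void commit(int *entry)
{
	if (entry)
		preempt_enable();
}

int main(void)
{
	commit(reserve(1));
	printf("preempt_count=%d\n", preempt_count);	/* 0: balanced */
	return 0;
}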
@@ -4183,7 +4198,7 @@ unsigned long trace_total_entries(struct trace_array *tr)
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off \n"
+ "# / _-----=> irqs-off/BH-disabled\n"
"# | / _----=> need-resched \n"
"# || / _---=> hardirq/softirq \n"
"# ||| / _--=> preempt-depth \n"
@@ -4224,7 +4239,7 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
@@ -4834,6 +4849,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
return 0;
}
+static int tracing_mark_open(struct inode *inode, struct file *filp)
+{
+ stream_open(inode, filp);
+ return tracing_open_generic_tr(inode, filp);
+}
+
static int tracing_release(struct inode *inode, struct file *file)
{
struct trace_array *tr = inode->i_private;
@@ -5635,7 +5656,7 @@ static const char readme_msg[] =
"\t - a numeric literal: e.g. ms_per_sec=1000,\n"
"\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
"\n"
- "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
+ "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
"\t multiplication(*) and division(/) operators. An operand can be either a\n"
"\t variable reference, field or numeric literal.\n"
"\n"
@@ -6718,10 +6739,9 @@ waitagain:
cnt = PAGE_SIZE - 1;
/* reset all but tr, trace, and overruns */
- memset_startat(iter, 0, seq);
+ trace_iterator_reset(iter);
cpumask_clear(iter->started);
trace_seq_init(&iter->seq);
- iter->pos = -1;
trace_event_read_lock();
trace_access_lock(iter->cpu_file);
@@ -7110,9 +7130,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (tt)
event_triggers_post_call(tr->trace_marker_file, tt);
- if (written > 0)
- *fpos += written;
-
return written;
}
@@ -7171,9 +7188,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
__buffer_unlock_commit(buffer, event);
- if (written > 0)
- *fpos += written;
-
return written;
}
@@ -7573,16 +7587,14 @@ static const struct file_operations tracing_free_buffer_fops = {
};
static const struct file_operations tracing_mark_fops = {
- .open = tracing_open_generic_tr,
+ .open = tracing_mark_open,
.write = tracing_mark_write,
- .llseek = generic_file_llseek,
.release = tracing_release_generic_tr,
};
static const struct file_operations tracing_mark_raw_fops = {
- .open = tracing_open_generic_tr,
+ .open = tracing_mark_open,
.write = tracing_mark_raw_write,
- .llseek = generic_file_llseek,
.release = tracing_release_generic_tr,
};
@@ -7734,7 +7746,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
err = kzalloc(sizeof(*err), GFP_KERNEL);
if (!err)
err = ERR_PTR(-ENOMEM);
- tr->n_err_log_entries++;
+ else
+ tr->n_err_log_entries++;
return err;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 38715aa6cfdf..c5b09c31e077 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -83,6 +83,9 @@ enum trace_type {
#undef __dynamic_array
#define __dynamic_array(type, item) type item[];
+#undef __rel_dynamic_array
+#define __rel_dynamic_array(type, item) type item[];
+
#undef F_STRUCT
#define F_STRUCT(args...) args
@@ -133,7 +136,6 @@ struct kprobe_trace_entry_head {
struct eprobe_trace_entry_head {
struct trace_entry ent;
- unsigned int type;
};
struct kretprobe_trace_entry_head {
@@ -1334,10 +1336,12 @@ __trace_event_discard_commit(struct trace_buffer *buffer,
struct ring_buffer_event *event)
{
if (this_cpu_read(trace_buffered_event) == event) {
- /* Simply release the temp buffer */
+ /* Simply release the temp buffer and enable preemption */
this_cpu_dec(trace_buffered_event_cnt);
+ preempt_enable_notrace();
return;
}
+ /* ring_buffer_discard_commit() enables preemption */
ring_buffer_discard_commit(buffer, event);
}
@@ -1465,6 +1469,7 @@ struct filter_pred {
static inline bool is_string_field(struct ftrace_event_field *field)
{
return field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING ||
field->filter_type == FILTER_STATIC_STRING ||
field->filter_type == FILTER_PTR_STRING ||
field->filter_type == FILTER_COMM;
@@ -1572,15 +1577,13 @@ extern int event_enable_trigger_print(struct seq_file *m,
struct event_trigger_data *data);
extern void event_enable_trigger_free(struct event_trigger_ops *ops,
struct event_trigger_data *data);
-extern int event_enable_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+extern int event_enable_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
extern int event_enable_register_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
extern void event_enable_unregister_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *test,
struct trace_event_file *file);
extern void trigger_data_free(struct event_trigger_data *data);
@@ -1606,6 +1609,30 @@ get_named_trigger_data(struct event_trigger_data *data);
extern int register_event_command(struct event_command *cmd);
extern int unregister_event_command(struct event_command *cmd);
extern int register_trigger_hist_enable_disable_cmds(void);
+extern bool event_trigger_check_remove(const char *glob);
+extern bool event_trigger_empty_param(const char *param);
+extern int event_trigger_separate_filter(char *param_and_filter, char **param,
+ char **filter, bool param_required);
+extern struct event_trigger_data *
+event_trigger_alloc(struct event_command *cmd_ops,
+ char *cmd,
+ char *param,
+ void *private_data);
+extern int event_trigger_parse_num(char *trigger,
+ struct event_trigger_data *trigger_data);
+extern int event_trigger_set_filter(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *param,
+ struct event_trigger_data *trigger_data);
+extern void event_trigger_reset_filter(struct event_command *cmd_ops,
+ struct event_trigger_data *trigger_data);
+extern int event_trigger_register(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ char *cmd,
+ char *trigger,
+ struct event_trigger_data *trigger_data,
+ int *n_registered);
/**
* struct event_trigger_ops - callbacks for trace event triggers
@@ -1613,10 +1640,20 @@ extern int register_trigger_hist_enable_disable_cmds(void);
* The methods in this structure provide per-event trigger hooks for
* various trigger operations.
*
+ * The @init and @free methods are used during trigger setup and
+ * teardown, typically called from an event_command's @parse()
+ * function implementation.
+ *
+ * The @print method is used to print the trigger spec.
+ *
+ * The @trigger method is the function that actually implements the
+ * trigger and is called in the context of the triggering event
+ * whenever that event occurs.
+ *
* All the methods below, except for @init() and @free(), must be
* implemented.
*
- * @func: The trigger 'probe' function called when the triggering
+ * @trigger: The trigger 'probe' function called when the triggering
* event occurs. The data passed into this callback is the data
* that was supplied to the event_command @reg() function that
* registered the trigger (see struct event_command) along with
@@ -1645,9 +1682,10 @@ extern int register_trigger_hist_enable_disable_cmds(void);
* (see trace_event_triggers.c).
*/
struct event_trigger_ops {
- void (*func)(struct event_trigger_data *data,
- struct trace_buffer *buffer, void *rec,
- struct ring_buffer_event *rbe);
+ void (*trigger)(struct event_trigger_data *data,
+ struct trace_buffer *buffer,
+ void *rec,
+ struct ring_buffer_event *rbe);
int (*init)(struct event_trigger_ops *ops,
struct event_trigger_data *data);
void (*free)(struct event_trigger_ops *ops,
@@ -1696,7 +1734,7 @@ struct event_trigger_ops {
* All the methods below, except for @set_filter() and @unreg_all(),
* must be implemented.
*
- * @func: The callback function responsible for parsing and
+ * @parse: The callback function responsible for parsing and
* registering the trigger written to the 'trigger' file by the
* user. It allocates the trigger instance and registers it with
* the appropriate trace event. It makes use of the other
@@ -1731,21 +1769,24 @@ struct event_trigger_ops {
*
* @get_trigger_ops: The callback function invoked to retrieve the
* event_trigger_ops implementation associated with the command.
+ * This callback function allows a single event_command to
+ * support multiple trigger implementations via different sets of
+ * event_trigger_ops, depending on the value of the @param
+ * string.
*/
struct event_command {
struct list_head list;
char *name;
enum event_trigger_type trigger_type;
int flags;
- int (*func)(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *params);
+ int (*parse)(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd,
+ char *param_and_filter);
int (*reg)(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
void (*unreg)(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file);
void (*unreg_all)(struct trace_event_file *file);
@@ -1926,14 +1967,7 @@ extern struct trace_iterator *tracepoint_print_iter;
*/
static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
{
- const size_t offset = offsetof(struct trace_iterator, seq);
-
- /*
- * Keep gcc from complaining about overwriting more than just one
- * member in the structure.
- */
- memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset);
-
+ memset_startat(iter, 0, seq);
iter->pos = -1;
}
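memset_startat() zeroes a structure from a named member through the end, which is exactly the open-coded offsetof/memset dance it replaces here. A hedged user-space sketch of that pattern (the kernel macro lives in include/linux/string.h; this local copy is for illustration only):

#include <stdio.h>
#include <string.h>
#include <stddef.h>

#define memset_startat(obj, v, member)					   \
	memset((char *)(obj) + offsetof(__typeof__(*(obj)), member), (v), \
	       sizeof(*(obj)) - offsetof(__typeof__(*(obj)), member))

struct iter {
	int tr;		/* preserved */
	int trace;	/* preserved */
	int seq;	/* zeroing starts here */
	int pos;
};

int main(void)
{
	struct iter it = { 1, 2, 3, 4 };

	memset_startat(&it, 0, seq);
	it.pos = -1;
	printf("%d %d %d %d\n", it.tr, it.trace, it.seq, it.pos);
	/* prints: 1 2 0 -1 */
	return 0;
}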
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 928867f527e7..541aa13581b9 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -242,7 +242,6 @@ static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
static int eprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret;
struct eprobe_trace_entry_head field;
struct trace_probe *tp;
@@ -250,8 +249,6 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call)
if (WARN_ON_ONCE(!tp))
return -ENOENT;
- DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
-
return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
@@ -270,7 +267,9 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
struct trace_event_call *pevent;
struct trace_event *probed_event;
struct trace_seq *s = &iter->seq;
+ struct trace_eprobe *ep;
struct trace_probe *tp;
+ unsigned int type;
field = (struct eprobe_trace_entry_head *)iter->ent;
tp = trace_probe_primary_from_call(
@@ -278,15 +277,18 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
if (WARN_ON_ONCE(!tp))
goto out;
+ ep = container_of(tp, struct trace_eprobe, tp);
+ type = ep->event->event.type;
+
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
- probed_event = ftrace_find_event(field->type);
+ probed_event = ftrace_find_event(type);
if (probed_event) {
pevent = container_of(probed_event, struct trace_event_call, event);
trace_seq_printf(s, "%s.%s", pevent->class->system,
trace_event_name(pevent));
} else {
- trace_seq_printf(s, "%u", field->type);
+ trace_seq_printf(s, "%u", type);
}
trace_seq_putc(s, ')');
@@ -489,25 +491,15 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
if (trace_trigger_soft_disabled(edata->file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = edata->file;
-
dsize = get_eprobe_size(&edata->ep->tp, rec);
- fbuffer.regs = NULL;
-
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
- call->event.type,
- sizeof(*entry) + edata->ep->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+
+ entry = trace_event_buffer_reserve(&fbuffer, edata->file,
+ sizeof(*entry) + edata->ep->tp.size + dsize);
+
+ if (!entry)
return;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
- if (edata->ep->event)
- entry->type = edata->ep->event->event.type;
- else
- entry->type = 0;
store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
@@ -549,29 +541,29 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
}
static struct event_trigger_ops eprobe_trigger_ops = {
- .func = eprobe_trigger_func,
+ .trigger = eprobe_trigger_func,
.print = eprobe_trigger_print,
.init = eprobe_trigger_init,
.free = eprobe_trigger_free,
};
-static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
return -1;
}
-static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
- struct event_trigger_data *data,
- struct trace_event_file *file)
+static int eprobe_trigger_reg_func(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
{
return -1;
}
-static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
- struct event_trigger_data *data,
- struct trace_event_file *file)
+static void eprobe_trigger_unreg_func(char *glob,
+ struct event_trigger_data *data,
+ struct trace_event_file *file)
{
}
@@ -586,7 +578,7 @@ static struct event_command event_trigger_cmd = {
.name = "eprobe",
.trigger_type = ETT_EVENT_EPROBE,
.flags = EVENT_CMD_FL_NEEDS_REC,
- .func = eprobe_trigger_cmd_func,
+ .parse = eprobe_trigger_cmd_parse,
.reg = eprobe_trigger_reg_func,
.unreg = eprobe_trigger_unreg_func,
.unreg_all = NULL,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 92be9cb1d7d4..da69519c4905 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -384,6 +384,12 @@ static void test_event_printk(struct trace_event_call *call)
if (!(dereference_flags & (1ULL << arg)))
goto next_arg;
+ /* Check for __get_sockaddr */
+ if (str_has_prefix(fmt + i, "__get_sockaddr(")) {
+ dereference_flags &= ~(1ULL << arg);
+ goto next_arg;
+ }
+
/* Find the REC-> in the argument */
c = strchr(fmt + i, ',');
r = strstr(fmt + i, "REC->");
@@ -759,7 +765,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
@@ -783,7 +791,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
- struct task_struct *prev, struct task_struct *next)
+ unsigned int prev_state,
+ struct task_struct *prev,
+ struct task_struct *next)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
@@ -3461,10 +3471,8 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events,
tr, &ftrace_tr_enable_fops);
- if (!entry) {
- pr_warn("Could not create tracefs 'enable' entry\n");
+ if (!entry)
return -ENOMEM;
- }
/* These are not as crucial, just warn if they are not created */
@@ -3480,17 +3488,13 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
/* ring buffer internal formats */
- entry = trace_create_file("header_page", TRACE_MODE_READ, d_events,
+ trace_create_file("header_page", TRACE_MODE_READ, d_events,
ring_buffer_print_page_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_page' entry\n");
- entry = trace_create_file("header_event", TRACE_MODE_READ, d_events,
+ trace_create_file("header_event", TRACE_MODE_READ, d_events,
ring_buffer_print_entry_header,
&ftrace_show_header_fops);
- if (!entry)
- pr_warn("Could not create tracefs 'header_event' entry\n");
tr->event_dir = d_events;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index c9124038b140..b458a9afa2c0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -5,6 +5,7 @@
* Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
*/
+#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
@@ -654,6 +655,52 @@ DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
DEFINE_EQUALITY_PRED(8);
+/* user space strings temp buffer */
+#define USTRING_BUF_SIZE 1024
+
+struct ustring_buffer {
+ char buffer[USTRING_BUF_SIZE];
+};
+
+static __percpu struct ustring_buffer *ustring_per_cpu;
+
+static __always_inline char *test_string(char *str)
+{
+ struct ustring_buffer *ubuf;
+ char *kstr;
+
+ if (!ustring_per_cpu)
+ return NULL;
+
+ ubuf = this_cpu_ptr(ustring_per_cpu);
+ kstr = ubuf->buffer;
+
+ /* For safety, do not trust the string pointer */
+ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
+ return NULL;
+ return kstr;
+}
+
+static __always_inline char *test_ustring(char *str)
+{
+ struct ustring_buffer *ubuf;
+ char __user *ustr;
+ char *kstr;
+
+ if (!ustring_per_cpu)
+ return NULL;
+
+ ubuf = this_cpu_ptr(ustring_per_cpu);
+ kstr = ubuf->buffer;
+
+ /* user space address? */
+ ustr = (char __user *)str;
+ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
+ return NULL;
+
+ return kstr;
+}
+
/* Filter predicate for fixed sized arrays of characters */
static int filter_pred_string(struct filter_pred *pred, void *event)
{
@@ -667,19 +714,43 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
return match;
}
-/* Filter predicate for char * pointers */
-static int filter_pred_pchar(struct filter_pred *pred, void *event)
+static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
{
- char **addr = (char **)(event + pred->offset);
int cmp, match;
- int len = strlen(*addr) + 1; /* including tailing '\0' */
+ int len;
- cmp = pred->regex.match(*addr, &pred->regex, len);
+ len = strlen(str) + 1; /* including trailing '\0' */
+ cmp = pred->regex.match(str, &pred->regex, len);
match = cmp ^ pred->not;
return match;
}
+/* Filter predicate for char * pointers */
+static int filter_pred_pchar(struct filter_pred *pred, void *event)
+{
+ char **addr = (char **)(event + pred->offset);
+ char *str;
+
+ str = test_string(*addr);
+ if (!str)
+ return 0;
+
+ return filter_pchar(pred, str);
+}
+
+/* Filter predicate for char * pointers in user space */
+static int filter_pred_pchar_user(struct filter_pred *pred, void *event)
+{
+ char **addr = (char **)(event + pred->offset);
+ char *str;
+
+ str = test_ustring(*addr);
+ if (!str)
+ return 0;
+
+ return filter_pchar(pred, str);
+}
/*
* Filter predicate for dynamic sized arrays of characters.
@@ -706,6 +777,29 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
return match;
}
+/*
+ * Filter predicate for relative dynamic sized arrays of characters.
+ * These are implemented through a list of strings at the end
+ * of the entry as same as dynamic string.
+ * The difference is that the relative one records the location offset
+ * from the field itself, not the event entry.
+ */
+static int filter_pred_strrelloc(struct filter_pred *pred, void *event)
+{
+ u32 *item = (u32 *)(event + pred->offset);
+ u32 str_item = *item;
+ int str_loc = str_item & 0xffff;
+ int str_len = str_item >> 16;
+ char *addr = (char *)(&item[1]) + str_loc;
+ int cmp, match;
+
+ cmp = pred->regex.match(addr, &pred->regex, str_len);
+
+ match = cmp ^ pred->not;
+
+ return match;
+}
+
/* Filter predicate for CPUs. */
static int filter_pred_cpu(struct filter_pred *pred, void *event)
{
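The __rel_loc word packs (length << 16) | offset, with the offset measured from the end of the u32 field itself rather than from the start of the event entry. A hedged, self-contained sketch of the decode performed by filter_pred_strrelloc() above (layout and values are illustrative):

#include <stdio.h>
#include <string.h>

struct rel_loc_event {
	unsigned int rel_loc;	/* (len << 16) | offset */
	char payload[32];	/* string data follows the field */
};

int main(void)
{
	struct rel_loc_event ev;
	const char *msg = "hello";
	unsigned int len = strlen(msg) + 1;

	memcpy(ev.payload, msg, len);		/* offset 0 past the field */
	ev.rel_loc = (len << 16) | 0;

	/* The decode used by the filter predicate: */
	unsigned int *item = &ev.rel_loc;
	unsigned int str_loc = *item & 0xffff;
	unsigned int str_len = *item >> 16;
	char *addr = (char *)&item[1] + str_loc;

	printf("len=%u str=%s\n", str_len, addr);	/* len=6 str=hello */
	return 0;
}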
@@ -756,7 +850,7 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
*
* Note:
* - @str might not be NULL-terminated if it's of type DYN_STRING
- * or STATIC_STRING, unless @len is zero.
+ * RDYN_STRING, or STATIC_STRING, unless @len is zero.
*/
static int regex_match_full(char *str, struct regex *r, int len)
@@ -1083,6 +1177,9 @@ int filter_assign_type(const char *type)
if (strstr(type, "__data_loc") && strstr(type, "char"))
return FILTER_DYN_STRING;
+ if (strstr(type, "__rel_loc") && strstr(type, "char"))
+ return FILTER_RDYN_STRING;
+
if (strchr(type, '[') && strstr(type, "char"))
return FILTER_STATIC_STRING;
@@ -1158,6 +1255,7 @@ static int parse_pred(const char *str, void *data,
struct filter_pred *pred = NULL;
char num_buf[24]; /* Big enough to hold an address */
char *field_name;
+ bool ustring = false;
char q;
u64 val;
int len;
@@ -1192,6 +1290,12 @@ static int parse_pred(const char *str, void *data,
return -EINVAL;
}
+ /* See if the field is a user space string */
+ if ((len = str_has_prefix(str + i, ".ustring"))) {
+ ustring = true;
+ i += len;
+ }
+
while (isspace(str[i]))
i++;
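With this, a filter can mark a string-pointer field as user-space memory via a '.ustring' suffix (for example, a filter string along the lines of: filename.ustring ~ "/usr/*"; the exact field name is hypothetical), and the copy above then goes through the nofault user accessor. A hedged user-space sketch of the suffix detection, using a local stand-in for str_has_prefix(), which in the kernel returns the prefix length on match and 0 otherwise:

#include <stdio.h>
#include <string.h>

/* Local stand-in for the kernel's str_has_prefix(). */
static size_t str_has_prefix(const char *str, const char *prefix)
{
	size_t len = strlen(prefix);

	return strncmp(str, prefix, len) == 0 ? len : 0;
}

int main(void)
{
	const char *pred = "filename.ustring == \"/bin/sh\"";
	size_t i = strlen("filename");	/* parser position after the field name */
	size_t len;
	int ustring = 0;

	if ((len = str_has_prefix(pred + i, ".ustring"))) {
		ustring = 1;
		i += len;
	}
	printf("ustring=%d rest='%s'\n", ustring, pred + i);
	return 0;
}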
@@ -1318,10 +1422,24 @@ static int parse_pred(const char *str, void *data,
pred->fn = filter_pred_string;
pred->regex.field_len = field->size;
- } else if (field->filter_type == FILTER_DYN_STRING)
+ } else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn = filter_pred_strloc;
- else
- pred->fn = filter_pred_pchar;
+ } else if (field->filter_type == FILTER_RDYN_STRING)
+ pred->fn = filter_pred_strrelloc;
+ else {
+
+ if (!ustring_per_cpu) {
+ /* Once allocated, keep it around for good */
+ ustring_per_cpu = alloc_percpu(struct ustring_buffer);
+ if (!ustring_per_cpu)
+ goto err_mem;
+ }
+
+ if (ustring)
+ pred->fn = filter_pred_pchar_user;
+ else
+ pred->fn = filter_pred_pchar;
+ }
/* go past the last quote */
i++;
@@ -1387,6 +1505,9 @@ static int parse_pred(const char *str, void *data,
err_free:
kfree(pred);
return -EINVAL;
+err_mem:
+ kfree(pred);
+ return -ENOMEM;
}
enum {
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 319f9c8ca7e7..dc7f733b4cb3 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -217,6 +217,20 @@ static u64 hist_field_dynstring(struct hist_field *hist_field,
return (u64)(unsigned long)addr;
}
+static u64 hist_field_reldynstring(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ u32 *item = event + hist_field->field->offset;
+ u32 str_item = *item;
+ int str_loc = str_item & 0xffff;
+ char *addr = (char *)&item[1] + str_loc;
+
+ return (u64)(unsigned long)addr;
+}
+
static u64 hist_field_pstring(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -1956,8 +1970,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
if (field->filter_type == FILTER_STATIC_STRING) {
hist_field->fn = hist_field_string;
hist_field->size = field->size;
- } else if (field->filter_type == FILTER_DYN_STRING)
+ } else if (field->filter_type == FILTER_DYN_STRING) {
hist_field->fn = hist_field_dynstring;
+ } else if (field->filter_type == FILTER_RDYN_STRING)
+ hist_field->fn = hist_field_reldynstring;
else
hist_field->fn = hist_field_pstring;
} else {
@@ -2273,9 +2289,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
/*
* For backward compatibility, if field_name
* was "cpu", then we treat this the same as
- * common_cpu.
+ * common_cpu. This also works for "CPU".
*/
- if (strcmp(field_name, "cpu") == 0) {
+ if (field && field->filter_type == FILTER_CPU) {
*flags |= HIST_FIELD_FL_CPU;
} else {
hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
@@ -2487,6 +2503,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
expr->fn = hist_field_unary_minus;
expr->operands[0] = operand1;
+ expr->size = operand1->size;
+ expr->is_signed = operand1->is_signed;
expr->operator = FIELD_OP_UNARY_MINUS;
expr->name = expr_str(expr, 0);
expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
@@ -2703,6 +2721,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
/* The operand sizes should be the same, so just pick one */
expr->size = operand1->size;
+ expr->is_signed = operand1->is_signed;
expr->operator = field_op;
expr->type = kstrdup_const(operand1->type, GFP_KERNEL);
@@ -2745,9 +2764,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data,
}
static struct event_command trigger_hist_cmd;
-static int event_hist_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param);
+static int event_hist_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
static bool compatible_keys(struct hist_trigger_data *target_hist_data,
struct hist_trigger_data *hist_data,
@@ -2950,8 +2969,8 @@ create_field_var_hist(struct hist_trigger_data *target_hist_data,
var_hist->hist_data = hist_data;
/* Create the new histogram with our variable */
- ret = event_hist_trigger_func(&trigger_hist_cmd, file,
- "", "hist", cmd);
+ ret = event_hist_trigger_parse(&trigger_hist_cmd, file,
+ "", "hist", cmd);
if (ret) {
kfree(cmd);
kfree(var_hist->cmd);
@@ -3919,6 +3938,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
var_ref_idx = find_var_ref_idx(hist_data, var_ref);
if (WARN_ON(var_ref_idx < 0)) {
+ kfree(p);
ret = var_ref_idx;
goto err;
}
@@ -4812,7 +4832,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
cmp_fn = tracing_map_cmp_none;
- else if (!field)
+ else if (!field || hist_field->flags & HIST_FIELD_FL_CPU)
cmp_fn = tracing_map_cmp_num(hist_field->size,
hist_field->is_signed);
else if (is_string_field(field))
@@ -4961,7 +4981,8 @@ static inline void add_to_key(char *compound_key, void *key,
struct ftrace_event_field *field;
field = key_field->field;
- if (field->filter_type == FILTER_DYN_STRING)
+ if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING)
size = *(u32 *)(rec + field->offset) >> 16;
else if (field->filter_type == FILTER_STATIC_STRING)
size = field->size;
@@ -5712,8 +5733,8 @@ static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
for (i = 0; i < hist_data->n_field_var_hists; i++) {
file = hist_data->field_var_hists[i]->hist_data->event_file;
cmd = hist_data->field_var_hists[i]->cmd;
- ret = event_hist_trigger_func(&trigger_hist_cmd, file,
- "!hist", "hist", cmd);
+ ret = event_hist_trigger_parse(&trigger_hist_cmd, file,
+ "!hist", "hist", cmd);
WARN_ON_ONCE(ret < 0);
}
}
@@ -5742,7 +5763,7 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_hist_trigger_ops = {
- .func = event_hist_trigger,
+ .trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_init,
.free = event_hist_trigger_free,
@@ -5776,7 +5797,7 @@ static void event_hist_trigger_named_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_hist_trigger_named_ops = {
- .func = event_hist_trigger,
+ .trigger = event_hist_trigger,
.print = event_hist_trigger_print,
.init = event_hist_trigger_named_init,
.free = event_hist_trigger_named_free,
@@ -5893,7 +5914,7 @@ static bool hist_trigger_match(struct event_trigger_data *data,
return true;
}
-static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
+static int hist_register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -6045,7 +6066,7 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data,
return false;
}
-static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
+static void hist_unregister_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -6129,9 +6150,9 @@ static void hist_unreg_all(struct trace_event_file *file)
}
}
-static int event_hist_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+static int event_hist_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
unsigned int hist_trigger_bits = TRACING_MAP_BITS_DEFAULT;
struct event_trigger_data *trigger_data;
@@ -6146,7 +6167,9 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
lockdep_assert_held(&event_mutex);
- if (glob && strlen(glob)) {
+ WARN_ON(!glob);
+
+ if (strlen(glob)) {
hist_err_clear();
last_cmd_set(file, param);
}
@@ -6179,7 +6202,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
continue;
}
break;
- } while (p);
+ } while (1);
if (!p)
param = NULL;
@@ -6245,7 +6268,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
goto out_free;
}
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
@@ -6254,7 +6277,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
goto out_free;
}
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of triggers registered,
* but if it didn't register any it returns zero. Consider no
@@ -6297,7 +6320,7 @@ enable:
return ret;
out_unreg:
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
out_free:
if (cmd_ops->set_filter)
cmd_ops->set_filter(NULL, trigger_data, NULL);
@@ -6314,7 +6337,7 @@ static struct event_command trigger_hist_cmd = {
.name = "hist",
.trigger_type = ETT_EVENT_HIST,
.flags = EVENT_CMD_FL_NEEDS_REC,
- .func = event_hist_trigger_func,
+ .parse = event_hist_trigger_parse,
.reg = hist_register_trigger,
.unreg = hist_unregister_trigger,
.unreg_all = hist_unreg_all,
@@ -6366,28 +6389,28 @@ hist_enable_count_trigger(struct event_trigger_data *data,
}
static struct event_trigger_ops hist_enable_trigger_ops = {
- .func = hist_enable_trigger,
+ .trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_enable_count_trigger_ops = {
- .func = hist_enable_count_trigger,
+ .trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_disable_trigger_ops = {
- .func = hist_enable_trigger,
+ .trigger = hist_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops hist_disable_count_trigger_ops = {
- .func = hist_enable_count_trigger,
+ .trigger = hist_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
@@ -6429,7 +6452,7 @@ static void hist_enable_unreg_all(struct trace_event_file *file)
static struct event_command trigger_hist_enable_cmd = {
.name = ENABLE_HIST_STR,
.trigger_type = ETT_HIST_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
@@ -6440,7 +6463,7 @@ static struct event_command trigger_hist_enable_cmd = {
static struct event_command trigger_hist_disable_cmd = {
.name = DISABLE_HIST_STR,
.trigger_type = ETT_HIST_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.unreg_all = hist_enable_unreg_all,
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index c188045c5f97..d6b4935a78c0 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -168,10 +168,14 @@ static void *trace_alloc_entry(struct trace_event_call *call, int *size)
continue;
if (field->filter_type == FILTER_STATIC_STRING)
continue;
- if (field->filter_type == FILTER_DYN_STRING) {
+ if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING) {
u32 *str_item;
int str_loc = entry_size & 0xffff;
+ if (field->filter_type == FILTER_RDYN_STRING)
+ str_loc -= field->offset + field->size;
+
str_item = (u32 *)(entry + field->offset);
*str_item = str_loc; /* string length is 0. */
} else {
@@ -214,7 +218,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
if (field->filter_type == FILTER_STATIC_STRING) {
strlcpy(entry + field->offset, addr, field->size);
- } else if (field->filter_type == FILTER_DYN_STRING) {
+ } else if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING) {
int str_len = strlen(addr) + 1;
int str_loc = entry_size & 0xffff;
u32 *str_item;
@@ -229,6 +234,8 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
strlcpy(entry + (entry_size - str_len), addr, str_len);
str_item = (u32 *)(entry + field->offset);
+ if (field->filter_type == FILTER_RDYN_STRING)
+ str_loc -= field->offset + field->size;
*str_item = (str_len << 16) | str_loc;
} else {
char **paddr;
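For injected events, str_loc starts as the string's absolute offset within the entry (entry_size), so for a __rel_loc field it has to be rebased to the end of the u32 field, i.e. reduced by (field->offset + field->size). A small worked example of that arithmetic (all numbers hypothetical):

#include <stdio.h>

int main(void)
{
	int entry_size = 48;	/* absolute offset of the string data */
	int field_offset = 8;	/* offset of the __rel_loc u32 field */
	int field_size = 4;	/* sizeof(u32) */
	int str_len = 6;

	/* Rebase: relative to the end of the field, as above. */
	int str_loc = entry_size - (field_offset + field_size);	/* 36 */
	unsigned int item = (str_len << 16) | str_loc;

	printf("rel_loc word = 0x%08x (len=%u, off=%u)\n",
	       item, item >> 16, item & 0xffff);
	return 0;
}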
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index ca9c13b2ecf4..154db74dadbc 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1979,7 +1979,7 @@ EXPORT_SYMBOL_GPL(synth_event_add_next_val);
/**
* synth_event_add_val - Add a named field's value to an open synth trace
* @field_name: The name of the synthetic event field value to set
- * @val: The value to set the next field to
+ * @val: The value to set the named field to
* @trace_state: A pointer to object tracking the piecewise trace state
*
* Set the value of the named field in an event that's been opened by
@@ -2054,6 +2054,13 @@ static int create_synth_event(const char *raw_command)
last_cmd_set(raw_command);
+ name = raw_command;
+
+ /* Don't try to process if not our system */
+ if (name[0] != 's' || name[1] != ':')
+ return -ECANCELED;
+ name += 2;
+
p = strpbrk(raw_command, " \t");
if (!p) {
synth_err(SYNTH_ERR_INVALID_CMD, 0);
@@ -2062,12 +2069,6 @@ static int create_synth_event(const char *raw_command)
fields = skip_spaces(p);
- name = raw_command;
-
- if (name[0] != 's' || name[1] != ':')
- return -ECANCELED;
- name += 2;
-
/* This interface accepts group name prefix */
if (strchr(name, '/')) {
len = str_has_prefix(name, SYNTH_SYSTEM "/");
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 3d5c07239a2a..7eb9d04f1c2e 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -68,7 +68,7 @@ event_triggers_call(struct trace_event_file *file,
if (data->paused)
continue;
if (!rec) {
- data->ops->func(data, buffer, rec, event);
+ data->ops->trigger(data, buffer, rec, event);
continue;
}
filter = rcu_dereference_sched(data->filter);
@@ -78,12 +78,26 @@ event_triggers_call(struct trace_event_file *file,
tt |= data->cmd_ops->trigger_type;
continue;
}
- data->ops->func(data, buffer, rec, event);
+ data->ops->trigger(data, buffer, rec, event);
}
return tt;
}
EXPORT_SYMBOL_GPL(event_triggers_call);
+bool __trace_trigger_soft_disabled(struct trace_event_file *file)
+{
+ unsigned long eflags = file->flags;
+
+ if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
+ event_triggers_call(file, NULL, NULL, NULL);
+ if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
+ return true;
+ if (eflags & EVENT_FILE_FL_PID_FILTER)
+ return trace_event_ignore_this_pid(file);
+ return false;
+}
+EXPORT_SYMBOL_GPL(__trace_trigger_soft_disabled);
+
/**
* event_triggers_post_call - Call 'post_triggers' for a trace event
* @file: The trace_event_file associated with the event
@@ -106,7 +120,7 @@ event_triggers_post_call(struct trace_event_file *file,
if (data->paused)
continue;
if (data->cmd_ops->trigger_type & tt)
- data->ops->func(data, NULL, NULL, NULL);
+ data->ops->trigger(data, NULL, NULL, NULL);
}
}
EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -245,7 +259,7 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
mutex_lock(&trigger_cmd_mutex);
list_for_each_entry(p, &trigger_commands, list) {
if (strcmp(p->name, command) == 0) {
- ret = p->func(p, file, buff, command, next);
+ ret = p->parse(p, file, buff, command, next);
goto out_unlock;
}
}
@@ -540,7 +554,6 @@ void update_cond_flag(struct trace_event_file *file)
/**
* register_trigger - Generic event_command @reg implementation
* @glob: The raw string used to register the trigger
- * @ops: The trigger ops associated with the trigger
* @data: Trigger-specific data to associate with the trigger
* @file: The trace_event_file associated with the event
*
@@ -551,7 +564,7 @@ void update_cond_flag(struct trace_event_file *file)
*
* Return: 0 on success, errno otherwise
*/
-static int register_trigger(char *glob, struct event_trigger_ops *ops,
+static int register_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -589,7 +602,6 @@ out:
/**
* unregister_trigger - Generic event_command @unreg implementation
* @glob: The raw string used to register the trigger
- * @ops: The trigger ops associated with the trigger
* @test: Trigger-specific data used to find the trigger to remove
* @file: The trace_event_file associated with the event
*
@@ -598,7 +610,7 @@ out:
* Usually used directly as the @unreg method in event command
* implementations.
*/
-static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
+static void unregister_trigger(char *glob,
struct event_trigger_data *test,
struct trace_event_file *file)
{
@@ -621,8 +633,350 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
data->ops->free(data->ops, data);
}
+/*
+ * Event trigger parsing helper functions.
+ *
+ * These functions help make it easier to write an event trigger
+ * parsing function i.e. the struct event_command.parse() callback
+ * function responsible for parsing and registering a trigger command
+ * written to the 'trigger' file.
+ *
+ * A trigger command (or just 'trigger' for short) takes the form:
+ * [trigger] [if filter]
+ *
+ * The struct event_command.parse() callback (and other struct
+ * event_command functions) refer to several components of a trigger
+ * command. Those same components are referenced by the event trigger
+ * parsing helper functions defined below. These components are:
+ *
+ * cmd - the trigger command name
+ * glob - the trigger command name optionally prefaced with '!'
+ * param_and_filter - text following cmd and ':'
+ * param - text following cmd and ':' and stripped of filter
+ * filter - the optional filter text following (and including) 'if'
+ *
+ * To illustrate the use of these components, here are some concrete
+ * examples. For the following triggers:
+ *
+ * echo 'traceon:5 if pid == 0' > trigger
+ * - 'traceon' is both cmd and glob
+ * - '5 if pid == 0' is the param_and_filter
+ * - '5' is the param
+ * - 'if pid == 0' is the filter
+ *
+ * echo 'enable_event:sys:event:n' > trigger
+ * - 'enable_event' is both cmd and glob
+ * - 'sys:event:n' is the param_and_filter
+ * - 'sys:event:n' is the param
+ * - there is no filter
+ *
+ * echo 'hist:keys=pid if prio > 50' > trigger
+ * - 'hist' is both cmd and glob
+ * - 'keys=pid if prio > 50' is the param_and_filter
+ * - 'keys=pid' is the param
+ * - 'if prio > 50' is the filter
+ *
+ * echo '!enable_event:sys:event:n' > trigger
+ * - 'enable_event' the cmd
+ * - '!enable_event' is the glob
+ * - 'sys:event:n' is the param_and_filter
+ * - 'sys:event:n' is the param
+ * - there is no filter
+ *
+ * echo 'traceoff' > trigger
+ * - 'traceoff' is both cmd and glob
+ * - there is no param_and_filter
+ * - there is no param
+ * - there is no filter
+ *
+ * There are a few different categories of event trigger covered by
+ * these helpers:
+ *
+ * - triggers that don't require a parameter e.g. traceon
+ * - triggers that do require a parameter e.g. enable_event and hist
+ * - triggers that, though they may not require a param, may support an
+ * optional 'n' param (n = number of times the trigger should fire)
+ * e.g.: traceon:5 or enable_event:sys:event:n
+ * - triggers that do not support an 'n' param e.g. hist
+ *
+ * These functions can be used or ignored as necessary - it all
+ * depends on the complexity of the trigger, and the granularity of
+ * the functions supported reflects the fact that some implementations
+ * may need to customize certain aspects of their implementations and
+ * won't need certain functions. For instance, the hist trigger
+ * implementation doesn't use event_trigger_separate_filter() because
+ * it has special requirements for handling the filter.
+ */
+
+/**
+ * event_trigger_check_remove - check whether an event trigger specifies remove
+ * @glob: The trigger command string, with optional remove(!) operator
+ *
+ * The event trigger callback implementations pass in 'glob' as a
+ * parameter. This is the command name either with or without a
+ * remove(!) operator. This function simply parses the glob and
+ * determines whether the command corresponds to a trigger removal or
+ * a trigger addition.
+ *
+ * Return: true if this is a remove command, false otherwise
+ */
+bool event_trigger_check_remove(const char *glob)
+{
+	return glob && glob[0] == '!';
+}
+
+/**
+ * event_trigger_empty_param - check whether the param is empty
+ * @param: The trigger param string
+ *
+ * The event trigger callback implementations pass in 'param' as a
+ * parameter. This corresponds to the text following the command name
+ * and ':'. This function can be called by a callback implementation
+ * for any command that requires a param; a callback that doesn't
+ * require a param can ignore it.
+ *
+ * Return: true if there is no param (i.e. it is NULL), false otherwise
+ */
+bool event_trigger_empty_param(const char *param)
+{
+ return !param;
+}
+
+/**
+ * event_trigger_separate_filter - separate an event trigger from a filter
+ * @param_and_filter: String containing the trigger and possibly the filter
+ * @param: outparam, will be filled with a pointer to the trigger
+ * @filter: outparam, will be filled with a pointer to the filter
+ * @param_required: Specifies whether or not the param string is required
+ *
+ * Given a param string of the form '[trigger] [if filter]', this
+ * function separates the filter from the trigger and returns the
+ * trigger in *param and the filter in *filter. Either *param or
+ * *filter may be set to NULL by this function - if not set to NULL,
+ * they will contain strings corresponding to the trigger and
+ * filter.
+ *
+ * There are two cases that need to be handled with respect to the
+ * passed-in param: either the param is required, or it is not
+ * required. If @param_required is set and there's no param, the
+ * function returns -EINVAL. If @param_required is not set and
+ * there's a param that starts with a number, that corresponds to the
+ * case of a trigger with :n (n = number of times the trigger should
+ * fire) and the parsing continues normally; otherwise the function
+ * just returns, assuming the param contains only a filter and
+ * there's nothing else to do.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_separate_filter(char *param_and_filter, char **param,
+ char **filter, bool param_required)
+{
+ int ret = 0;
+
+ *param = *filter = NULL;
+
+ if (!param_and_filter) {
+ if (param_required)
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Here we check for an optional param. The only legal
+ * optional param is :n, and if that's the case, continue
+ * below. Otherwise we assume what's left is a filter and
+ * return it as the filter string for the caller to deal with.
+ */
+ if (!param_required && param_and_filter && !isdigit(param_and_filter[0])) {
+ *filter = param_and_filter;
+ goto out;
+ }
+
+ /*
+ * Separate the param from the filter (param [if filter]).
+ * Here we have either an optional :n param or a required
+ * param and an optional filter.
+ */
+ *param = strsep(&param_and_filter, " \t");
+
+ /*
+ * Here we have a filter, though it may be empty.
+ */
+ if (param_and_filter) {
+ *filter = skip_spaces(param_and_filter);
+ if (!**filter)
+ *filter = NULL;
+ }
+out:
+ return ret;
+}
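
To make the separation rules concrete, here is a small illustrative
sketch - not part of the patch; the buffer and variables below are
hypothetical - showing what event_trigger_separate_filter() yields for
the example commands above:

/*
 * Hypothetical usage sketch. With param_required == false:
 *   "5 if pid == 0" -> *param = "5",  *filter = "if pid == 0"
 *   "if pid == 0"   -> *param = NULL, *filter = "if pid == 0"
 *   NULL            -> *param = NULL, *filter = NULL (returns 0)
 * With param_required == true, a NULL input returns -EINVAL.
 */
char buf[] = "5 if pid == 0";	/* must be writable: strsep() modifies it */
char *param, *filter;
int err;

err = event_trigger_separate_filter(buf, &param, &filter, false);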
+
+/**
+ * event_trigger_alloc - allocate and init event_trigger_data for a trigger
+ * @cmd_ops: The event_command operations for the trigger
+ * @cmd: The cmd string
+ * @param: The param string
+ * @private_data: User data to associate with the event trigger
+ *
+ * Allocate an event_trigger_data instance and initialize it. The
+ * @cmd_ops are used along with the @cmd and @param to get the
+ * trigger_ops to assign to the event_trigger_data. @private_data can
+ * also be passed in and associated with the event_trigger_data.
+ *
+ * Use event_trigger_free() to free an event_trigger_data object.
+ *
+ * Return: The trigger_data object on success, NULL otherwise
+ */
+struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops,
+ char *cmd,
+ char *param,
+ void *private_data)
+{
+ struct event_trigger_data *trigger_data;
+ struct event_trigger_ops *trigger_ops;
+
+ trigger_ops = cmd_ops->get_trigger_ops(cmd, param);
+
+ trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
+ if (!trigger_data)
+ return NULL;
+
+ trigger_data->count = -1;
+ trigger_data->ops = trigger_ops;
+ trigger_data->cmd_ops = cmd_ops;
+ trigger_data->private_data = private_data;
+
+ INIT_LIST_HEAD(&trigger_data->list);
+ INIT_LIST_HEAD(&trigger_data->named_list);
+ RCU_INIT_POINTER(trigger_data->filter, NULL);
+
+ return trigger_data;
+}
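
A brief hedged sketch of typical usage (cmd_ops and file here are
placeholders): since get_trigger_ops() receives both cmd and param,
commands such as traceon typically select their *_count_trigger_ops
variant whenever a param is present, and that choice is recorded in
trigger_data->ops.

/*
 * Hypothetical sketch: allocate trigger_data for a "traceon:5"
 * command. Until event_trigger_init() raises the refcount, an
 * error path may simply kfree() the object.
 */
struct event_trigger_data *data;

data = event_trigger_alloc(cmd_ops, "traceon", "5", file);
if (!data)
	return -ENOMEM;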
+
+/**
+ * event_trigger_parse_num - parse and return the number param for a trigger
+ * @param: The param string
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Parse the :n (n = number of times the trigger should fire) param
+ * and set the count variable in the trigger_data to the parsed count.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_parse_num(char *param,
+ struct event_trigger_data *trigger_data)
+{
+ char *number;
+ int ret = 0;
+
+ if (param) {
+ number = strsep(&param, ":");
+
+ if (!strlen(number))
+ return -EINVAL;
+
+		/*
+		 * Store the number of times the trigger should fire
+		 * in trigger_data->count.
+		 */
+ ret = kstrtoul(number, 0, &trigger_data->count);
+ }
+
+ return ret;
+}
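
Continuing the hypothetical traceon:5 sketch, once separation has
produced param = "5":

/*
 * Hypothetical sketch: sets data->count to 5. A NULL param leaves
 * count at -1 (fire every time); an empty number string is rejected
 * with -EINVAL by the strlen() check above.
 */
ret = event_trigger_parse_num(param, data);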
+
+/**
+ * event_trigger_set_filter - set an event trigger's filter
+ * @cmd_ops: The event_command operations for the trigger
+ * @file: The event file for the trigger's event
+ * @param: The string containing the filter
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Set the filter for the trigger. If the filter is NULL, just return
+ * without error.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_set_filter(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *param,
+ struct event_trigger_data *trigger_data)
+{
+ if (param && cmd_ops->set_filter)
+ return cmd_ops->set_filter(param, trigger_data, file);
+
+ return 0;
+}
+
+/**
+ * event_trigger_reset_filter - reset an event trigger's filter
+ * @cmd_ops: The event_command operations for the trigger
+ * @trigger_data: The trigger_data for the trigger
+ *
+ * Reset the filter for the trigger to no filter.
+ */
+void event_trigger_reset_filter(struct event_command *cmd_ops,
+ struct event_trigger_data *trigger_data)
+{
+ if (cmd_ops->set_filter)
+ cmd_ops->set_filter(NULL, trigger_data, NULL);
+}
+
/**
- * event_trigger_callback - Generic event_command @func implementation
+ * event_trigger_register - register an event trigger
+ * @cmd_ops: The event_command operations for the trigger
+ * @file: The event file for the trigger's event
+ * @glob: The trigger command string, with optional remove(!) operator
+ * @cmd: The cmd string
+ * @param: The param string
+ * @trigger_data: The trigger_data for the trigger
+ * @n_registered: optional outparam, the number of triggers registered
+ *
+ * Register an event trigger. The @cmd_ops are used to call the
+ * cmd_ops->reg() function which actually does the registration. The
+ * cmd_ops->reg() function returns the number of triggers registered,
+ * which is assigned to *n_registered, if @n_registered is non-NULL.
+ *
+ * Return: 0 on success, errno otherwise
+ */
+int event_trigger_register(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob,
+ char *cmd,
+ char *param,
+ struct event_trigger_data *trigger_data,
+ int *n_registered)
+{
+ int ret;
+
+ if (n_registered)
+ *n_registered = 0;
+
+ ret = cmd_ops->reg(glob, trigger_data, file);
+ /*
+ * The above returns on success the # of functions enabled,
+ * but if it didn't find any functions it returns zero.
+ * Consider no functions a failure too.
+ */
+ if (!ret) {
+ cmd_ops->unreg(glob, trigger_data, file);
+ ret = -ENOENT;
+ } else if (ret > 0) {
+ if (n_registered)
+ *n_registered = ret;
+ /* Just return zero, not the number of enabled functions */
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * End event trigger parsing helper functions.
+ */
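
Taken together, the helpers reduce a struct event_command.parse()
implementation to a mostly mechanical sequence. The sketch below is
illustrative only: example_trigger_parse() is a made-up name, the use
of the event file as private_data is an assumption, and the
refcounting follows the pattern of event_trigger_parse() below rather
than any function added by this patch.

static int example_trigger_parse(struct event_command *cmd_ops,
				 struct trace_event_file *file,
				 char *glob, char *cmd,
				 char *param_and_filter)
{
	struct event_trigger_data *trigger_data;
	char *param, *filter;
	bool remove;
	int ret;

	remove = event_trigger_check_remove(glob);

	/* This command takes an optional :n param, so not required. */
	ret = event_trigger_separate_filter(param_and_filter, &param,
					    &filter, false);
	if (ret)
		return ret;

	/* Assumption: the event file doubles as private_data. */
	trigger_data = event_trigger_alloc(cmd_ops, cmd, param, file);
	if (!trigger_data)
		return -ENOMEM;

	if (remove) {
		/* A leading '!' unregisters the trigger. */
		cmd_ops->unreg(glob + 1, trigger_data, file);
		kfree(trigger_data);
		return 0;
	}

	ret = event_trigger_parse_num(param, trigger_data);
	if (ret)
		goto out_free;

	ret = event_trigger_set_filter(cmd_ops, file, filter, trigger_data);
	if (ret < 0)
		goto out_free;

	/* Up the refcount so reg() can't free trigger_data on failure. */
	event_trigger_init(trigger_data->ops, trigger_data);

	ret = event_trigger_register(cmd_ops, file, glob, cmd, param,
				     trigger_data, NULL);
	if (ret)
		event_trigger_reset_filter(cmd_ops, trigger_data);

	/* Drop the extra ref; frees trigger_data if it went unused. */
	event_trigger_free(trigger_data->ops, trigger_data);

	return ret;

 out_free:
	kfree(trigger_data);
	return ret;
}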
+
+/**
+ * event_trigger_parse - Generic event_command @parse implementation
* @cmd_ops: The command ops, used for trigger registration
* @file: The trace_event_file associated with the event
* @glob: The raw string used to register the trigger
@@ -632,15 +986,15 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
* Common implementation for event command parsing and trigger
* instantiation.
*
- * Usually used directly as the @func method in event command
+ * Usually used directly as the @parse method in event command
* implementations.
*
* Return: 0 on success, errno otherwise
*/
static int
-event_trigger_callback(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+event_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
struct event_trigger_data *trigger_data;
struct event_trigger_ops *trigger_ops;
@@ -673,7 +1027,7 @@ event_trigger_callback(struct event_command *cmd_ops,
INIT_LIST_HEAD(&trigger_data->named_list);
if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
kfree(trigger_data);
ret = 0;
goto out;
@@ -708,14 +1062,14 @@ event_trigger_callback(struct event_command *cmd_ops,
out_reg:
/* Up the trigger_data count to make sure reg doesn't free it on failure */
event_trigger_init(trigger_ops, trigger_data);
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of functions enabled,
* but if it didn't find any functions it returns zero.
* Consider no functions a failure too.
*/
if (!ret) {
- cmd_ops->unreg(glob, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob, trigger_data, file);
ret = -ENOENT;
} else if (ret > 0)
ret = 0;
@@ -955,6 +1309,16 @@ traceon_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (tracer_tracing_is_on(file->tr))
+ return;
+
+ tracer_tracing_on(file->tr);
+ return;
+ }
+
if (tracing_is_on())
return;
@@ -966,8 +1330,15 @@ traceon_count_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
- if (tracing_is_on())
- return;
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (tracer_tracing_is_on(file->tr))
+ return;
+ } else {
+ if (tracing_is_on())
+ return;
+ }
if (!data->count)
return;
@@ -975,7 +1346,10 @@ traceon_count_trigger(struct event_trigger_data *data,
if (data->count != -1)
(data->count)--;
- tracing_on();
+ if (file)
+ tracer_tracing_on(file->tr);
+ else
+ tracing_on();
}
static void
@@ -983,6 +1357,16 @@ traceoff_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (!tracer_tracing_is_on(file->tr))
+ return;
+
+ tracer_tracing_off(file->tr);
+ return;
+ }
+
if (!tracing_is_on())
return;
@@ -994,8 +1378,15 @@ traceoff_count_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
- if (!tracing_is_on())
- return;
+ struct trace_event_file *file = data->private_data;
+
+ if (file) {
+ if (!tracer_tracing_is_on(file->tr))
+ return;
+ } else {
+ if (!tracing_is_on())
+ return;
+ }
if (!data->count)
return;
@@ -1003,7 +1394,10 @@ traceoff_count_trigger(struct event_trigger_data *data,
if (data->count != -1)
(data->count)--;
- tracing_off();
+ if (file)
+ tracer_tracing_off(file->tr);
+ else
+ tracing_off();
}
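
Because these handlers now look in data->private_data for a
trace_event_file, a traceon/traceoff trigger acts on the trace
instance that owns the triggering event; only when no file is
available does it fall back to the global tracing_on()/tracing_off()
path, as before. As a hypothetical illustration in the style of the
examples above (the instance name 'foo' is made up):

  echo 'traceoff' > instances/foo/events/sched/sched_switch/trigger
  - stops writing to the 'foo' instance buffer when sched_switch fires
  - the top-level buffer and any other instances keep tracing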
static int
@@ -1023,28 +1417,28 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops traceon_trigger_ops = {
- .func = traceon_trigger,
+ .trigger = traceon_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceon_count_trigger_ops = {
- .func = traceon_count_trigger,
+ .trigger = traceon_count_trigger,
.print = traceon_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceoff_trigger_ops = {
- .func = traceoff_trigger,
+ .trigger = traceoff_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops traceoff_count_trigger_ops = {
- .func = traceoff_count_trigger,
+ .trigger = traceoff_count_trigger,
.print = traceoff_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1069,7 +1463,7 @@ onoff_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_traceon_cmd = {
.name = "traceon",
.trigger_type = ETT_TRACE_ONOFF,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = onoff_get_trigger_ops,
@@ -1080,7 +1474,7 @@ static struct event_command trigger_traceoff_cmd = {
.name = "traceoff",
.trigger_type = ETT_TRACE_ONOFF,
.flags = EVENT_CMD_FL_POST_TRIGGER,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = onoff_get_trigger_ops,
@@ -1116,14 +1510,14 @@ snapshot_count_trigger(struct event_trigger_data *data,
}
static int
-register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
+register_snapshot_trigger(char *glob,
struct event_trigger_data *data,
struct trace_event_file *file)
{
if (tracing_alloc_snapshot_instance(file->tr) != 0)
return 0;
- return register_trigger(glob, ops, data, file);
+ return register_trigger(glob, data, file);
}
static int
@@ -1135,14 +1529,14 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops snapshot_trigger_ops = {
- .func = snapshot_trigger,
+ .trigger = snapshot_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops snapshot_count_trigger_ops = {
- .func = snapshot_count_trigger,
+ .trigger = snapshot_count_trigger,
.print = snapshot_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1157,7 +1551,7 @@ snapshot_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_snapshot_cmd = {
.name = "snapshot",
.trigger_type = ETT_SNAPSHOT,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_snapshot_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = snapshot_get_trigger_ops,
@@ -1200,7 +1594,12 @@ stacktrace_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *event)
{
- trace_dump_stack(STACK_SKIP);
+ struct trace_event_file *file = data->private_data;
+
+ if (file)
+ __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP);
+ else
+ trace_dump_stack(STACK_SKIP);
}
static void
@@ -1226,14 +1625,14 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
}
static struct event_trigger_ops stacktrace_trigger_ops = {
- .func = stacktrace_trigger,
+ .trigger = stacktrace_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
};
static struct event_trigger_ops stacktrace_count_trigger_ops = {
- .func = stacktrace_count_trigger,
+ .trigger = stacktrace_count_trigger,
.print = stacktrace_trigger_print,
.init = event_trigger_init,
.free = event_trigger_free,
@@ -1249,7 +1648,7 @@ static struct event_command trigger_stacktrace_cmd = {
.name = "stacktrace",
.trigger_type = ETT_STACKTRACE,
.flags = EVENT_CMD_FL_POST_TRIGGER,
- .func = event_trigger_callback,
+ .parse = event_trigger_parse,
.reg = register_trigger,
.unreg = unregister_trigger,
.get_trigger_ops = stacktrace_get_trigger_ops,
@@ -1353,36 +1752,36 @@ void event_enable_trigger_free(struct event_trigger_ops *ops,
}
static struct event_trigger_ops event_enable_trigger_ops = {
- .func = event_enable_trigger,
+ .trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_enable_count_trigger_ops = {
- .func = event_enable_count_trigger,
+ .trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_disable_trigger_ops = {
- .func = event_enable_trigger,
+ .trigger = event_enable_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
static struct event_trigger_ops event_disable_count_trigger_ops = {
- .func = event_enable_count_trigger,
+ .trigger = event_enable_count_trigger,
.print = event_enable_trigger_print,
.init = event_trigger_init,
.free = event_enable_trigger_free,
};
-int event_enable_trigger_func(struct event_command *cmd_ops,
- struct trace_event_file *file,
- char *glob, char *cmd, char *param)
+int event_enable_trigger_parse(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param)
{
struct trace_event_file *event_enable_file;
struct enable_trigger_data *enable_data;
@@ -1455,7 +1854,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
trigger_data->private_data = enable_data;
if (glob[0] == '!') {
- cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+ cmd_ops->unreg(glob+1, trigger_data, file);
kfree(trigger_data);
kfree(enable_data);
ret = 0;
@@ -1502,7 +1901,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
ret = trace_event_enable_disable(event_enable_file, 1, 1);
if (ret < 0)
goto out_put;
- ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file);
+ ret = cmd_ops->reg(glob, trigger_data, file);
/*
* The above returns on success the # of functions enabled,
* but if it didn't find any functions it returns zero.
@@ -1532,7 +1931,6 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
}
int event_enable_register_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
{
@@ -1574,7 +1972,6 @@ out:
}
void event_enable_unregister_trigger(char *glob,
- struct event_trigger_ops *ops,
struct event_trigger_data *test,
struct trace_event_file *file)
{
@@ -1628,7 +2025,7 @@ event_enable_get_trigger_ops(char *cmd, char *param)
static struct event_command trigger_enable_cmd = {
.name = ENABLE_EVENT_STR,
.trigger_type = ETT_EVENT_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.get_trigger_ops = event_enable_get_trigger_ops,
@@ -1638,7 +2035,7 @@ static struct event_command trigger_enable_cmd = {
static struct event_command trigger_disable_cmd = {
.name = DISABLE_EVENT_STR,
.trigger_type = ETT_EVENT_ENABLE,
- .func = event_enable_trigger_func,
+ .parse = event_enable_trigger_parse,
.reg = event_enable_register_trigger,
.unreg = event_enable_unregister_trigger,
.get_trigger_ops = event_enable_get_trigger_ops,
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 56bb7b890578..d440ddd5fd8b 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -491,18 +491,14 @@ static void stop_per_cpu_kthreads(void)
static int start_cpu_kthread(unsigned int cpu)
{
struct task_struct *kthread;
- char comm[24];
- snprintf(comm, 24, "hwlatd/%d", cpu);
-
- kthread = kthread_create_on_cpu(kthread_fn, NULL, cpu, comm);
+ kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
return -ENOMEM;
}
per_cpu(hwlat_per_cpu_data, cpu).kthread = kthread;
- wake_up_process(kthread);
return 0;
}
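
The replacement helper both binds and wakes the thread, which is why
the explicit wake_up_process() call disappears. A minimal sketch of
the pattern, assuming a hypothetical sampling function example_fn():

/*
 * Hypothetical sketch: kthread_run_on_cpu() creates the kthread,
 * binds it to @cpu, names it via the printf-style format (the "%u"
 * expands to the CPU number), and wakes it up, so no separate
 * wake_up_process() call is needed.
 */
static int example_start_on(unsigned int cpu)
{
	struct task_struct *t;

	t = kthread_run_on_cpu(example_fn, NULL, cpu, "example/%u");
	if (IS_ERR(t))
		return PTR_ERR(t);

	return 0;
}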
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 4e1257f50aa3..b62fd785b599 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -32,7 +32,7 @@ static int __init set_kprobe_boot_events(char *str)
strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
disable_tracing_selftest("running kprobe events");
- return 0;
+ return 1;
}
__setup("kprobe_event=", set_kprobe_boot_events);
@@ -328,11 +328,9 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
static void __disable_trace_kprobe(struct trace_probe *tp)
{
- struct trace_probe *pos;
struct trace_kprobe *tk;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tk = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) {
if (!trace_kprobe_is_registered(tk))
continue;
if (trace_kprobe_is_return(tk))
@@ -349,7 +347,7 @@ static void __disable_trace_kprobe(struct trace_probe *tp)
static int enable_trace_kprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_kprobe *tk;
bool enabled;
int ret = 0;
@@ -370,8 +368,7 @@ static int enable_trace_kprobe(struct trace_event_call *call,
if (enabled)
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tk = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(tk, trace_probe_probe_list(tp), tp.list) {
if (trace_kprobe_has_gone(tk))
continue;
ret = __enable_trace_kprobe(tk);
@@ -560,11 +557,9 @@ static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
struct trace_kprobe *comp)
{
struct trace_probe_event *tpe = orig->tp.event;
- struct trace_probe *pos;
int i;
- list_for_each_entry(pos, &tpe->probes, list) {
- orig = container_of(pos, struct trace_kprobe, tp);
+ list_for_each_entry(orig, &tpe->probes, tp.list) {
if (strcmp(trace_kprobe_symbol(orig),
trace_kprobe_symbol(comp)) ||
trace_kprobe_offset(orig) != trace_kprobe_offset(comp))
@@ -1176,15 +1171,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
{
struct dyn_event *ev = v;
struct trace_kprobe *tk;
+ unsigned long nmissed;
if (!is_trace_kprobe(ev))
return 0;
tk = to_trace_kprobe(ev);
+ nmissed = trace_kprobe_is_return(tk) ?
+ tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
seq_printf(m, " %-44s %15lu %15lu\n",
trace_probe_name(&tk->tp),
trace_kprobe_nhit(tk),
- tk->rp.kp.nmissed);
+ nmissed);
return 0;
}
@@ -1384,17 +1382,11 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
if (trace_trigger_soft_disabled(trace_file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = trace_file;
-
dsize = __get_data_size(&tk->tp, regs);
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file,
- call->event.type,
- sizeof(*entry) + tk->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tk->tp.size + dsize);
+ if (!entry)
return;
fbuffer.regs = regs;
@@ -1431,16 +1423,11 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
if (trace_trigger_soft_disabled(trace_file))
return;
- fbuffer.trace_ctx = tracing_gen_ctx();
- fbuffer.trace_file = trace_file;
-
dsize = __get_data_size(&tk->tp, regs);
- fbuffer.event =
- trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file,
- call->event.type,
- sizeof(*entry) + tk->tp.size + dsize,
- fbuffer.trace_ctx);
- if (!fbuffer.event)
+
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+ sizeof(*entry) + tk->tp.size + dsize);
+ if (!entry)
return;
fbuffer.regs = regs;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 7520d43aed55..e9ae1f33a7f0 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -138,8 +138,7 @@ static void osnoise_unregister_instance(struct trace_array *tr)
if (!found)
return;
- synchronize_rcu();
- kfree(inst);
+ kvfree_rcu(inst);
}
/*
@@ -1168,7 +1167,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
* used to record the beginning and to report the end of a thread noise window.
*/
static void
-trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p,
+trace_sched_switch_callback(void *data, bool preempt,
+ unsigned int prev_state,
+ struct task_struct *p,
struct task_struct *n)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
@@ -1388,6 +1389,26 @@ static int run_osnoise(void)
}
/*
+	 * In some cases, notably when running on a nohz_full CPU with
+	 * a stopped tick, PREEMPT_RCU has no way to account for QSs.
+	 * This will eventually cause unwarranted noise as PREEMPT_RCU
+	 * will force preemption as the means of ending the current
+	 * grace period. We avoid this problem by calling
+	 * rcu_momentary_dyntick_idle(), which performs a zero-duration
+	 * EQS allowing PREEMPT_RCU to end the current grace period.
+	 * This call shouldn't be wrapped inside an RCU critical
+	 * section.
+	 *
+	 * Note that in non-PREEMPT_RCU kernels QSs are handled through
+	 * cond_resched().
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
+ local_irq_disable();
+ rcu_momentary_dyntick_idle();
+ local_irq_enable();
+ }
+
+ /*
 * For the non-preemptive kernel config: let threads run, if
* they so wish.
*/
@@ -1438,6 +1459,37 @@ static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;
/*
+ * osnoise_sleep - sleep until the next period
+ */
+static void osnoise_sleep(void)
+{
+ u64 interval;
+ ktime_t wake_time;
+
+ mutex_lock(&interface_lock);
+ interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
+ mutex_unlock(&interface_lock);
+
+ /*
+	 * Unlike hwlat_detector, the osnoise tracer can run
+ * without a pause because preemption is on.
+ */
+ if (!interval) {
+ /* Let synchronize_rcu_tasks() make progress */
+ cond_resched_tasks_rcu_qs();
+ return;
+ }
+
+ wake_time = ktime_add_us(ktime_get(), interval);
+ __set_current_state(TASK_INTERRUPTIBLE);
+
+ while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) {
+ if (kthread_should_stop())
+ break;
+ }
+}
+
+/*
* osnoise_main - The osnoise detection kernel thread
*
* Calls run_osnoise() function to measure the osnoise for the configured runtime,
@@ -1445,30 +1497,10 @@ static struct cpumask save_cpumask;
*/
static int osnoise_main(void *data)
{
- u64 interval;
while (!kthread_should_stop()) {
-
run_osnoise();
-
- mutex_lock(&interface_lock);
- interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
- mutex_unlock(&interface_lock);
-
- do_div(interval, USEC_PER_MSEC);
-
- /*
- * differently from hwlat_detector, the osnoise tracer can run
- * without a pause because preemption is on.
- */
- if (interval < 1) {
- /* Let synchronize_rcu_tasks() make progress */
- cond_resched_tasks_rcu_qs();
- continue;
- }
-
- if (msleep_interruptible(interval))
- break;
+ osnoise_sleep();
}
return 0;
@@ -1701,7 +1733,7 @@ static int start_kthread(unsigned int cpu)
snprintf(comm, 24, "osnoise/%d", cpu);
}
- kthread = kthread_create_on_cpu(main, NULL, cpu, comm);
+ kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
if (IS_ERR(kthread)) {
pr_err(BANNER "could not start sampling thread\n");
@@ -1710,7 +1742,6 @@ static int start_kthread(unsigned int cpu)
}
per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
- wake_up_process(kthread);
return 0;
}
@@ -2123,6 +2154,13 @@ out_unhook_irq:
return -EINVAL;
}
+static void osnoise_unhook_events(void)
+{
+ unhook_thread_events();
+ unhook_softirq_events();
+ unhook_irq_events();
+}
+
/*
* osnoise_workload_start - start the workload and hook to events
*/
@@ -2155,7 +2193,14 @@ static int osnoise_workload_start(void)
retval = start_per_cpu_kthreads();
if (retval) {
- unhook_irq_events();
+ trace_osnoise_callback_enabled = false;
+ /*
+ * Make sure that ftrace_nmi_enter/exit() see
+ * trace_osnoise_callback_enabled as false before continuing.
+ */
+ barrier();
+
+ osnoise_unhook_events();
return retval;
}
@@ -2177,6 +2222,17 @@ static void osnoise_workload_stop(void)
if (osnoise_has_registered_instances())
return;
+ /*
+	 * If callbacks were already disabled in a previous stop
+	 * call, there is no need to disable them again.
+ *
+ * For instance, this happens when tracing is stopped via:
+ * echo 0 > tracing_on
+ * echo nop > current_tracer.
+ */
+ if (!trace_osnoise_callback_enabled)
+ return;
+
trace_osnoise_callback_enabled = false;
/*
* Make sure that ftrace_nmi_enter/exit() see
@@ -2186,9 +2242,7 @@ static void osnoise_workload_stop(void)
stop_per_cpu_kthreads();
- unhook_irq_events();
- unhook_softirq_events();
- unhook_thread_events();
+ osnoise_unhook_events();
}
static void osnoise_tracer_start(struct trace_array *tr)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3547e7176ff7..8aa493d25c73 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -445,14 +445,18 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
char irqs_off;
int hardirq;
int softirq;
+ int bh_off;
int nmi;
nmi = entry->flags & TRACE_FLAG_NMI;
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+ bh_off = entry->flags & TRACE_FLAG_BH_OFF;
irqs_off =
+ (entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ bh_off ? 'b' :
(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
'.';
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 3ed2a3f37297..80863c6508e5 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -356,6 +356,8 @@ static int __parse_imm_string(char *str, char **pbuf, int offs)
return -EINVAL;
}
*pbuf = kstrndup(str, len - 1, GFP_KERNEL);
+ if (!*pbuf)
+ return -ENOMEM;
return 0;
}
@@ -869,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
switch (ptype) {
case PROBE_PRINT_NORMAL:
fmt = "(%lx)";
- arg = "REC->" FIELD_STRING_IP;
+ arg = ", REC->" FIELD_STRING_IP;
break;
case PROBE_PRINT_RETURN:
fmt = "(%lx <- %lx)";
- arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+ arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
break;
case PROBE_PRINT_EVENT:
- fmt = "(%u)";
- arg = "REC->" FIELD_STRING_TYPE;
+ fmt = "";
+ arg = "";
break;
default:
WARN_ON_ONCE(1);
@@ -901,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
parg->type->fmt);
}
- pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg);
for (i = 0; i < tp->nr_args; i++) {
parg = tp->args + i;
@@ -1138,8 +1140,7 @@ int trace_probe_remove_file(struct trace_probe *tp,
return -ENOENT;
list_del_rcu(&link->list);
- synchronize_rcu();
- kfree(link);
+ kvfree_rcu(link);
if (list_empty(&tp->event->files))
trace_probe_clear_flag(tp, TP_FLAG_TRACE);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 99e7a5df025e..92cc149af0fd 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -38,7 +38,6 @@
#define FIELD_STRING_IP "__probe_ip"
#define FIELD_STRING_RETIP "__probe_ret_ip"
#define FIELD_STRING_FUNC "__probe_func"
-#define FIELD_STRING_TYPE "__probe_type"
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed) \
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index e304196d7c28..993b0ed10d8c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -22,6 +22,7 @@ static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(void *ignore, bool preempt,
+ unsigned int prev_state,
struct task_struct *prev, struct task_struct *next)
{
int flags;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 2402de520eca..46429f9a96fa 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -426,6 +426,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
static void notrace
probe_wakeup_sched_switch(void *ignore, bool preempt,
+ unsigned int prev_state,
struct task_struct *prev, struct task_struct *next)
{
struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index afd937a46496..abcadbe933bb 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -784,9 +784,7 @@ static struct fgraph_ops fgraph_ops __initdata = {
.retfunc = &trace_graph_return,
};
-#if defined(CONFIG_DYNAMIC_FTRACE) && \
- defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
-#define TEST_DIRECT_TRAMP
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
noinline __noclone static void trace_direct_tramp(void) { }
#endif
@@ -849,7 +847,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
goto out;
}
-#ifdef TEST_DIRECT_TRAMP
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
tracing_reset_online_cpus(&tr->array_buffer);
set_graph_array(tr);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8bfcd3b09422..f755bde42fd0 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -323,8 +323,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
trace_ctx = tracing_gen_ctx();
- buffer = tr->array_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
sys_data->enter_event->event.type, size, trace_ctx);
if (!event)
return;
@@ -367,8 +366,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
trace_ctx = tracing_gen_ctx();
- buffer = tr->array_buffer.buffer;
- event = trace_buffer_lock_reserve(buffer,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
sys_data->exit_event->event.type, sizeof(*entry),
trace_ctx);
if (!event)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 4f35514a48f3..9711589273cd 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -410,12 +410,10 @@ static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig,
struct trace_uprobe *comp)
{
struct trace_probe_event *tpe = orig->tp.event;
- struct trace_probe *pos;
struct inode *comp_inode = d_real_inode(comp->path.dentry);
int i;
- list_for_each_entry(pos, &tpe->probes, list) {
- orig = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(orig, &tpe->probes, tp.list) {
if (comp_inode != d_real_inode(orig->path.dentry) ||
comp->offset != orig->offset)
continue;
@@ -950,8 +948,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
struct trace_event_file *trace_file)
{
struct uprobe_trace_entry_head *entry;
- struct trace_buffer *buffer;
- struct ring_buffer_event *event;
+ struct trace_event_buffer fbuffer;
void *data;
int size, esize;
struct trace_event_call *call = trace_probe_event_call(&tu->tp);
@@ -966,12 +963,10 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
size = esize + tu->tp.size + dsize;
- event = trace_event_buffer_lock_reserve(&buffer, trace_file,
- call->event.type, size, 0);
- if (!event)
+ entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
+ if (!entry)
return;
- entry = ring_buffer_event_data(event);
if (is_ret_probe(tu)) {
entry->vaddr[0] = func;
entry->vaddr[1] = instruction_pointer(regs);
@@ -983,7 +978,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
memcpy(data, ucb->buf, tu->tp.size + dsize);
- event_trigger_unlock_commit(trace_file, buffer, event, entry, 0);
+ trace_event_buffer_commit(&fbuffer);
}
/* uprobe handler */
@@ -1076,14 +1071,12 @@ static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter)
static void __probe_event_disable(struct trace_probe *tp)
{
- struct trace_probe *pos;
struct trace_uprobe *tu;
tu = container_of(tp, struct trace_uprobe, tp);
WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter));
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
if (!tu->inode)
continue;
@@ -1095,7 +1088,7 @@ static void __probe_event_disable(struct trace_probe *tp)
static int probe_event_enable(struct trace_event_call *call,
struct trace_event_file *file, filter_func_t filter)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
bool enabled;
int ret;
@@ -1130,8 +1123,7 @@ static int probe_event_enable(struct trace_event_call *call,
if (ret)
goto err_flags;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
ret = trace_uprobe_enable(tu, filter);
if (ret) {
__probe_event_disable(tp);
@@ -1276,7 +1268,7 @@ static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter,
static int uprobe_perf_close(struct trace_event_call *call,
struct perf_event *event)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
int ret = 0;
@@ -1288,8 +1280,7 @@ static int uprobe_perf_close(struct trace_event_call *call,
if (trace_uprobe_filter_remove(tu->tp.event->filter, event))
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
if (ret)
break;
@@ -1301,7 +1292,7 @@ static int uprobe_perf_close(struct trace_event_call *call,
static int uprobe_perf_open(struct trace_event_call *call,
struct perf_event *event)
{
- struct trace_probe *pos, *tp;
+ struct trace_probe *tp;
struct trace_uprobe *tu;
int err = 0;
@@ -1313,8 +1304,7 @@ static int uprobe_perf_open(struct trace_event_call *call,
if (trace_uprobe_filter_add(tu->tp.event->filter, event))
return 0;
- list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
- tu = container_of(pos, struct trace_uprobe, tp);
+ list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) {
err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
if (err) {
uprobe_perf_close(call, event);
@@ -1620,6 +1610,11 @@ create_local_trace_uprobe(char *name, unsigned long offs,
tu->path = path;
tu->ref_ctr_offset = ref_ctr_offset;
tu->filename = kstrdup(name, GFP_KERNEL);
+ if (!tu->filename) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
init_trace_event_call(tu);
ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;