author     Richard Braun <rbraun@sceen.net>  2018-06-25 21:48:34 +0200
committer  Richard Braun <rbraun@sceen.net>  2018-06-25 21:52:26 +0200
commit     bb91d0a376a71ef2c0d01a741400c367ac1a2ccf (patch)
tree       f7dd37b46d754bd9e3f7acb13957e2ace6a7b43e
parent     64d74fe8d76c230e61b17482bb098d7f9729141d (diff)
kern/perfmon: new module
-rw-r--r--  doc/intro.9.txt              |    2
-rw-r--r--  kern/Kconfig                 |   13
-rw-r--r--  kern/Makefile                |    2
-rw-r--r--  kern/perfmon.c               | 1443
-rw-r--r--  kern/perfmon.h               |  221
-rw-r--r--  kern/perfmon_types.h         |  102
-rw-r--r--  kern/thread.c                |   37
-rw-r--r--  kern/thread.h                |    8
-rw-r--r--  kern/thread_i.h              |    5
-rw-r--r--  test/Kconfig                 |   12
-rw-r--r--  test/Makefile                |    3
-rw-r--r--  test/test_perfmon_cpu.c      |  225
-rw-r--r--  test/test_perfmon_thread.c   |  383
-rw-r--r--  test/test_perfmon_torture.c  |  346
14 files changed, 2798 insertions, 4 deletions
diff --git a/doc/intro.9.txt b/doc/intro.9.txt
index 281db50e..6fcd9618 100644
--- a/doc/intro.9.txt
+++ b/doc/intro.9.txt
@@ -153,6 +153,8 @@ module:kern/list::
Doubly-linked list.
module:kern/macros::
Useful generic macros.
+module:kern/perfmon::
+ Performance monitoring.
module:kern/rbtree::
Red-black tree.
module:kern/rdxtree::
diff --git a/kern/Kconfig b/kern/Kconfig
index fced67c2..977070f9 100644
--- a/kern/Kconfig
+++ b/kern/Kconfig
@@ -94,6 +94,19 @@ config THREAD_STACK_GUARD
If unsure, disable.
+config PERFMON
+ def_bool n
+
+config PERFMON_MAX_PMCS
+ int "Number of performance monitoring counters"
+ default 8
+ depends on PERFMON
+ ---help---
+ Number of performance monitoring counters.
+
+ This value affects the minimum duration of some critical sections
+ that run with interrupts disabled.
+
endmenu
menu "Debugging"
diff --git a/kern/Makefile b/kern/Makefile
index ab7d6b59..5b04fcb3 100644
--- a/kern/Makefile
+++ b/kern/Makefile
@@ -41,3 +41,5 @@ x15_SOURCES-$(CONFIG_SHELL) += kern/shell.c
x15_SOURCES-$(CONFIG_MUTEX_ADAPTIVE) += kern/mutex/mutex_adaptive.c
x15_SOURCES-$(CONFIG_MUTEX_PLAIN) += kern/mutex/mutex_plain.c
+
+x15_SOURCES-$(CONFIG_PERFMON) += kern/perfmon.c
diff --git a/kern/perfmon.c b/kern/perfmon.c
new file mode 100644
index 00000000..6fd319e8
--- /dev/null
+++ b/kern/perfmon.c
@@ -0,0 +1,1443 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Locking order:
+ *
+ * thread_runq -+
+ * |
+ * event -+-> interrupts -+-> td
+ * |
+ * +-> pmu
+ *
+ * TODO Kernel/user mode segregation.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <kern/clock.h>
+#include <kern/init.h>
+#include <kern/list.h>
+#include <kern/log.h>
+#include <kern/macros.h>
+#include <kern/percpu.h>
+#include <kern/perfmon.h>
+#include <kern/perfmon_types.h>
+#include <kern/spinlock.h>
+#include <kern/syscnt.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <kern/xcall.h>
+#include <machine/boot.h>
+#include <machine/cpu.h>
+
+/*
+ * Minimum hardware counter poll interval, in milliseconds.
+ *
+ * The main purpose of polling hardware counters is to detect overflows
+ * when the driver is unable to reliably use overflow interrupts.
+ */
+#define PERFMON_MIN_POLL_INTERVAL 50
+
+/*
+ * Internal event flags.
+ */
+#define PERFMON_EF_TYPE_CPU 0x100
+#define PERFMON_EF_ATTACHED 0x200
+#define PERFMON_EF_PUBLIC_MASK (PERFMON_EF_KERN \
+ | PERFMON_EF_USER \
+ | PERFMON_EF_RAW)
+
+/*
+ * Per-CPU performance monitoring counter.
+ *
+ * When an event is attached to a processor, the matching per-CPU PMC gets
+ * referenced. When a per-CPU PMC is referenced, its underlying hardware
+ * counter is active.
+ *
+ * Interrupts and preemption must be disabled on access.
+ */
+struct perfmon_cpu_pmc {
+ unsigned int nr_refs;
+ unsigned int pmc_id;
+ unsigned int raw_event_id;
+ uint64_t raw_value;
+ uint64_t value;
+};
+
+/*
+ * Per-CPU performance monitoring unit.
+ *
+ * Per-CPU PMCs are indexed the same way as global PMCs.
+ *
+ * Interrupts and preemption must be disabled on access.
+ */
+struct perfmon_cpu_pmu {
+ struct perfmon_dev *dev;
+ unsigned int cpu;
+ struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS];
+ struct timer poll_timer;
+ struct syscnt sc_nr_overflows;
+};
+
+/*
+ * Performance monitoring counter.
+ *
+ * When a PMC is used, it maps a raw event to a hardware counter.
+ * A PMC is used if and only if its reference counter isn't zero.
+ */
+struct perfmon_pmc {
+ unsigned int nr_refs;
+ unsigned int pmc_id;
+ unsigned int raw_event_id;
+};
+
+/*
+ * Performance monitoring unit.
+ *
+ * There is a single system-wide logical PMU, used to globally allocate
+ * PMCs. Reserving a counter across the entire system ensures thread
+ * migration isn't hindered by performance monitoring.
+ *
+ * Locking the global PMU is only required when allocating or releasing
+ * a PMC. Once allocated, the PMC may safely be accessed without holding
+ * the lock.
+ */
+struct perfmon_pmu {
+ struct perfmon_dev *dev;
+ struct spinlock lock;
+ struct perfmon_pmc pmcs[PERFMON_MAX_PMCS];
+};
+
+static struct perfmon_pmu perfmon_pmu;
+static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu;
+
+static struct perfmon_pmu *
+perfmon_get_pmu(void)
+{
+ return &perfmon_pmu;
+}
+
+static struct perfmon_cpu_pmu *
+perfmon_get_local_cpu_pmu(void)
+{
+ assert(!thread_preempt_enabled());
+ return cpu_local_ptr(perfmon_cpu_pmu);
+}
+
+static struct perfmon_cpu_pmu *
+perfmon_get_cpu_pmu(unsigned int cpu)
+{
+ return percpu_ptr(perfmon_cpu_pmu, cpu);
+}
+
+static void __init
+perfmon_pmc_init(struct perfmon_pmc *pmc)
+{
+ pmc->nr_refs = 0;
+}
+
+static bool
+perfmon_pmc_used(const struct perfmon_pmc *pmc)
+{
+ return pmc->nr_refs != 0;
+}
+
+static unsigned int
+perfmon_pmc_id(const struct perfmon_pmc *pmc)
+{
+ return pmc->pmc_id;
+}
+
+static unsigned int
+perfmon_pmc_raw_event_id(const struct perfmon_pmc *pmc)
+{
+ return pmc->raw_event_id;
+}
+
+static void
+perfmon_pmc_use(struct perfmon_pmc *pmc, unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ assert(!perfmon_pmc_used(pmc));
+
+ pmc->nr_refs = 1;
+ pmc->pmc_id = pmc_id;
+ pmc->raw_event_id = raw_event_id;
+}
+
+static void
+perfmon_pmc_ref(struct perfmon_pmc *pmc)
+{
+ assert(perfmon_pmc_used(pmc));
+ pmc->nr_refs++;
+}
+
+static void
+perfmon_pmc_unref(struct perfmon_pmc *pmc)
+{
+ assert(perfmon_pmc_used(pmc));
+ pmc->nr_refs--;
+}
+
+static unsigned int
+perfmon_pmu_get_pmc_index(const struct perfmon_pmu *pmu,
+ const struct perfmon_pmc *pmc)
+{
+ size_t pmc_index;
+
+ pmc_index = pmc - pmu->pmcs;
+ assert(pmc_index < ARRAY_SIZE(pmu->pmcs));
+ return pmc_index;
+}
+
+static struct perfmon_pmc *
+perfmon_pmu_get_pmc(struct perfmon_pmu *pmu, unsigned int index)
+{
+ assert(index < ARRAY_SIZE(pmu->pmcs));
+ return &pmu->pmcs[index];
+}
+
+static void __init
+perfmon_pmu_init(struct perfmon_pmu *pmu)
+{
+ pmu->dev = NULL;
+ spinlock_init(&pmu->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) {
+ perfmon_pmc_init(perfmon_pmu_get_pmc(pmu, i));
+ }
+}
+
+static void __init
+perfmon_pmu_set_dev(struct perfmon_pmu *pmu, struct perfmon_dev *dev)
+{
+ assert(dev);
+ assert(!pmu->dev);
+ pmu->dev = dev;
+}
+
+static struct perfmon_dev *
+perfmon_pmu_get_dev(const struct perfmon_pmu *pmu)
+{
+ return pmu->dev;
+}
+
+static void
+perfmon_pmu_handle_overflow_intr(const struct perfmon_pmu *pmu)
+{
+ pmu->dev->ops->handle_overflow_intr();
+}
+
+static int
+perfmon_pmu_translate(const struct perfmon_pmu *pmu,
+ unsigned int *raw_event_id,
+ unsigned int event_id)
+{
+ if (!pmu->dev) {
+ return ENODEV;
+ }
+
+ return pmu->dev->ops->translate(raw_event_id, event_id);
+}
+
+static int
+perfmon_pmu_alloc_pmc_id(const struct perfmon_pmu *pmu,
+ unsigned int *pmc_idp,
+ unsigned int pmc_index,
+ unsigned int raw_event_id)
+{
+ unsigned int pmc_id;
+ int error;
+
+ if (!pmu->dev) {
+ return ENODEV;
+ }
+
+ error = pmu->dev->ops->alloc(&pmc_id, pmc_index, raw_event_id);
+
+ if (error) {
+ return error;
+ }
+
+ *pmc_idp = pmc_id;
+ return 0;
+}
+
+static void
+perfmon_pmu_free_pmc_id(const struct perfmon_pmu *pmu, unsigned int pmc_id)
+{
+ assert(pmu->dev);
+ pmu->dev->ops->free(pmc_id);
+}
+
+static struct perfmon_pmc *
+perfmon_pmu_find_unused_pmc(struct perfmon_pmu *pmu)
+{
+ struct perfmon_pmc *pmc;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) {
+ pmc = perfmon_pmu_get_pmc(pmu, i);
+
+ if (!perfmon_pmc_used(pmc)) {
+ return pmc;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+perfmon_pmu_alloc_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp,
+ unsigned int raw_event_id)
+{
+ unsigned int pmc_id = 0, pmc_index;
+ struct perfmon_pmc *pmc;
+ int error;
+
+ pmc = perfmon_pmu_find_unused_pmc(pmu);
+
+ if (!pmc) {
+ return EAGAIN;
+ }
+
+ pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc);
+ error = perfmon_pmu_alloc_pmc_id(pmu, &pmc_id, pmc_index, raw_event_id);
+
+ if (error) {
+ return error;
+ }
+
+ perfmon_pmc_use(pmc, pmc_id, raw_event_id);
+ *pmcp = pmc;
+ return 0;
+}
+
+static void
+perfmon_pmu_free_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc)
+{
+ unsigned int pmc_id;
+
+ assert(!perfmon_pmc_used(pmc));
+ pmc_id = perfmon_pmc_id(pmc);
+ perfmon_pmu_free_pmc_id(pmu, pmc_id);
+}
+
+static struct perfmon_pmc *
+perfmon_pmu_get_pmc_by_raw_event_id(struct perfmon_pmu *pmu,
+ unsigned int raw_event_id)
+{
+ struct perfmon_pmc *pmc;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) {
+ pmc = perfmon_pmu_get_pmc(pmu, i);
+
+ if (!perfmon_pmc_used(pmc)) {
+ continue;
+ }
+
+ if (perfmon_pmc_raw_event_id(pmc) == raw_event_id) {
+ return pmc;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+perfmon_pmu_take_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp,
+ unsigned int raw_event_id)
+{
+ struct perfmon_pmc *pmc;
+ int error;
+
+ spinlock_lock(&pmu->lock);
+
+ pmc = perfmon_pmu_get_pmc_by_raw_event_id(pmu, raw_event_id);
+
+ if (pmc) {
+ perfmon_pmc_ref(pmc);
+ error = 0;
+ } else {
+ error = perfmon_pmu_alloc_pmc(pmu, &pmc, raw_event_id);
+
+ if (error) {
+ pmc = NULL;
+ }
+ }
+
+ spinlock_unlock(&pmu->lock);
+
+ if (error) {
+ return error;
+ }
+
+ *pmcp = pmc;
+ return 0;
+}
+
+static void
+perfmon_pmu_put_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc)
+{
+ spinlock_lock(&pmu->lock);
+
+ perfmon_pmc_unref(pmc);
+
+ if (!perfmon_pmc_used(pmc)) {
+ perfmon_pmu_free_pmc(pmu, pmc);
+ }
+
+ spinlock_unlock(&pmu->lock);
+}
+
+static int
+perfmon_check_event_args(unsigned int id, unsigned int flags)
+{
+ if (!((flags & PERFMON_EF_PUBLIC_MASK) == flags)
+ || !((flags & PERFMON_EF_RAW) || (id < PERFMON_NR_GENERIC_EVENTS))
+ || !((flags & (PERFMON_EF_KERN | PERFMON_EF_USER)))) {
+ return EINVAL;
+ }
+
+ return 0;
+}
+
+int
+perfmon_event_init(struct perfmon_event *event, unsigned int id,
+ unsigned int flags)
+{
+ int error;
+
+ error = perfmon_check_event_args(id, flags);
+
+ if (error) {
+ return error;
+ }
+
+ spinlock_init(&event->lock);
+ event->flags = flags;
+ event->id = id;
+ event->value = 0;
+ return 0;
+}
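
To make the validation rules above concrete, here is a small usage sketch (illustrative only, not part of the patch; 0x3c is an arbitrary raw event ID):

    struct perfmon_event ev;
    int error;

    /* Valid: generic event, kernel mode only. */
    error = perfmon_event_init(&ev, PERFMON_EV_CYCLE, PERFMON_EF_KERN);
    assert(!error);

    /* Valid: raw hardware event ID, monitored in both modes. */
    error = perfmon_event_init(&ev, 0x3c, PERFMON_EF_RAW
                               | PERFMON_EF_KERN | PERFMON_EF_USER);
    assert(!error);

    /* Rejected: neither PERFMON_EF_KERN nor PERFMON_EF_USER requested. */
    error = perfmon_event_init(&ev, PERFMON_EV_CYCLE, 0);
    assert(error == EINVAL);
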
+
+static bool
+perfmon_event_type_cpu(const struct perfmon_event *event)
+{
+ return event->flags & PERFMON_EF_TYPE_CPU;
+}
+
+static void
+perfmon_event_set_type_cpu(struct perfmon_event *event)
+{
+ event->flags |= PERFMON_EF_TYPE_CPU;
+}
+
+static void
+perfmon_event_clear_type_cpu(struct perfmon_event *event)
+{
+ event->flags &= ~PERFMON_EF_TYPE_CPU;
+}
+
+static bool
+perfmon_event_attached(const struct perfmon_event *event)
+{
+ return event->flags & PERFMON_EF_ATTACHED;
+}
+
+static unsigned int
+perfmon_event_pmc_index(const struct perfmon_event *event)
+{
+ assert(perfmon_event_attached(event));
+ return event->pmc_index;
+}
+
+static void __init
+perfmon_cpu_pmc_init(struct perfmon_cpu_pmc *cpu_pmc)
+{
+ cpu_pmc->nr_refs = 0;
+}
+
+static bool
+perfmon_cpu_pmc_used(const struct perfmon_cpu_pmc *cpu_pmc)
+{
+ return cpu_pmc->nr_refs != 0;
+}
+
+static void
+perfmon_cpu_pmc_use(struct perfmon_cpu_pmc *cpu_pmc, unsigned int pmc_id,
+ unsigned int raw_event_id, uint64_t raw_value)
+{
+ assert(!perfmon_cpu_pmc_used(cpu_pmc));
+
+ cpu_pmc->nr_refs = 1;
+ cpu_pmc->pmc_id = pmc_id;
+ cpu_pmc->raw_event_id = raw_event_id;
+ cpu_pmc->raw_value = raw_value;
+ cpu_pmc->value = 0;
+}
+
+static void
+perfmon_cpu_pmc_ref(struct perfmon_cpu_pmc *cpu_pmc)
+{
+ assert(perfmon_cpu_pmc_used(cpu_pmc));
+ cpu_pmc->nr_refs++;
+}
+
+static void
+perfmon_cpu_pmc_unref(struct perfmon_cpu_pmc *cpu_pmc)
+{
+ assert(perfmon_cpu_pmc_used(cpu_pmc));
+ cpu_pmc->nr_refs--;
+}
+
+static unsigned int
+perfmon_cpu_pmc_id(const struct perfmon_cpu_pmc *cpu_pmc)
+{
+ return cpu_pmc->pmc_id;
+}
+
+static bool
+perfmon_cpu_pmc_update(struct perfmon_cpu_pmc *cpu_pmc, uint64_t raw_value,
+ unsigned int pmc_width)
+{
+ bool overflowed;
+ uint64_t delta;
+
+ delta = raw_value - cpu_pmc->raw_value;
+
+ if (pmc_width == 64) {
+ overflowed = false;
+ } else {
+ if (raw_value >= cpu_pmc->raw_value) {
+ overflowed = false;
+ } else {
+ overflowed = true;
+ delta += (uint64_t)1 << pmc_width;
+ }
+ }
+
+ cpu_pmc->value += delta;
+ cpu_pmc->raw_value = raw_value;
+ return overflowed;
+}
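
The wraparound arithmetic above can be checked in isolation. A standalone sketch, assuming a hypothetical 40-bit counter:

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
        unsigned int pmc_width = 40;                       /* assumed width */
        uint64_t prev = (UINT64_C(1) << pmc_width) - 100;  /* saved raw value */
        uint64_t cur = 50;                                 /* read after a wrap */
        uint64_t delta = cur - prev;                       /* wraps modulo 2^64 */

        /* The raw value went backwards, so the counter overflowed:
         * add back one full counter period, as in the code above. */
        if (cur < prev) {
            delta += UINT64_C(1) << pmc_width;
        }

        assert(delta == 150);   /* 100 counts before the wrap + 50 after */
        return 0;
    }
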
+
+static uint64_t
+perfmon_cpu_pmc_get_value(const struct perfmon_cpu_pmc *cpu_pmc)
+{
+ return cpu_pmc->value;
+}
+
+static struct perfmon_cpu_pmc *
+perfmon_cpu_pmu_get_pmc(struct perfmon_cpu_pmu *cpu_pmu, unsigned int index)
+{
+ assert(index < ARRAY_SIZE(cpu_pmu->pmcs));
+ return &cpu_pmu->pmcs[index];
+}
+
+static void
+perfmon_cpu_pmu_start(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ cpu_pmu->dev->ops->start(pmc_id, raw_event_id);
+}
+
+static void
+perfmon_cpu_pmu_stop(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id)
+{
+ cpu_pmu->dev->ops->stop(pmc_id);
+}
+
+static uint64_t
+perfmon_cpu_pmu_read(const struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id)
+{
+ return cpu_pmu->dev->ops->read(pmc_id);
+}
+
+static void
+perfmon_cpu_pmu_use_pmc(struct perfmon_cpu_pmu *cpu_pmu,
+ struct perfmon_cpu_pmc *cpu_pmc,
+ unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ uint64_t raw_value;
+
+ perfmon_cpu_pmu_start(cpu_pmu, pmc_id, raw_event_id);
+ raw_value = perfmon_cpu_pmu_read(cpu_pmu, pmc_id);
+ perfmon_cpu_pmc_use(cpu_pmc, pmc_id, raw_event_id, raw_value);
+}
+
+static void
+perfmon_cpu_pmu_update_pmc(struct perfmon_cpu_pmu *cpu_pmu,
+ struct perfmon_cpu_pmc *cpu_pmc)
+{
+ uint64_t raw_value;
+ bool overflowed;
+
+ raw_value = perfmon_cpu_pmu_read(cpu_pmu, perfmon_cpu_pmc_id(cpu_pmc));
+ overflowed = perfmon_cpu_pmc_update(cpu_pmc, raw_value,
+ cpu_pmu->dev->pmc_width);
+
+ if (overflowed) {
+ syscnt_inc(&cpu_pmu->sc_nr_overflows);
+ }
+}
+
+static void
+perfmon_cpu_pmu_check_overflow(void *arg)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmu = arg;
+ assert(cpu_pmu->cpu == cpu_id());
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) {
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, i);
+
+ if (!perfmon_cpu_pmc_used(cpu_pmc)) {
+ continue;
+ }
+
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ }
+}
+
+static void
+perfmon_cpu_pmu_poll(struct timer *timer)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+
+ cpu_pmu = structof(timer, struct perfmon_cpu_pmu, poll_timer);
+ xcall_call(perfmon_cpu_pmu_check_overflow, cpu_pmu, cpu_pmu->cpu);
+ timer_schedule(timer, timer_get_time(timer) + cpu_pmu->dev->poll_interval);
+}
+
+static void __init
+perfmon_cpu_pmu_init(struct perfmon_cpu_pmu *cpu_pmu, unsigned int cpu,
+ struct perfmon_dev *dev)
+{
+ char name[SYSCNT_NAME_SIZE];
+
+ cpu_pmu->dev = dev;
+ cpu_pmu->cpu = cpu;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) {
+ perfmon_cpu_pmc_init(perfmon_cpu_pmu_get_pmc(cpu_pmu, i));
+ }
+
+ if (dev->ops->handle_overflow_intr == NULL) {
+ assert(dev->poll_interval != 0);
+
+ /*
+ * XXX Ideally, this would be an interrupt timer instead of a high
+ * priority one, but it can't be because the handler performs
+ * cross-calls to remote processors, which requires that interrupts
+ * be enabled. This is one potential user of CPU-bound timers.
+ */
+ timer_init(&cpu_pmu->poll_timer, perfmon_cpu_pmu_poll, TIMER_HIGH_PRIO);
+ timer_schedule(&cpu_pmu->poll_timer, dev->poll_interval);
+ }
+
+ snprintf(name, sizeof(name), "perfmon_nr_overflows/%u", cpu);
+ syscnt_register(&cpu_pmu->sc_nr_overflows, name);
+}
+
+static uint64_t
+perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index,
+ unsigned int pmc_id, unsigned int raw_event_id)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+
+ if (perfmon_cpu_pmc_used(cpu_pmc)) {
+ perfmon_cpu_pmc_ref(cpu_pmc);
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ } else {
+ perfmon_cpu_pmu_use_pmc(cpu_pmu, cpu_pmc, pmc_id, raw_event_id);
+ }
+
+ return perfmon_cpu_pmc_get_value(cpu_pmc);
+}
+
+static uint64_t
+perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+ unsigned int pmc_id;
+ uint64_t value;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+ pmc_id = perfmon_cpu_pmc_id(cpu_pmc);
+
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ value = perfmon_cpu_pmc_get_value(cpu_pmc);
+
+ perfmon_cpu_pmc_unref(cpu_pmc);
+
+ if (!perfmon_cpu_pmc_used(cpu_pmc)) {
+ perfmon_cpu_pmu_stop(cpu_pmu, pmc_id);
+ }
+
+ return value;
+}
+
+static uint64_t
+perfmon_cpu_pmu_sync(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ return perfmon_cpu_pmc_get_value(cpu_pmc);
+}
+
+static void
+perfmon_td_pmc_init(struct perfmon_td_pmc *td_pmc)
+{
+ td_pmc->nr_refs = 0;
+ td_pmc->loaded = false;
+ td_pmc->value = 0;
+}
+
+static bool
+perfmon_td_pmc_used(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->nr_refs != 0;
+}
+
+static void
+perfmon_td_pmc_use(struct perfmon_td_pmc *td_pmc, unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ assert(!perfmon_td_pmc_used(td_pmc));
+
+ td_pmc->nr_refs = 1;
+ td_pmc->loaded = false;
+ td_pmc->pmc_id = pmc_id;
+ td_pmc->raw_event_id = raw_event_id;
+ td_pmc->value = 0;
+}
+
+static unsigned int
+perfmon_td_pmc_id(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->pmc_id;
+}
+
+static unsigned int
+perfmon_td_pmc_raw_event_id(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->raw_event_id;
+}
+
+static void
+perfmon_td_pmc_ref(struct perfmon_td_pmc *td_pmc)
+{
+ assert(perfmon_td_pmc_used(td_pmc));
+ td_pmc->nr_refs++;
+}
+
+static void
+perfmon_td_pmc_unref(struct perfmon_td_pmc *td_pmc)
+{
+ assert(perfmon_td_pmc_used(td_pmc));
+ td_pmc->nr_refs--;
+}
+
+static bool
+perfmon_td_pmc_loaded(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->loaded;
+}
+
+static void
+perfmon_td_pmc_load(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value)
+{
+ assert(!perfmon_td_pmc_loaded(td_pmc));
+
+ td_pmc->cpu_pmc_value = cpu_pmc_value;
+ td_pmc->loaded = true;
+}
+
+static void
+perfmon_td_pmc_update(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value)
+{
+ uint64_t delta;
+
+ assert(perfmon_td_pmc_loaded(td_pmc));
+
+ delta = cpu_pmc_value - td_pmc->cpu_pmc_value;
+ td_pmc->cpu_pmc_value = cpu_pmc_value;
+ td_pmc->value += delta;
+}
+
+static void
+perfmon_td_pmc_unload(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value)
+{
+ perfmon_td_pmc_update(td_pmc, cpu_pmc_value);
+ td_pmc->loaded = false;
+}
+
+static uint64_t
+perfmon_td_pmc_read(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->value;
+}
+
+static unsigned int
+perfmon_td_get_pmc_index(const struct perfmon_td *td,
+ const struct perfmon_td_pmc *td_pmc)
+{
+ size_t pmc_index;
+
+ pmc_index = td_pmc - td->pmcs;
+ assert(pmc_index < ARRAY_SIZE(td->pmcs));
+ return pmc_index;
+}
+
+static struct perfmon_td_pmc *
+perfmon_td_get_pmc(struct perfmon_td *td, unsigned int index)
+{
+ assert(index < ARRAY_SIZE(td->pmcs));
+ return &td->pmcs[index];
+}
+
+void
+perfmon_td_init(struct perfmon_td *td)
+{
+ spinlock_init(&td->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) {
+ perfmon_td_pmc_init(perfmon_td_get_pmc(td, i));
+ }
+}
+
+static void
+perfmon_td_load_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc)
+{
+ unsigned int pmc_index, pmc_id, raw_event_id;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ uint64_t cpu_pmc_value;
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ pmc_id = perfmon_td_pmc_id(td_pmc);
+ raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index,
+ pmc_id, raw_event_id);
+ perfmon_td_pmc_load(td_pmc, cpu_pmc_value);
+}
+
+static void
+perfmon_td_unload_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index);
+ perfmon_td_pmc_unload(td_pmc, cpu_pmc_value);
+}
+
+static void
+perfmon_td_update_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index);
+ perfmon_td_pmc_update(td_pmc, cpu_pmc_value);
+}
+
+void
+perfmon_td_load(struct perfmon_td *td)
+{
+ unsigned int pmc_index, pmc_id, raw_event_id;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_td_pmc *td_pmc;
+ uint64_t cpu_pmc_value;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+
+ spinlock_lock(&td->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) {
+ td_pmc = perfmon_td_get_pmc(td, i);
+
+ if (!perfmon_td_pmc_used(td_pmc) || perfmon_td_pmc_loaded(td_pmc)) {
+ continue;
+ }
+
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ pmc_id = perfmon_td_pmc_id(td_pmc);
+ raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index,
+ pmc_id, raw_event_id);
+ perfmon_td_pmc_load(td_pmc, cpu_pmc_value);
+ }
+
+ spinlock_unlock(&td->lock);
+}
+
+void
+perfmon_td_unload(struct perfmon_td *td)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_td_pmc *td_pmc;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+
+ spinlock_lock(&td->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) {
+ td_pmc = perfmon_td_get_pmc(td, i);
+
+ if (!perfmon_td_pmc_loaded(td_pmc)) {
+ continue;
+ }
+
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index);
+ perfmon_td_pmc_unload(td_pmc, cpu_pmc_value);
+ }
+
+ spinlock_unlock(&td->lock);
+}
+
+static void
+perfmon_event_load(struct perfmon_event *event, uint64_t pmc_value)
+{
+ event->pmc_value = pmc_value;
+}
+
+static void
+perfmon_event_update(struct perfmon_event *event, uint64_t pmc_value)
+{
+ uint64_t delta;
+
+ delta = pmc_value - event->pmc_value;
+ event->value += delta;
+ event->pmc_value = pmc_value;
+}
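
This baseline-and-delta scheme is what lets several events share one hardware counter: each event only accumulates what happened since its own last synchronization. A minimal standalone sketch with made-up counter values:

    #include <assert.h>
    #include <stdint.h>

    struct evt {
        uint64_t pmc_value;  /* saved counter value */
        uint64_t value;      /* accumulated occurrences */
    };

    static void
    evt_update(struct evt *e, uint64_t pmc_value)
    {
        e->value += pmc_value - e->pmc_value;
        e->pmc_value = pmc_value;
    }

    int
    main(void)
    {
        /* Two events sharing one counter, loaded at different times. */
        struct evt a = { .pmc_value = 1000 }, b = { .pmc_value = 4000 };

        /* The shared counter reaches 5000: each event sees only its
         * own share of the progression. */
        evt_update(&a, 5000);
        evt_update(&b, 5000);
        assert(a.value == 4000 && b.value == 1000);
        return 0;
    }
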
+
+static void
+perfmon_event_load_cpu_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ const struct perfmon_pmc *pmc;
+ struct perfmon_pmu *pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ event = arg;
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmu = perfmon_get_pmu();
+ pmc_index = perfmon_event_pmc_index(event);
+ pmc = perfmon_pmu_get_pmc(pmu, pmc_index);
+ cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index,
+ perfmon_pmc_id(pmc),
+ perfmon_pmc_raw_event_id(pmc));
+ perfmon_event_load(event, cpu_pmc_value);
+}
+
+static void
+perfmon_event_load_cpu(struct perfmon_event *event, unsigned int cpu)
+{
+ perfmon_event_set_type_cpu(event);
+ event->cpu = cpu;
+ xcall_call(perfmon_event_load_cpu_remote, event, cpu);
+}
+
+static void
+perfmon_event_load_thread_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ uint64_t td_pmc_value;
+
+ event = arg;
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock(&td->lock);
+
+ if (thread_self() == event->thread) {
+
+ if (perfmon_td_pmc_loaded(td_pmc)) {
+ perfmon_td_update_pmc(td, td_pmc);
+ } else {
+ perfmon_td_load_pmc(td, td_pmc);
+ }
+ }
+
+ td_pmc_value = perfmon_td_pmc_read(td_pmc);
+
+ spinlock_unlock(&td->lock);
+
+ perfmon_event_load(event, td_pmc_value);
+}
+
+static void
+perfmon_event_load_thread(struct perfmon_event *event, struct thread *thread)
+{
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ struct perfmon_pmu *pmu;
+ const struct perfmon_pmc *pmc;
+ unsigned int pmc_index;
+ unsigned long flags;
+
+ pmu = perfmon_get_pmu();
+
+ thread_ref(thread);
+ event->thread = thread;
+
+ pmc_index = perfmon_event_pmc_index(event);
+ pmc = perfmon_pmu_get_pmc(pmu, pmc_index);
+ td = thread_get_perfmon_td(thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock_intr_save(&td->lock, &flags);
+
+ if (perfmon_td_pmc_used(td_pmc)) {
+ perfmon_td_pmc_ref(td_pmc);
+ } else {
+ perfmon_td_pmc_use(td_pmc, perfmon_pmc_id(pmc),
+ perfmon_pmc_raw_event_id(pmc));
+ }
+
+ spinlock_unlock_intr_restore(&td->lock, flags);
+
+ xcall_call(perfmon_event_load_thread_remote, event, thread_cpu(thread));
+}
+
+static void
+perfmon_event_unload_cpu_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ event = arg;
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_event_pmc_index(event);
+ cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index);
+ perfmon_event_update(event, cpu_pmc_value);
+}
+
+static void
+perfmon_event_unload_cpu(struct perfmon_event *event)
+{
+ xcall_call(perfmon_event_unload_cpu_remote, event, event->cpu);
+ perfmon_event_clear_type_cpu(event);
+}
+
+static void
+perfmon_event_unload_thread_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ uint64_t td_pmc_value;
+
+ event = arg;
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock(&td->lock);
+
+ if ((thread_self() == event->thread) && perfmon_td_pmc_loaded(td_pmc)) {
+ if (perfmon_td_pmc_used(td_pmc)) {
+ perfmon_td_update_pmc(td, td_pmc);
+ } else {
+ perfmon_td_unload_pmc(td, td_pmc);
+ }
+ }
+
+ td_pmc_value = perfmon_td_pmc_read(td_pmc);
+
+ spinlock_unlock(&td->lock);
+
+ perfmon_event_update(event, td_pmc_value);
+}
+
+static void
+perfmon_event_unload_thread(struct perfmon_event *event)
+{
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ unsigned long flags;
+
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock_intr_save(&td->lock, &flags);
+ perfmon_td_pmc_unref(td_pmc);
+ spinlock_unlock_intr_restore(&td->lock, flags);
+
+ xcall_call(perfmon_event_unload_thread_remote, event,
+ thread_cpu(event->thread));
+
+ thread_unref(event->thread);
+ event->thread = NULL;
+}
+
+static void
+perfmon_event_sync_cpu_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ event = arg;
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_event_pmc_index(event);
+ cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index);
+ perfmon_event_update(event, cpu_pmc_value);
+}
+
+static void
+perfmon_event_sync_cpu(struct perfmon_event *event)
+{
+ xcall_call(perfmon_event_sync_cpu_remote, event, event->cpu);
+}
+
+static void
+perfmon_event_sync_thread_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ uint64_t td_pmc_value;
+
+ event = arg;
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock(&td->lock);
+
+ if (thread_self() == event->thread) {
+ perfmon_td_update_pmc(td, td_pmc);
+ }
+
+ td_pmc_value = perfmon_td_pmc_read(td_pmc);
+
+ spinlock_unlock(&td->lock);
+
+ perfmon_event_update(event, td_pmc_value);
+}
+
+static void
+perfmon_event_sync_thread(struct perfmon_event *event)
+{
+ xcall_call(perfmon_event_sync_thread_remote, event,
+ thread_cpu(event->thread));
+}
+
+static int
+perfmon_event_attach_pmu(struct perfmon_event *event)
+{
+ unsigned int raw_event_id = 0;
+ struct perfmon_pmu *pmu;
+ struct perfmon_pmc *pmc;
+ int error;
+
+ pmu = perfmon_get_pmu();
+
+ if (!(event->flags & PERFMON_EF_RAW)) {
+ error = perfmon_pmu_translate(pmu, &raw_event_id, event->id);
+
+ if (error) {
+ return error;
+ }
+ }
+
+ error = perfmon_pmu_take_pmc(pmu, &pmc, raw_event_id);
+
+ if (error) {
+ return error;
+ }
+
+ event->pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc);
+ event->flags |= PERFMON_EF_ATTACHED;
+ event->value = 0;
+ return 0;
+}
+
+static void
+perfmon_event_detach_pmu(struct perfmon_event *event)
+{
+ struct perfmon_pmu *pmu;
+ struct perfmon_pmc *pmc;
+
+ pmu = perfmon_get_pmu();
+ pmc = perfmon_pmu_get_pmc(pmu, perfmon_event_pmc_index(event));
+ perfmon_pmu_put_pmc(pmu, pmc);
+ event->flags &= ~PERFMON_EF_ATTACHED;
+}
+
+int
+perfmon_event_attach(struct perfmon_event *event, struct thread *thread)
+{
+ int error;
+
+ spinlock_lock(&event->lock);
+
+ if (perfmon_event_attached(event)) {
+ error = EINVAL;
+ goto error;
+ }
+
+ error = perfmon_event_attach_pmu(event);
+
+ if (error) {
+ goto error;
+ }
+
+ perfmon_event_load_thread(event, thread);
+
+ spinlock_unlock(&event->lock);
+
+ return 0;
+
+error:
+ spinlock_unlock(&event->lock);
+
+ return error;
+}
+
+int
+perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu)
+{
+ int error;
+
+ if (cpu >= cpu_count()) {
+ return EINVAL;
+ }
+
+ spinlock_lock(&event->lock);
+
+ if (perfmon_event_attached(event)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = perfmon_event_attach_pmu(event);
+
+ if (error) {
+ goto out;
+ }
+
+ perfmon_event_load_cpu(event, cpu);
+ error = 0;
+
+out:
+ spinlock_unlock(&event->lock);
+
+ return error;
+}
+
+int
+perfmon_event_detach(struct perfmon_event *event)
+{
+ int error;
+
+ spinlock_lock(&event->lock);
+
+ if (!perfmon_event_attached(event)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ if (perfmon_event_type_cpu(event)) {
+ perfmon_event_unload_cpu(event);
+ } else {
+ perfmon_event_unload_thread(event);
+ }
+
+ perfmon_event_detach_pmu(event);
+ error = 0;
+
+out:
+ spinlock_unlock(&event->lock);
+
+ return error;
+}
+
+uint64_t
+perfmon_event_read(struct perfmon_event *event)
+{
+ uint64_t value;
+
+ spinlock_lock(&event->lock);
+
+ if (perfmon_event_attached(event)) {
+ if (perfmon_event_type_cpu(event)) {
+ perfmon_event_sync_cpu(event);
+ } else {
+ perfmon_event_sync_thread(event);
+ }
+ }
+
+ value = event->value;
+
+ spinlock_unlock(&event->lock);
+
+ return value;
+}
+
+static uint64_t __init
+perfmon_compute_poll_interval(uint64_t pmc_width)
+{
+ uint64_t cycles, time;
+
+ if (pmc_width == 64) {
+ cycles = (uint64_t)-1;
+ } else {
+ cycles = (uint64_t)1 << pmc_width;
+ }
+
+ /*
+ * Assume an unrealistically high upper bound on the number of
+ * events per cycle to obtain a comfortable margin of safety.
+ */
+ cycles /= 100;
+ time = cycles / (cpu_get_freq() / 1000);
+
+ if (time < PERFMON_MIN_POLL_INTERVAL) {
+ log_warning("perfmon: invalid poll interval %llu, forced to %llu",
+ (unsigned long long)time,
+ (unsigned long long)PERFMON_MIN_POLL_INTERVAL);
+ time = PERFMON_MIN_POLL_INTERVAL;
+ }
+
+ return clock_ticks_from_ms(time);
+}
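
To get a feel for the resulting numbers, the same computation can be run standalone, assuming a hypothetical 3 GHz clock (the kernel obtains the real frequency from cpu_get_freq()):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t freq = 3000000000;     /* 3 GHz, assumed */
        unsigned int widths[] = { 32, 48 };

        for (unsigned int i = 0; i < 2; i++) {
            uint64_t cycles = (UINT64_C(1) << widths[i]) / 100;
            uint64_t time = cycles / (freq / 1000);
            printf("width %u -> poll every %" PRIu64 " ms\n", widths[i], time);
        }

        /* Prints 14 ms for a 32-bit counter (raised to the 50 ms minimum
         * by the code above) and about 938249 ms, roughly 16 minutes,
         * for a 48-bit counter. */
        return 0;
    }
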
+
+void __init
+perfmon_register(struct perfmon_dev *dev)
+{
+ const struct perfmon_dev_ops *ops;
+
+ ops = dev->ops;
+ assert(ops->translate && ops->alloc && ops->free
+ && ops->start && ops->stop && ops->read);
+ assert(dev->pmc_width <= 64);
+
+ if ((dev->ops->handle_overflow_intr == NULL) && (dev->poll_interval == 0)) {
+ dev->poll_interval = perfmon_compute_poll_interval(dev->pmc_width);
+ }
+
+ perfmon_pmu_set_dev(perfmon_get_pmu(), dev);
+}
+
+void
+perfmon_overflow_intr(void)
+{
+ perfmon_pmu_handle_overflow_intr(perfmon_get_pmu());
+}
+
+void
+perfmon_report_overflow(unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+}
+
+static int __init
+perfmon_bootstrap(void)
+{
+ perfmon_pmu_init(perfmon_get_pmu());
+ return 0;
+}
+
+INIT_OP_DEFINE(perfmon_bootstrap,
+ INIT_OP_DEP(log_setup, true),
+ INIT_OP_DEP(spinlock_setup, true));
+
+static int __init
+perfmon_setup(void)
+{
+ struct perfmon_dev *dev;
+
+ dev = perfmon_pmu_get_dev(perfmon_get_pmu());
+
+ if (!dev) {
+ return ENODEV;
+ }
+
+ for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) {
+ perfmon_cpu_pmu_init(perfmon_get_cpu_pmu(cpu), cpu, dev);
+ }
+
+ return 0;
+}
+
+INIT_OP_DEFINE(perfmon_setup,
+ INIT_OP_DEP(boot_setup_pmu, true),
+ INIT_OP_DEP(cpu_mp_probe, true),
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(percpu_setup, true),
+ INIT_OP_DEP(perfmon_bootstrap, true),
+ INIT_OP_DEP(spinlock_setup, true),
+ INIT_OP_DEP(syscnt_setup, true));
diff --git a/kern/perfmon.h b/kern/perfmon.h
new file mode 100644
index 00000000..0c17752c
--- /dev/null
+++ b/kern/perfmon.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Performance monitoring based on hardware performance counters.
+ *
+ * The hardware layer is represented by a performance monitoring unit (PMU),
+ * which provides performance monitoring counters (PMCs).
+ */
+
+#ifndef KERN_PERFMON_H
+#define KERN_PERFMON_H
+
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/perfmon_types.h>
+#include <kern/thread.h>
+
+/*
+ * IDs of generic performance monitoring events.
+ */
+#define PERFMON_EV_CYCLE 0
+#define PERFMON_EV_REF_CYCLE 1
+#define PERFMON_EV_INSTRUCTION 2
+#define PERFMON_EV_CACHE_REF 3
+#define PERFMON_EV_CACHE_MISS 4
+#define PERFMON_EV_BRANCH 5
+#define PERFMON_EV_BRANCH_MISS 6
+#define PERFMON_NR_GENERIC_EVENTS 7
+
+/*
+ * Event flags.
+ */
+#define PERFMON_EF_KERN 0x1 /* Monitor events in kernel mode */
+#define PERFMON_EF_USER 0x2 /* Monitor events in user mode */
+#define PERFMON_EF_RAW 0x4 /* Raw event ID, generic if unset */
+
+/*
+ * Performance monitoring operations.
+ *
+ * This is a public structure.
+ *
+ * All operations are either global but serialized by the caller, or
+ * processor-local and called with interrupts and preemption disabled.
+ *
+ * If the hardware doesn't efficiently support overflow interrupts, the
+ * handler must be set to NULL, making the perfmon module periodically
+ * check the raw value of the hardware counters.
+ */
+struct perfmon_dev_ops {
+ /*
+ * Convert a generic event ID into a raw event ID.
+ *
+ * Global operation.
+ */
+ int (*translate)(unsigned int *raw_event_idp, unsigned int event_id);
+
+ /*
+ * Allocate a performance monitoring counter globally for the given
+ * raw event ID, and return the counter ID through the given pointer.
+ * The range of IDs must start from 0 and increase contiguously.
+ *
+ * The PMC index is the value the driver passes back through
+ * perfmon_report_overflow() when implementing a custom overflow
+ * interrupt handler.
+ *
+ * Global operation.
+ */
+ int (*alloc)(unsigned int *pmc_idp, unsigned int pmc_index,
+ unsigned int raw_event_id);
+
+ /*
+ * Free an allocated performance monitoring counter.
+ *
+ * Global operation.
+ */
+ void (*free)(unsigned int pmc_id);
+
+ /*
+ * Start a performance monitoring counter for the given raw event ID.
+ *
+ * Processor-local operation.
+ */
+ void (*start)(unsigned int pmc_id, unsigned int raw_event_id);
+
+ /*
+ * Stop a performance monitoring counter.
+ *
+ * Processor-local operation.
+ */
+ void (*stop)(unsigned int pmc_id);
+
+ /*
+ * Read the value of a performance monitoring counter.
+ *
+ * Processor-local operation.
+ */
+ uint64_t (*read)(unsigned int pmc_id);
+
+ /*
+ * Custom overflow interrupt handler.
+ *
+ * Processor-local operation.
+ */
+ void (*handle_overflow_intr)(void);
+};
+
+/*
+ * Performance monitoring device.
+ *
+ * This is a public structure.
+ *
+ * The PMC width is expressed in bits.
+ *
+ * If the driver doesn't provide an overflow interrupt handler, it may set
+ * the poll interval, in ticks, to a duration that safely allows the detection
+ * of a single overflow. A value of 0 lets the perfmon module compute a poll
+ * interval itself.
+ */
+struct perfmon_dev {
+ const struct perfmon_dev_ops *ops;
+ unsigned int pmc_width;
+ uint64_t poll_interval;
+};
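
As a sketch of the driver contract, here is a hypothetical minimal driver: one free-running 40-bit cycle counter, no overflow interrupt (so the module polls). All dummy_* names are invented; a real driver would program hardware in start/stop/read:

    static uint64_t dummy_pmu_counter;  /* stands in for a hardware register */

    static int
    dummy_pmu_translate(unsigned int *raw_event_idp, unsigned int event_id)
    {
        if (event_id != PERFMON_EV_CYCLE) {
            return EINVAL;
        }

        *raw_event_idp = 0;
        return 0;
    }

    static int
    dummy_pmu_alloc(unsigned int *pmc_idp, unsigned int pmc_index,
                    unsigned int raw_event_id)
    {
        (void)pmc_index;
        (void)raw_event_id;
        *pmc_idp = 0;   /* only one counter, always available */
        return 0;
    }

    static void
    dummy_pmu_free(unsigned int pmc_id)
    {
        (void)pmc_id;
    }

    static void
    dummy_pmu_start(unsigned int pmc_id, unsigned int raw_event_id)
    {
        (void)pmc_id;
        (void)raw_event_id;     /* free-running counter: nothing to enable */
    }

    static void
    dummy_pmu_stop(unsigned int pmc_id)
    {
        (void)pmc_id;
    }

    static uint64_t
    dummy_pmu_read(unsigned int pmc_id)
    {
        (void)pmc_id;
        return dummy_pmu_counter;
    }

    static const struct perfmon_dev_ops dummy_pmu_ops = {
        .translate            = dummy_pmu_translate,
        .alloc                = dummy_pmu_alloc,
        .free                 = dummy_pmu_free,
        .start                = dummy_pmu_start,
        .stop                 = dummy_pmu_stop,
        .read                 = dummy_pmu_read,
        .handle_overflow_intr = NULL,   /* no interrupt: perfmon polls */
    };

    static struct perfmon_dev dummy_pmu_dev = {
        .ops = &dummy_pmu_ops,
        .pmc_width = 40,
        .poll_interval = 0,             /* 0: let perfmon compute it */
    };

    /* Registration would happen in machine-specific init, e.g. from the
     * boot_setup_pmu init operation: perfmon_register(&dummy_pmu_dev); */
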
+
+/*
+ * Performance monitoring thread data.
+ */
+struct perfmon_td;
+
+/*
+ * Performance monitoring event.
+ *
+ * An event describes a single, well-defined hardware condition and tracks
+ * its occurrences over a period of time.
+ */
+struct perfmon_event;
+
+/*
+ * Initialize thread-specific data.
+ */
+void perfmon_td_init(struct perfmon_td *td);
+
+/*
+ * Load/unload events attached to a thread on the current processor.
+ *
+ * These functions should only be used by the scheduler on a context switch.
+ * Interrupts and preemption must be disabled when calling these functions.
+ */
+void perfmon_td_load(struct perfmon_td *td);
+void perfmon_td_unload(struct perfmon_td *td);
+
+/*
+ * Initialize an event.
+ */
+int perfmon_event_init(struct perfmon_event *event, unsigned int id,
+ unsigned int flags);
+
+/*
+ * Attach/detach an event to/from a thread or a processor.
+ *
+ * Attaching an event allocates hardware resources and enables monitoring.
+ * The number of occurrences for the given event is reset.
+ *
+ * An event can only be attached to one thread or processor at a time.
+ */
+int perfmon_event_attach(struct perfmon_event *event, struct thread *thread);
+int perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu);
+int perfmon_event_detach(struct perfmon_event *event);
+
+/*
+ * Obtain the number of occurrences of an event.
+ */
+uint64_t perfmon_event_read(struct perfmon_event *event);
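
Putting the event API together, a typical kernel-side sequence might look like this (a sketch modeled on the tests below; error handling abbreviated):

    static void
    example_count_cycles_on_cpu0(void)
    {
        struct perfmon_event event;
        uint64_t count;
        int error;

        error = perfmon_event_init(&event, PERFMON_EV_CYCLE,
                                   PERFMON_EF_KERN | PERFMON_EF_USER);
        assert(!error);

        error = perfmon_event_attach_cpu(&event, 0);
        assert(!error);

        /* ... run the monitored workload ... */

        count = perfmon_event_read(&event);     /* syncs, then reads */
        error = perfmon_event_detach(&event);
        assert(!error);

        log_info("cycles: %llu", (unsigned long long)count);
    }
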
+
+/*
+ * Register a PMU device.
+ *
+ * Currently, there can only be a single system-wide PMU device, which
+ * assumes the driver is the same for all processors.
+ */
+void perfmon_register(struct perfmon_dev *dev);
+
+/*
+ * Handle an overflow interrupt.
+ *
+ * This function must be called in interrupt context.
+ */
+void perfmon_overflow_intr(void);
+
+/*
+ * Report a PMC overflow.
+ *
+ * This function is intended to be used by PMU drivers using a custom
+ * overflow interrupt handler.
+ *
+ * This function must be called in interrupt context.
+ */
+void perfmon_report_overflow(unsigned int pmc_index);
+
+/*
+ * This init operation provides:
+ * - PMU device registration
+ */
+INIT_OP_DECLARE(perfmon_bootstrap);
+
+#endif /* KERN_PERFMON_H */
diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h
new file mode 100644
index 00000000..c316312a
--- /dev/null
+++ b/kern/perfmon_types.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definitions used to avoid circular inclusion dependencies.
+ */
+
+#ifndef KERN_PERFMON_TYPES_H
+#define KERN_PERFMON_TYPES_H
+
+#ifdef CONFIG_PERFMON
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <kern/spinlock_types.h>
+
+/*
+ * Maximum number of supported hardware counters.
+ */
+#define PERFMON_MAX_PMCS CONFIG_PERFMON_MAX_PMCS
+
+/*
+ * Performance monitoring event.
+ *
+ * An event may be unattached, attached to a thread, or attached to a CPU.
+ * When it is loaded, the current value of the underlying PMC is saved.
+ * When it is updated, the delta between the current and saved PMC values
+ * is added to the event value.
+ */
+struct perfmon_event {
+ struct spinlock lock;
+ unsigned int flags;
+ unsigned int id;
+ uint64_t pmc_value;
+ uint64_t value;
+
+ union {
+ struct thread *thread;
+ unsigned int cpu;
+ };
+
+ unsigned int pmc_index;
+};
+
+/*
+ * Per-thread performance monitoring counter.
+ *
+ * Per-thread PMCs are indexed the same way as global PMCs.
+ *
+ * A per-thread PMC is referenced when an event is attached to a thread.
+ * The PMC may only be loaded if the thread is running on a processor,
+ * as a result of an event being attached to the thread, or the thread
+ * being dispatched by the scheduler. Note that this allows a transient
+ * state to be seen where a per-thread PMC is both unused and loaded.
+ * This happens after detaching an event from a thread, which causes
+ * the underlying per-thread PMC to become unused, but if the thread
+ * is running concurrently, the counter is still loaded. The implementation
+ * resolves the situation by unloading the counter, which is either
+ * done by an explicit unload cross-call, or when the scheduler preempts
+ * the thread and unloads its thread data.
+ *
+ * When a per-thread PMC is loaded, the current value of the underlying
+ * PMC is saved, and when it's updated, the delta between the current
+ * and saved PMC values is added to the per-thread PMC value.
+ */
+struct perfmon_td_pmc {
+ unsigned int nr_refs;
+ bool loaded;
+ unsigned int pmc_id;
+ unsigned int raw_event_id;
+ uint64_t cpu_pmc_value;
+ uint64_t value;
+};
+
+/*
+ * Per-thread performance monitoring data.
+ *
+ * Interrupts must be disabled when locking thread data.
+ */
+struct perfmon_td {
+ struct spinlock lock;
+ struct perfmon_td_pmc pmcs[PERFMON_MAX_PMCS];
+};
+
+#endif /* CONFIG_PERFMON */
+
+#endif /* KERN_PERFMON_TYPES_H */
diff --git a/kern/thread.c b/kern/thread.c
index 3b6ed3a0..a8f58b39 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -100,6 +100,7 @@
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/percpu.h>
+#include <kern/perfmon.h>
#include <kern/rcu.h>
#include <kern/shell.h>
#include <kern/sleepq.h>
@@ -605,9 +606,23 @@ thread_runq_wakeup_balancer(struct thread_runq *runq)
}
static void
-thread_runq_schedule_prepare(struct thread *thread)
+thread_runq_schedule_load(struct thread *thread)
{
pmap_load(thread->task->map->pmap);
+
+#ifdef CONFIG_PERFMON
+ perfmon_td_load(thread_get_perfmon_td(thread));
+#endif
+}
+
+static void
+thread_runq_schedule_unload(struct thread *thread)
+{
+#ifdef CONFIG_PERFMON
+ perfmon_td_unload(thread_get_perfmon_td(thread));
+#else
+ (void)thread;
+#endif
}
static struct thread_runq *
@@ -639,6 +654,8 @@ thread_runq_schedule(struct thread_runq *runq)
assert(next->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL);
if (likely(prev != next)) {
+ thread_runq_schedule_unload(prev);
+
rcu_report_context_switch(thread_rcu_reader(prev));
spinlock_transfer_owner(&runq->lock, next);
@@ -660,10 +677,10 @@ thread_runq_schedule(struct thread_runq *runq)
* - The current thread may have been migrated to another processor.
*/
barrier();
+ thread_runq_schedule_load(prev);
+
next = NULL;
runq = thread_runq_local();
-
- thread_runq_schedule_prepare(prev);
} else {
next = NULL;
}
@@ -1750,7 +1767,7 @@ thread_main(void (*fn)(void *), void *arg)
assert(!thread_preempt_enabled());
thread = thread_self();
- thread_runq_schedule_prepare(thread);
+ thread_runq_schedule_load(thread);
spinlock_unlock(&thread_runq_local()->lock);
cpu_intr_enable();
@@ -1843,6 +1860,10 @@ thread_init(struct thread *thread, void *stack,
thread->stack = stack;
strlcpy(thread->name, attr->name, sizeof(thread->name));
+#ifdef CONFIG_PERFMON
+ perfmon_td_init(thread_get_perfmon_td(thread));
+#endif
+
if (attr->flags & THREAD_ATTR_DETACHED) {
thread->flags |= THREAD_DETACHED;
}
@@ -2310,6 +2331,13 @@ thread_setup(void)
#define THREAD_STACK_GUARD_INIT_OP_DEPS
#endif /* CONFIG_THREAD_STACK_GUARD */
+#ifdef CONFIG_PERFMON
+#define THREAD_PERFMON_INIT_OP_DEPS \
+ INIT_OP_DEP(perfmon_bootstrap, true),
+#else /* CONFIG_PERFMON */
+#define THREAD_PERFMON_INIT_OP_DEPS
+#endif /* CONFIG_PERFMON */
+
INIT_OP_DEFINE(thread_setup,
INIT_OP_DEP(cpumap_setup, true),
INIT_OP_DEP(kmem_setup, true),
@@ -2319,6 +2347,7 @@ INIT_OP_DEFINE(thread_setup,
INIT_OP_DEP(thread_bootstrap, true),
INIT_OP_DEP(turnstile_setup, true),
THREAD_STACK_GUARD_INIT_OP_DEPS
+ THREAD_PERFMON_INIT_OP_DEPS
);
void __init
diff --git a/kern/thread.h b/kern/thread.h
index 38ee29b3..5b5729ce 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -712,6 +712,14 @@ thread_get_specific(unsigned int key)
return thread_tsd_get(thread_self(), key);
}
+#ifdef CONFIG_PERFMON
+static inline struct perfmon_td *
+thread_get_perfmon_td(struct thread *thread)
+{
+ return &thread->perfmon_td;
+}
+#endif /* CONFIG_PERFMON */
+
/*
* Return the last CPU on which the thread has been scheduled.
*
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 95af1d18..9c9a705b 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -24,6 +24,7 @@
#include <kern/atomic.h>
#include <kern/cpumap.h>
#include <kern/list_types.h>
+#include <kern/perfmon_types.h>
#include <kern/rcu_types.h>
#include <kern/spinlock_types.h>
#include <kern/turnstile_types.h>
@@ -175,6 +176,10 @@ struct thread {
struct list task_node; /* (T) */
void *stack; /* (-) */
char name[THREAD_NAME_SIZE]; /* ( ) */
+
+#ifdef CONFIG_PERFMON
+ struct perfmon_td perfmon_td; /* ( ) */
+#endif
};
#define THREAD_ATTR_DETACHED 0x1
diff --git a/test/Kconfig b/test/Kconfig
index 3f1c3b69..9f0faf44 100644
--- a/test/Kconfig
+++ b/test/Kconfig
@@ -34,6 +34,18 @@ config TEST_MODULE_MUTEX
config TEST_MODULE_MUTEX_PI
bool "mutex_pi"
+config TEST_MODULE_PERFMON_CPU
+ bool "perfmon_cpu"
+ depends on PERFMON
+
+config TEST_MODULE_PERFMON_THREAD
+ bool "perfmon_thread"
+ depends on PERFMON
+
+config TEST_MODULE_PERFMON_TORTURE
+ bool "perfmon_torture"
+ depends on PERFMON
+
config TEST_MODULE_PMAP_UPDATE_MP
bool "pmap_update_mp"
diff --git a/test/Makefile b/test/Makefile
index cdce6130..76edbf0e 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -2,6 +2,9 @@ x15_SOURCES-$(CONFIG_TEST_MODULE_ATOMIC) += test/test_atomic.c
x15_SOURCES-$(CONFIG_TEST_MODULE_BULLETIN) += test/test_bulletin.c
x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX) += test/test_mutex.c
x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX_PI) += test/test_mutex_pi.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
x15_SOURCES-$(CONFIG_TEST_MODULE_PMAP_UPDATE_MP) += test/test_pmap_update_mp.c
x15_SOURCES-$(CONFIG_TEST_MODULE_RCU_DEFER) += test/test_rcu_defer.c
x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES) += test/test_sref_dirty_zeroes.c
diff --git a/test/test_perfmon_cpu.c b/test/test_perfmon_cpu.c
new file mode 100644
index 00000000..75f69d3f
--- /dev/null
+++ b/test/test_perfmon_cpu.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test checks the behavior of performance monitoring on a CPU.
+ * It creates a group with two events, cycle and instruction, and attaches
+ * that group to CPU1, where a thread is bound and runs a tight loop to
+ * make sure the target CPU is never idle. After some time, the measurement
+ * stops and values are reported.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/list.h>
+#include <kern/log.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <machine/cpu.h>
+#include <test/test.h>
+
+#define TEST_WAIT_DELAY_MS 1000
+
+/*
+ * Using a CPU other than the BSP as the monitored CPU checks that PMUs are
+ * correctly initialized on APs.
+ */
+#define TEST_CONTROL_CPU 0
+#define TEST_MONITORED_CPU (TEST_CONTROL_CPU + 1)
+#define TEST_MIN_CPUS (TEST_MONITORED_CPU + 1)
+
+#define TEST_EVENT_NAME_MAX_SIZE 32
+
+struct test_event {
+ struct list node;
+ struct perfmon_event pm_event;
+ char name[TEST_EVENT_NAME_MAX_SIZE];
+};
+
+struct test_group {
+ struct list events;
+};
+
+static unsigned int test_run_stop;
+
+static void
+test_wait(void)
+{
+ thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false);
+}
+
+static void
+test_event_init(struct test_event *event, unsigned int id, const char *name)
+{
+ int error;
+
+ error = perfmon_event_init(&event->pm_event, id, PERFMON_EF_KERN);
+ error_check(error, "perfmon_event_init");
+ strlcpy(event->name, name, sizeof(event->name));
+}
+
+static void
+test_event_report(struct test_event *event)
+{
+ uint64_t count;
+ int error;
+
+ count = perfmon_event_read(&event->pm_event);
+ error = (count == 0) ? EINVAL : 0;
+ error_check(error, __func__);
+ log_info("test: %s: %llu", event->name, (unsigned long long)count);
+}
+
+static void
+test_event_attach_cpu(struct test_event *event, unsigned int cpu)
+{
+ int error;
+
+ error = perfmon_event_attach_cpu(&event->pm_event, cpu);
+ error_check(error, "perfmon_event_attach_cpu");
+}
+
+static void
+test_event_detach(struct test_event *event)
+{
+ int error;
+
+ error = perfmon_event_detach(&event->pm_event);
+ error_check(error, "perfmon_event_detach");
+}
+
+static void
+test_group_init(struct test_group *group)
+{
+ list_init(&group->events);
+}
+
+static void
+test_group_add(struct test_group *group, struct test_event *event)
+{
+ list_insert_tail(&group->events, &event->node);
+}
+
+static void
+test_group_attach_cpu(struct test_group *group, unsigned int cpu)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_attach_cpu(event, cpu);
+ }
+}
+
+static void
+test_group_detach(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_detach(event);
+ }
+}
+
+static void
+test_group_report(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_report(event);
+ }
+}
+
+static void
+test_run(void *arg)
+{
+ unsigned int stop;
+
+ (void)arg;
+
+ do {
+ stop = atomic_load(&test_run_stop, ATOMIC_RELAXED);
+ } while (!stop);
+}
+
+static void
+test_control(void *arg)
+{
+ struct test_event cycle, instruction;
+ struct test_group group;
+ struct thread *thread;
+
+ thread = arg;
+
+ test_event_init(&cycle, PERFMON_EV_CYCLE, "cycle");
+ test_event_init(&instruction, PERFMON_EV_INSTRUCTION, "instruction");
+ test_group_init(&group);
+ test_group_add(&group, &cycle);
+ test_group_add(&group, &instruction);
+ test_group_attach_cpu(&group, TEST_MONITORED_CPU);
+ test_wait();
+ test_group_report(&group);
+ test_wait();
+ test_group_detach(&group);
+ test_group_report(&group);
+
+ atomic_store(&test_run_stop, 1, ATOMIC_RELAXED);
+ thread_join(thread);
+ log_info("test: done");
+}
+
+void
+test_setup(void)
+{
+ struct thread *thread;
+ struct thread_attr attr;
+ struct cpumap *cpumap;
+ int error;
+
+ if (cpu_count() < TEST_MIN_CPUS) {
+ panic("test: %u processors required", TEST_MIN_CPUS);
+ }
+
+ error = cpumap_create(&cpumap);
+ error_check(error, "cpumap_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_run");
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, TEST_MONITORED_CPU);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&thread, &attr, test_run, NULL);
+ error_check(error, "thread_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control");
+ thread_attr_set_detached(&attr);
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, TEST_CONTROL_CPU);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(NULL, &attr, test_control, thread);
+ error_check(error, "thread_create");
+
+ cpumap_destroy(cpumap);
+}
diff --git a/test/test_perfmon_thread.c b/test/test_perfmon_thread.c
new file mode 100644
index 00000000..0213777b
--- /dev/null
+++ b/test/test_perfmon_thread.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test checks the behavior of performance monitoring on a thread.
+ * It creates a group with a single event, cycle, and attaches that group to
+ * a runner thread. Two checks are then performed:
+ * - the first makes sure the number of cycles changes when the runner
+ * thread is running
+ * - the second makes sure the number of cycles doesn't change when the
+ * runner thread is sleeping
+ *
+ * Another group with a cycle event is created and attached to CPU0 to make
+ * sure that a shared event is correctly handled, and the runner thread is
+ * bound to CPU0 to force sharing. A third thread is created to fill CPU0
+ * time with cycles so that the cycle counter of the CPU-attached group
+ * changes while the runner thread is sleeping.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/condition.h>
+#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/kmem.h>
+#include <kern/list.h>
+#include <kern/log.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <test/test.h>
+
+#define TEST_WAIT_DELAY_MS 1000
+
+#define TEST_EVENT_NAME_MAX_SIZE 32
+
+struct test_event {
+ struct list node;
+ struct perfmon_event pm_event;
+ uint64_t last_value;
+ char name[TEST_EVENT_NAME_MAX_SIZE];
+};
+
+struct test_group {
+ struct list events;
+};
+
+enum test_state {
+ TEST_STATE_RUNNING,
+ TEST_STATE_SUSPENDED,
+ TEST_STATE_TERMINATED,
+};
+
+static struct condition test_condition;
+static struct mutex test_mutex;
+static enum test_state test_state;
+
+static void
+test_wait(void)
+{
+ log_info("test: controller waiting");
+ thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false);
+ log_info("test: controller resuming");
+}
+
+static void
+test_event_init(struct test_event *event, unsigned int id, const char *name)
+{
+ int error;
+
+ error = perfmon_event_init(&event->pm_event, id, PERFMON_EF_KERN);
+ error_check(error, "perfmon_event_init");
+ strlcpy(event->name, name, sizeof(event->name));
+}
+
+static void
+test_event_attach(struct test_event *event, struct thread *thread)
+{
+ int error;
+
+ error = perfmon_event_attach(&event->pm_event, thread);
+ error_check(error, "perfmon_event_attach");
+}
+
+static void
+test_event_attach_cpu(struct test_event *event, unsigned int cpu)
+{
+ int error;
+
+ error = perfmon_event_attach_cpu(&event->pm_event, cpu);
+ error_check(error, "perfmon_event_attach_cpu");
+}
+
+static void
+test_event_detach(struct test_event *event)
+{
+ int error;
+
+ error = perfmon_event_detach(&event->pm_event);
+ error_check(error, "perfmon_event_detach");
+}
+
+static uint64_t
+test_event_read(struct test_event *event)
+{
+ uint64_t value;
+
+ value = perfmon_event_read(&event->pm_event);
+ log_info("test: %s: %llu", event->name, (unsigned long long)value);
+ return value;
+}
+
+static void
+test_event_save(struct test_event *event)
+{
+ event->last_value = test_event_read(event);
+}
+
+static void
+test_event_check(struct test_event *event, bool change_expected)
+{
+ uint64_t value;
+ bool changed;
+
+ value = test_event_read(event);
+ changed = (value != event->last_value);
+
+    if (changed != change_expected) {
+        panic("test: %s: %s", event->name,
+              change_expected ? "value unexpectedly unchanged"
+                              : "value unexpectedly changed");
+    }
+
+ event->last_value = value;
+}
+
+static void
+test_group_init(struct test_group *group)
+{
+ list_init(&group->events);
+}
+
+static void
+test_group_add(struct test_group *group, struct test_event *event)
+{
+ list_insert_tail(&group->events, &event->node);
+}
+
+static void
+test_group_attach(struct test_group *group, struct thread *thread)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_attach(event, thread);
+ }
+}
+
+static void
+test_group_attach_cpu(struct test_group *group, unsigned int cpu)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_attach_cpu(event, cpu);
+ }
+}
+
+static void
+test_group_detach(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_detach(event);
+ }
+}
+
+static void
+test_group_save(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_save(event);
+ }
+}
+
+static void
+test_group_check(struct test_group *group, bool change_expected)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_check(event, change_expected);
+ }
+}
+
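+/*
+ * Runner : burn cycles while the state is TEST_STATE_RUNNING. The mutex
+ * is released and reacquired at each iteration so that the controller
+ * may grab it to change the state, while the runner still spends most
+ * of its running time outside the critical section.
+ */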
+static void
+test_run(void *arg)
+{
+ bool report;
+
+ (void)arg;
+
+ report = true;
+
+ mutex_lock(&test_mutex);
+
+ while (test_state != TEST_STATE_TERMINATED) {
+ if (test_state == TEST_STATE_SUSPENDED) {
+ log_info("test: runner suspended");
+ report = true;
+ condition_wait(&test_condition, &test_mutex);
+ } else {
+ mutex_unlock(&test_mutex);
+
+ if (report) {
+ log_info("test: runner running");
+ report = false;
+ }
+
+ mutex_lock(&test_mutex);
+ }
+ }
+
+ mutex_unlock(&test_mutex);
+}
+
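+/*
+ * Filler : busy-loop until termination. This thread is bound to CPU0 at
+ * the lowest priority (see test_setup) so that the CPU-attached cycle
+ * counter keeps changing while the runner sleeps.
+ */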
+static void
+test_fill(void *arg)
+{
+ enum test_state state;
+
+ (void)arg;
+
+ do {
+ state = atomic_load(&test_state, ATOMIC_RELAXED);
+ } while (state != TEST_STATE_TERMINATED);
+}
+
+static void
+test_wait_state(const struct thread *thread, unsigned short state)
+{
+    while (thread_state(thread) != state) {
+        thread_delay(1, false);
+    }
+}
+
+static void
+test_resume(struct thread *thread)
+{
+ test_wait_state(thread, THREAD_SLEEPING);
+
+ mutex_lock(&test_mutex);
+ assert(test_state == TEST_STATE_SUSPENDED);
+ atomic_store(&test_state, TEST_STATE_RUNNING, ATOMIC_RELAXED);
+ condition_signal(&test_condition);
+ mutex_unlock(&test_mutex);
+
+ test_wait_state(thread, THREAD_RUNNING);
+}
+
+static void
+test_suspend(struct thread *thread)
+{
+ test_wait_state(thread, THREAD_RUNNING);
+
+ mutex_lock(&test_mutex);
+ assert(test_state == TEST_STATE_RUNNING);
+ atomic_store(&test_state, TEST_STATE_SUSPENDED, ATOMIC_RELAXED);
+ mutex_unlock(&test_mutex);
+
+ test_wait_state(thread, THREAD_SLEEPING);
+}
+
+static void
+test_terminate(void)
+{
+ mutex_lock(&test_mutex);
+ test_state = TEST_STATE_TERMINATED;
+ condition_signal(&test_condition);
+ mutex_unlock(&test_mutex);
+}
+
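+/*
+ * Controller : drive the runner through the two checks described in the
+ * module header. While the runner runs, both groups must report changing
+ * cycle counts ; once it sleeps, the thread group must stop changing
+ * while the CPU-attached group, kept busy by the filler, keeps counting.
+ */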
+static void
+test_control(void *arg)
+{
+ struct test_event thread_cycle, cpu_cycle;
+ struct test_group thread_group, cpu_group;
+ struct thread *runner;
+
+ runner = arg;
+
+ test_event_init(&thread_cycle, PERFMON_EV_CYCLE, "thread_cycle");
+ test_group_init(&thread_group);
+ test_group_add(&thread_group, &thread_cycle);
+
+ test_event_init(&cpu_cycle, PERFMON_EV_CYCLE, "cpu_cycle");
+ test_group_init(&cpu_group);
+ test_group_add(&cpu_group, &cpu_cycle);
+
+ test_group_attach(&thread_group, runner);
+ test_group_attach_cpu(&cpu_group, 0);
+
+ test_group_save(&thread_group);
+ test_group_save(&cpu_group);
+ test_resume(runner);
+ test_wait();
+ test_suspend(runner);
+ test_group_check(&thread_group, true);
+ test_group_check(&cpu_group, true);
+ test_wait();
+ test_group_check(&thread_group, false);
+ test_group_check(&cpu_group, true);
+ test_terminate();
+
+ test_group_detach(&cpu_group);
+ test_group_detach(&thread_group);
+
+ thread_join(runner);
+ log_info("test: done");
+}
+
+void
+test_setup(void)
+{
+ struct thread_attr attr;
+ struct thread *runner;
+ struct cpumap *cpumap;
+ int error;
+
+ condition_init(&test_condition);
+ mutex_init(&test_mutex);
+ test_state = TEST_STATE_SUSPENDED;
+
+ error = cpumap_create(&cpumap);
+ error_check(error, "cpumap_create");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 0);
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_run");
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&runner, &attr, test_run, NULL);
+ error_check(error, "thread_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_fill");
+ thread_attr_set_detached(&attr);
+ thread_attr_set_cpumap(&attr, cpumap);
+ thread_attr_set_priority(&attr, THREAD_SCHED_FS_PRIO_MIN);
+ error = thread_create(NULL, &attr, test_fill, NULL);
+ error_check(error, "thread_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control");
+ thread_attr_set_detached(&attr);
+ error = thread_create(NULL, &attr, test_control, runner);
+ error_check(error, "thread_create");
+
+ cpumap_destroy(cpumap);
+}
diff --git a/test/test_perfmon_torture.c b/test/test_perfmon_torture.c
new file mode 100644
index 00000000..171cb99c
--- /dev/null
+++ b/test/test_perfmon_torture.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This module is a stress test of the performance monitoring module,
+ * expected to never terminate. It creates a control thread which
+ * maintains a couple of test threads running while toggling performance
+ * monitoring on them, attempting to produce many regular and corner
+ * cases. In particular, the thread pool is randomly resized by destroying
+ * and creating the underlying kernel threads.
+ *
+ * The control thread regularly prints some stats about the thread pool
+ * and the associated performance monitoring events to report that it's
+ * making progress.
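+ *
+ * Reported lines look like the following, with varying values :
+ *
+ *   test: events:3 total:42 threads:2 total:17
+ *   test: thread:0 count:123456789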
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/error.h>
+#include <kern/kmem.h>
+#include <kern/log.h>
+#include <kern/macros.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <machine/cpu.h>
+#include <test/test.h>
+
+struct test_thread {
+ unsigned int id;
+ struct thread *thread;
+ struct perfmon_event event;
+ unsigned int must_stop;
+ bool monitored;
+ unsigned long long count;
+};
+
+struct test_controller {
+ struct test_thread **threads;
+ unsigned int nr_threads;
+ unsigned int monitoring_lid;
+ unsigned int state_lid;
+ unsigned long nr_current_events;
+ unsigned long nr_total_events;
+ unsigned long nr_current_threads;
+ unsigned long nr_total_threads;
+};
+
+#define TEST_WAIT_DELAY_MS 100
+#define TEST_LOOPS_PER_PRINT 20
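+
+/*
+ * With these values, the controller prints a report about every
+ * 20 * 100 ms = 2 seconds.
+ */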
+
+#define TEST_MONITORING_SEED 12345
+#define TEST_STATE_SEED 23456
+
+static void
+test_wait(void)
+{
+ thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false);
+}
+
+static unsigned int
+test_rand(unsigned int x)
+{
+    /*
+     * Basic 32-bit xorshift PRNG (Marsaglia). Note that 0 is a fixed
+     * point, so seeds must be nonzero.
+     */
+ x ^= x << 13;
+ x ^= x >> 17;
+ x ^= x << 5;
+ return x;
+}
+
+static bool
+test_thread_monitored(const struct test_thread *thread)
+{
+ return thread->monitored;
+}
+
+static void
+test_thread_start_monitoring(struct test_thread *thread)
+{
+ int error;
+
+ error = perfmon_event_attach(&thread->event, thread->thread);
+ error_check(error, __func__);
+ thread->monitored = true;
+}
+
+static void
+test_thread_stop_monitoring(struct test_thread *thread)
+{
+ int error;
+
+ thread->count += perfmon_event_read(&thread->event);
+ error = perfmon_event_detach(&thread->event);
+ error_check(error, __func__);
+ thread->monitored = false;
+}
+
+static void
+test_thread_report(const struct test_thread *thread)
+{
+ log_info("test: thread:%u count:%llu", thread->id, thread->count);
+}
+
+static void
+test_run(void *arg)
+{
+ struct test_thread *thread;
+
+ thread = arg;
+
+ for (;;) {
+ if (atomic_load(&thread->must_stop, ATOMIC_RELAXED)) {
+ break;
+ }
+ }
+}
+
+static bool
+test_thread_started(const struct test_thread *thread)
+{
+ return thread->thread;
+}
+
+static void
+test_thread_start(struct test_thread *thread)
+{
+ char name[THREAD_NAME_SIZE];
+ struct thread_attr attr;
+ int error;
+
+ assert(!thread->monitored);
+
+ if (test_thread_started(thread)) {
+ return;
+ }
+
+ thread->must_stop = 0;
+
+ snprintf(name, sizeof(name),
+ THREAD_KERNEL_PREFIX "test_run:%u", thread->id);
+ thread_attr_init(&attr, name);
+ error = thread_create(&thread->thread, &attr, test_run, thread);
+ error_check(error, "thread_create");
+}
+
+static void
+test_thread_request_stop(struct test_thread *thread)
+{
+ atomic_store(&thread->must_stop, 1, ATOMIC_RELAXED);
+}
+
+static void
+test_thread_join(struct test_thread *thread)
+{
+ assert(test_thread_started(thread));
+ assert(!test_thread_monitored(thread));
+
+ thread_join(thread->thread);
+ thread->thread = NULL;
+}
+
+static struct test_thread *
+test_thread_create(unsigned int id)
+{
+    struct test_thread *thread;
+    int error;
+
+ thread = kmem_alloc(sizeof(*thread));
+
+ if (thread == NULL) {
+ panic("thread allocation failed");
+ }
+
+ thread->id = id;
+ thread->thread = NULL;
+ thread->must_stop = 0;
+ thread->monitored = false;
+ thread->count = 0;
+
+    error = perfmon_event_init(&thread->event, PERFMON_EV_CYCLE,
+                               PERFMON_EF_KERN);
+    error_check(error, "perfmon_event_init");
+ test_thread_start(thread);
+
+ return thread;
+}
+
+static struct test_thread *
+test_controller_get(struct test_controller *controller, unsigned int id)
+{
+ assert(id < controller->nr_threads);
+ return controller->threads[id];
+}
+
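+/*
+ * Select a thread from an arbitrary (typically pseudo-random) value by
+ * reducing it modulo the pool size.
+ */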
+static struct test_thread *
+test_controller_get_by_lid(struct test_controller *controller, unsigned int lid)
+{
+ return test_controller_get(controller, lid % controller->nr_threads);
+}
+
+static void
+test_toggle_monitoring(struct test_controller *controller,
+ struct test_thread *thread)
+{
+ if (!test_thread_started(thread)) {
+ return;
+ }
+
+ if (thread->monitored) {
+ test_thread_stop_monitoring(thread);
+ controller->nr_current_events--;
+ } else {
+ test_thread_start_monitoring(thread);
+ controller->nr_total_events++;
+ controller->nr_current_events++;
+ }
+}
+
+static void
+test_toggle_state(struct test_controller *controller,
+ struct test_thread *thread)
+{
+ if (test_thread_started(thread)) {
+ /*
+ * Make the thread stop asynchronously with monitoring to test
+ * thread referencing.
+ */
+ test_thread_request_stop(thread);
+
+ if (test_thread_monitored(thread)) {
+ test_thread_stop_monitoring(thread);
+ controller->nr_current_events--;
+ }
+
+ test_thread_join(thread);
+ controller->nr_current_threads--;
+ } else {
+ test_thread_start(thread);
+ controller->nr_total_threads++;
+ controller->nr_current_threads++;
+ }
+}
+
+static void
+test_controller_report(struct test_controller *controller)
+{
+ log_info("test: events:%lu total:%lu threads:%lu total:%lu",
+ controller->nr_current_events, controller->nr_total_events,
+ controller->nr_current_threads, controller->nr_total_threads);
+
+ for (unsigned int i = 0; i < controller->nr_threads; i++) {
+ test_thread_report(test_controller_get(controller, i));
+ }
+}
+
+static void
+test_control(void *arg)
+{
+ struct test_controller *controller;
+ struct test_thread *thread;
+
+ controller = arg;
+
+ log_info("test: %u threads", controller->nr_threads);
+
+ for (unsigned long nr_loops = 1; /* no condition */; nr_loops++) {
+ controller->monitoring_lid = test_rand(controller->monitoring_lid);
+ thread = test_controller_get_by_lid(controller,
+ controller->monitoring_lid);
+ test_toggle_monitoring(controller, thread);
+
+ controller->state_lid = test_rand(controller->state_lid);
+ thread = test_controller_get_by_lid(controller,
+ controller->state_lid);
+ test_toggle_state(controller, thread);
+
+ test_wait();
+
+ if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
+ test_controller_report(controller);
+ }
+ }
+}
+
+static void
+test_controller_create(void)
+{
+ struct test_controller *controller;
+ struct thread_attr attr;
+ int error;
+
+ controller = kmem_alloc(sizeof(*controller));
+
+ if (!controller) {
+ panic("test: unable to create controller");
+ }
+
+ /*
+ * At least two threads are required by the monitoring/state toggling
+ * operations, otherwise they always apply to the same thread, severely
+ * restricting their usefulness.
+ */
+ controller->nr_threads = MAX(cpu_count() - 1, 2);
+ controller->threads = kmem_alloc(controller->nr_threads
+ * sizeof(*controller->threads));
+
+ if (!controller->threads) {
+ panic("test: unable to allocate thread array");
+ }
+
+ for (unsigned int i = 0; i < controller->nr_threads; i++) {
+ controller->threads[i] = test_thread_create(i);
+ }
+
+ controller->monitoring_lid = TEST_MONITORING_SEED;
+ controller->state_lid = TEST_STATE_SEED;
+ controller->nr_current_events = 0;
+ controller->nr_total_events = 0;
+ controller->nr_current_threads = controller->nr_threads;
+ controller->nr_total_threads = controller->nr_threads;
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control");
+ thread_attr_set_detached(&attr);
+ error = thread_create(NULL, &attr, test_control, controller);
+ error_check(error, "thread_create");
+}
+
+void
+test_setup(void)
+{
+ test_controller_create();
+}