Diffstat (limited to 'kern')
-rw-r--r--kern/Kconfig6
-rw-r--r--kern/Makefile2
-rw-r--r--kern/perfmon.c1388
-rw-r--r--kern/perfmon.h251
-rw-r--r--kern/perfmon_i.h42
-rw-r--r--kern/perfmon_types.h26
-rw-r--r--kern/thread.c54
-rw-r--r--kern/thread.h8
-rw-r--r--kern/thread_i.h5
9 files changed, 1778 insertions, 4 deletions
diff --git a/kern/Kconfig b/kern/Kconfig
index fced67c..5e0e5eb 100644
--- a/kern/Kconfig
+++ b/kern/Kconfig
@@ -94,6 +94,12 @@ config THREAD_STACK_GUARD
If unsure, disable.
+config PERFMON
+ bool "Performances monitoring counters"
+ ---help---
+ Enable the performance monitoring counters (perfmon API). While in
+ use, it may slightly lengthen the thread scheduling critical section.
+
endmenu
menu "Debugging"
diff --git a/kern/Makefile b/kern/Makefile
index ab7d6b5..5b04fcb 100644
--- a/kern/Makefile
+++ b/kern/Makefile
@@ -41,3 +41,5 @@ x15_SOURCES-$(CONFIG_SHELL) += kern/shell.c
x15_SOURCES-$(CONFIG_MUTEX_ADAPTIVE) += kern/mutex/mutex_adaptive.c
x15_SOURCES-$(CONFIG_MUTEX_PLAIN) += kern/mutex/mutex_plain.c
+
+x15_SOURCES-$(CONFIG_PERFMON) += kern/perfmon.c
diff --git a/kern/perfmon.c b/kern/perfmon.c
new file mode 100644
index 0000000..c910069
--- /dev/null
+++ b/kern/perfmon.c
@@ -0,0 +1,1388 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2015 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The performance monitoring module allows managing performance monitoring
+ * as event groups. Each physical performance monitoring counter (PMC) may
+ * be referenced by perfmon events, which are themselves grouped in perfmon
+ * groups. Groups can then be attached to either threads or CPUs through
+ * perfmon grouplists.
+ *
+ * In order to guarantee that thread migration is properly handled, event
+ * types are reserved on the performance monitoring units (PMU) of all CPUs
+ * for every event of a group when that group is attached. As a result,
+ * attaching a group may fail if no compatible PMC is globally available.
+ *
+ * Locking order: interrupts -> thread runq -> grouplist -> group
+ *
+ * TODO API to differentiate user and kernel events.
+ */
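+
+/*
+ * Illustrative usage sketch (assuming a registered PMU driver, error
+ * handling omitted): counting cycles on the current thread.
+ *
+ *   struct perfmon_event *event;
+ *   struct perfmon_group *group;
+ *
+ *   perfmon_event_create(&event, PERFMON_ET_GENERIC, PERFMON_EV_CYCLE,
+ *                        PERFMON_EF_KERN);
+ *   perfmon_group_create(&group);
+ *   perfmon_group_add(group, event);
+ *   perfmon_group_attach(group, thread_self());
+ *   perfmon_group_start(group);
+ *
+ *   ... run the code to monitor ...
+ *
+ *   perfmon_group_update(group);
+ *   ... perfmon_event_read(event) now returns the cycle count ...
+ *   perfmon_group_stop(group);
+ *   perfmon_group_detach(group);
+ *   perfmon_group_destroy(group);
+ */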
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/error.h>
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/list.h>
+#include <kern/log.h>
+#include <kern/macros.h>
+#include <kern/panic.h>
+#include <kern/percpu.h>
+#include <kern/perfmon.h>
+#include <kern/perfmon_i.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <kern/xcall.h>
+#include <machine/cpu.h>
+#include <machine/pmu.h>
+
+/*
+ * Performance monitoring event.
+ *
+ * When a group is attached, each of its events is associated to a PMC,
+ * adding a reference in the process.
+ */
+struct perfmon_event {
+ uint64_t count;
+ uint64_t prev;
+ uint64_t overflow_id;
+ struct list node;
+ int flags;
+ unsigned int type;
+ unsigned int id;
+ unsigned int pmc_index;
+#ifdef CONFIG_PERFMON_TEST
+ uint64_t value;
+ bool set_value;
+#endif
+};
+
+#define PERFMON_INVALID_CPU ((unsigned int)-1)
+
+/*
+ * Group types.
+ */
+#define PERFMON_GT_UNKNOWN 0
+#define PERFMON_GT_CPU 1
+#define PERFMON_GT_THREAD 2
+
+/*
+ * Group state flags.
+ */
+#define PERFMON_GF_ATTACHED 1
+#define PERFMON_GF_ENABLED 2
+#define PERFMON_GF_LOADED 4
+#define PERFMON_GF_PENDING_DISABLE 8
+
+/*
+ * The possible states of a group are handled through the flags attribute:
+ * - A group can either be unattached or attached (to a thread or a CPU).
+ * - An attached group may be enabled or not.
+ * - An enabled group may be loaded or not, i.e. have actual running
+ * performance counters on a CPU.
+ *
+ * When a group is attached, some resources are reserved for it so that it
+ * can be monitored at any time.
+ * When a group is enabled, it gets loaded when needed:
+ * - CPU groups stay loaded as long as they are enabled.
+ * - Thread groups are loaded while their thread is running.
+ * When a group is loaded, its performance counters are currently enabled.
+ *
+ * The PENDING_DISABLE flag is there so that a group monitoring a remote
+ * thread can be disabled when that thread unschedules itself.
+ *
+ * Note that a non-attached group can only be referenced through the API.
+ * Since manipulating the same group from different threads at the same time
+ * is not supported, the code does not bother to lock a group that is
+ * assumed unattached.
+ *
+ * About the synchronization of thread-type group counters:
+ * - Groups are synchronized when their thread is unscheduled, which means
+ * their counter values are updated and pending counter changes (like
+ * unloading) are performed.
+ * - Since all operations require the group to be locked, it is mandatory
+ * to unlock the group before xcalling any remote operation in order to
+ * avoid a deadlock.
+ * - Any remote thread operation that gets executed after the thread is
+ * unscheduled has nothing to do if the current thread is not the target
+ * one, since the target thread has been unloaded in between.
+ */
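+/*
+ * A rough sketch of the resulting lifecycle (transitions simplified):
+ *
+ *              attach           start            load/unload
+ *  UNATTACHED <------> ATTACHED <-----> ENABLED <-----------> LOADED
+ *              detach           stop
+ */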
+struct perfmon_group {
+ struct list node;
+ struct list events;
+ struct thread *thread;
+ struct spinlock lock;
+ unsigned int cpu;
+ short flags;
+ unsigned short type;
+};
+
+/*
+ * List of all groups attached to a single monitored object, either a CPU
+ * or a thread.
+ */
+struct perfmon_grouplist {
+ struct list groups;
+ struct spinlock lock;
+};
+
+/*
+ * Maximum number of supported hardware counters.
+ */
+#define PERFMON_MAX_PMCS 64
+
+/*
+ * Performance monitoring counter.
+ *
+ * When a PMC is valid, it maps a raw event to a hardware counter.
+ * A PMC is valid if and only if its reference count isn't zero.
+ */
+struct perfmon_pmc {
+ unsigned int nr_refs;
+ unsigned int raw_event_id;
+ unsigned int id;
+};
+
+/*
+ * Performance monitoring unit.
+ *
+ * There is a single system-wide logical PMU, used to globally allocate
+ * PMCs. Reserving a counter across the entire system ensures thread
+ * migration isn't hindered by performance monitoring.
+ */
+struct perfmon_pmu {
+ struct spinlock lock;
+ unsigned int nr_pmcs;
+ struct perfmon_pmc pmcs[PERFMON_MAX_PMCS];
+};
+
+/*
+ * Per-CPU performance monitoring counter.
+ *
+ * When a group is loaded on a processor, the per-CPU PMCs of its events
+ * get referenced. When a per-CPU PMC is referenced, its underlying hardware
+ * counter is active.
+ *
+ * Preemption must be disabled on access.
+ */
+struct perfmon_cpu_pmc {
+ unsigned int nr_refs;
+ uint64_t prev_value;
+ uint64_t overflow_id;
+};
+
+/*
+ * Per-CPU performance monitoring unit.
+ *
+ * The per-CPU PMCs are indexed the same way as the global PMCs.
+ *
+ * Preemption must be disabled on access.
+ */
+struct perfmon_cpu_pmu {
+ struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS];
+ struct timer of_timer;
+ unsigned int cpu_id;
+};
+
+static struct perfmon_pmu_driver pmu_driver __read_mostly;
+
+static struct perfmon_pmu perfmon_pmu;
+static unsigned int perfmon_pmc_id_to_index[PERFMON_MAX_PMCS];
+
+static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu;
+
+/*
+ * Cache of thread-specific group lists.
+ */
+static struct kmem_cache perfmon_grouplist_cache;
+
+/*
+ * CPU specific group lists.
+ */
+static struct perfmon_grouplist *perfmon_cpu_grouplist __percpu;
+
+static inline int
+perfmon_translate(unsigned int *raw_event_idp, unsigned int event_type,
+ unsigned int event_id)
+{
+ switch (event_type) {
+ case PERFMON_ET_RAW:
+ *raw_event_idp = event_id;
+ return 0;
+ case PERFMON_ET_GENERIC:
+ return pmu_driver.ops.translate(raw_event_idp, event_id);
+ default:
+ panic("perfmon: unsupported event type");
+ }
+}
+
+static int
+perfmon_pmc_alloc(struct perfmon_pmc **pmcp, unsigned int raw_event_id)
+{
+ struct perfmon_pmc *pmc;
+ size_t i;
+ int error;
+
+ if (perfmon_pmu.nr_pmcs == ARRAY_SIZE(perfmon_pmu.pmcs)) {
+ return EAGAIN;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) {
+ pmc = &perfmon_pmu.pmcs[i];
+
+ if (pmc->nr_refs == 0) {
+ break;
+ }
+ }
+
+ assert(i < ARRAY_SIZE(perfmon_pmu.pmcs));
+ error = pmu_driver.ops.alloc(&pmc->id, raw_event_id);
+
+ if (error) {
+ return error;
+ }
+
+ pmc->raw_event_id = raw_event_id;
+ perfmon_pmu.nr_pmcs++;
+ *pmcp = pmc;
+
+ return 0;
+}
+
+static struct perfmon_pmc *
+perfmon_pmc_lookup(unsigned int raw_event_id)
+{
+ struct perfmon_pmc *pmc;
+ size_t i;
+
+ if (perfmon_pmu.nr_pmcs == 0) {
+ return NULL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) {
+ pmc = &perfmon_pmu.pmcs[i];
+
+ if ((pmc->nr_refs != 0) && (pmc->raw_event_id == raw_event_id)) {
+ return pmc;
+ }
+ }
+
+ return NULL;
+}
+
+static inline unsigned int
+perfmon_pmc_index(const struct perfmon_pmc *pmc)
+{
+ unsigned int index;
+
+ index = pmc - perfmon_pmu.pmcs;
+ assert(index < ARRAY_SIZE(perfmon_pmu.pmcs));
+ return index;
+}
+
+/*
+ * Obtain a reference on a PMC for the given event.
+ *
+ * If there is no existing PMC suitable for this event, allocate one.
+ */
+static int
+perfmon_pmc_get(struct perfmon_pmc **pmcp, const struct perfmon_event *event)
+{
+ struct perfmon_pmc *pmc;
+ unsigned int raw_event_id;
+ unsigned int pmc_index;
+ int error;
+
+ error = perfmon_translate(&raw_event_id, event->type, event->id);
+
+ if (error) {
+ return error;
+ }
+
+ spinlock_lock(&perfmon_pmu.lock);
+
+ pmc = perfmon_pmc_lookup(raw_event_id);
+
+ if (pmc == NULL) {
+ error = perfmon_pmc_alloc(&pmc, raw_event_id);
+
+ if (error) {
+ goto out;
+ }
+ pmc_index = perfmon_pmc_index(pmc);
+ assert(perfmon_pmc_id_to_index[pmc->id] == UINT32_MAX);
+ perfmon_pmc_id_to_index[pmc->id] = pmc_index;
+ }
+
+ pmc->nr_refs++;
+
+out:
+ spinlock_unlock(&perfmon_pmu.lock);
+
+ if (error) {
+ return error;
+ }
+ *pmcp = pmc;
+
+ return 0;
+}
+
+/*
+ * Release a reference on a PMC.
+ */
+static void
+perfmon_pmc_put(struct perfmon_pmc *pmc)
+{
+ spinlock_lock(&perfmon_pmu.lock);
+
+ assert(pmc->nr_refs != 0);
+ pmc->nr_refs--;
+
+ if (pmc->nr_refs == 0) {
+ pmu_driver.ops.free(pmc->id);
+ assert(perfmon_pmc_id_to_index[pmc->id] != UINT32_MAX);
+ perfmon_pmc_id_to_index[pmc->id] = UINT32_MAX;
+ }
+
+ spinlock_unlock(&perfmon_pmu.lock);
+}
+
+static inline struct perfmon_pmc *
+perfmon_pmc_from_index(unsigned int index)
+{
+ assert(index < ARRAY_SIZE(perfmon_pmu.pmcs));
+ return &perfmon_pmu.pmcs[index];
+}
+
+static void
+perfmon_grouplist_ctor(void *arg)
+{
+ struct perfmon_grouplist *grouplist;
+
+ grouplist = arg;
+
+ list_init(&grouplist->groups);
+ spinlock_init(&grouplist->lock);
+}
+
+static struct perfmon_grouplist *
+perfmon_grouplist_create(void)
+{
+ return kmem_cache_alloc(&perfmon_grouplist_cache);
+}
+
+static void
+perfmon_grouplist_destroy(struct perfmon_grouplist *grouplist)
+{
+ kmem_cache_free(&perfmon_grouplist_cache, grouplist);
+}
+
+static void perfmon_check_of(struct timer *timer);
+
+static void __init
+perfmon_cpu_pmu_init(unsigned int cpuid)
+{
+ unsigned int i;
+ struct perfmon_cpu_pmu *cpu_pmu;
+
+ cpu_pmu = percpu_ptr(perfmon_cpu_pmu, cpuid);
+ cpu_pmu->cpu_id = cpuid;
+ if (!pmu_driver.ops.handle_of_intr) {
+ /* XXX: using high priority instead of INTR because we might xcall from
+ * the callbacks.
+ */
+ timer_init(&cpu_pmu->of_timer, &perfmon_check_of, TIMER_HIGH_PRIO);
+ timer_schedule(&cpu_pmu->of_timer, pmu_driver.of_max_ticks);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) {
+ struct perfmon_cpu_pmc *pmc;
+
+ pmc = &cpu_pmu->pmcs[i];
+
+ pmc->nr_refs = 0;
+ pmc->overflow_id = 0;
+ }
+}
+
+static struct perfmon_cpu_pmc *
+perfmon_cpu_pmu_get_pmc_from_id(unsigned int pmc_id)
+{
+ unsigned int pmc_index;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(perfmon_pmc_id_to_index[pmc_id] != UINT32_MAX);
+ pmc_index = perfmon_pmc_id_to_index[pmc_id];
+
+ /* TODO: this may be called many times in a row, so the per-CPU PMU
+ * could be passed to the function instead of being looked up each time.
+ */
+ cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu);
+ cpu_pmc = &cpu_pmu->pmcs[pmc_index];
+
+ assert(cpu_pmc->nr_refs != 0);
+
+ return cpu_pmc;
+}
+
+void
+perfmon_cpu_on_pmc_of(unsigned int pmc_id)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ cpu_pmc = perfmon_cpu_pmu_get_pmc_from_id(pmc_id);
+ cpu_pmc->overflow_id++;
+}
+
+static void
+perfmon_check_of_remote(void *arg)
+{
+ perfmon_check_of(arg);
+}
+
+static void
+perfmon_check_pmc_of(struct perfmon_cpu_pmc *cpu_pmc, uint64_t value)
+{
+ uint64_t prev;
+
+ prev = cpu_pmc->prev_value;
+ if (prev > value) {
+ /* Overflow */
+ cpu_pmc->overflow_id++;
+ }
+ cpu_pmc->prev_value = value;
+}
+
+static void
+perfmon_check_of(struct timer *timer)
+{
+ struct perfmon_pmc *pmc;
+ struct perfmon_cpu_pmc *cpu_pmc;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ uint64_t value;
+
+ cpu_pmu = structof(timer, struct perfmon_cpu_pmu, of_timer);
+ if (cpu_pmu->cpu_id != cpu_id()) {
+ xcall_call(perfmon_check_of_remote, timer, cpu_pmu->cpu_id);
+ return;
+ }
+
+ for (size_t i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) {
+ pmc = perfmon_pmc_from_index(i);
+ if (pmc->nr_refs == 0) {
+ continue;
+ }
+
+ cpu_pmc = &cpu_pmu->pmcs[i];
+ value = pmu_driver.ops.read(pmc->id);
+
+ perfmon_check_pmc_of(cpu_pmc, value);
+ }
+
+ timer_schedule(timer, pmu_driver.of_max_ticks);
+}
+
+static void
+perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ cpu_pmc = &cpu_pmu->pmcs[pmc_index];
+
+ if (cpu_pmc->nr_refs == 0) {
+ pmu_driver.ops.start(perfmon_pmu.pmcs[pmc_index].id,
+ perfmon_pmu.pmcs[pmc_index].raw_event_id);
+ cpu_pmc->prev_value = pmu_driver.ops.read(perfmon_pmu.pmcs[pmc_index].id);
+ }
+
+ cpu_pmc->nr_refs++;
+}
+
+static void
+perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ cpu_pmc = &cpu_pmu->pmcs[pmc_index];
+ assert(cpu_pmc->nr_refs != 0);
+ cpu_pmc->nr_refs--;
+
+ if (cpu_pmc->nr_refs == 0) {
+ pmu_driver.ops.stop(perfmon_pmu.pmcs[pmc_index].id);
+ }
+}
+
+void
+perfmon_of_intr(void)
+{
+ assert(pmu_driver.ops.handle_of_intr);
+ pmu_driver.ops.handle_of_intr();
+}
+
+int
+perfmon_pmu_register(struct perfmon_pmu_driver *driver)
+{
+ struct perfmon_pmu_ops *ops = &driver->ops;
+
+ assert(ops->info && ops->translate && ops->alloc
+ && ops->free && ops->start && ops->stop);
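+ /* Exactly one of handle_of_intr and of_max_ticks must be provided. */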
+ assert(!ops->handle_of_intr != !driver->of_max_ticks);
+
+ if (pmu_driver.ops.info) {
+ /* Already initialized */
+ assert(0);
+ return EINVAL;
+ }
+ pmu_driver = *driver;
+
+ return 0;
+}
+
+static int __init
+perfmon_bootstrap(void)
+{
+ kmem_cache_init(&perfmon_grouplist_cache, "perfmon_grouplist",
+ sizeof(struct perfmon_grouplist), 0,
+ perfmon_grouplist_ctor, 0);
+
+ return 0;
+}
+
+INIT_OP_DEFINE(perfmon_bootstrap,
+ INIT_OP_DEP(kmem_setup, true));
+
+static int __init
+perfmon_setup(void)
+{
+ struct perfmon_grouplist *grouplist;
+ unsigned int i;
+
+ spinlock_init(&perfmon_pmu.lock);
+ perfmon_pmu.nr_pmcs = 0;
+
+ for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) {
+ perfmon_pmu.pmcs[i].nr_refs = 0;
+ }
+ for (i = 0; i < ARRAY_SIZE(perfmon_pmc_id_to_index); i++) {
+ perfmon_pmc_id_to_index[i] = UINT32_MAX;
+ }
+
+ for (i = 0; i < cpu_count(); i++) {
+ perfmon_cpu_pmu_init(i);
+ }
+
+ for (i = 0; i < cpu_count(); i++) {
+ grouplist = perfmon_grouplist_create();
+
+ if (grouplist == NULL) {
+ panic("perfmon: unable to create cpu grouplists");
+ }
+
+ percpu_var(perfmon_cpu_grouplist, i) = grouplist;
+ }
+
+ if (!pmu_driver.ops.info) {
+ log_err("unable to start perfmon: no compatible pmu driver available");
+ return ENODEV;
+ }
+ pmu_driver.ops.info();
+
+ return 0;
+}
+
+INIT_OP_DEFINE(perfmon_setup,
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(kmem_setup, true),
+ INIT_OP_DEP(panic_setup, true),
+ INIT_OP_DEP(percpu_setup, true),
+ INIT_OP_DEP(perfmon_bootstrap, true),
+ INIT_OP_DEP(pmu_amd_setup, false),
+ INIT_OP_DEP(pmu_intel_setup, false),
+ INIT_OP_DEP(spinlock_setup, true),
+ INIT_OP_DEP(thread_setup, true),
+ INIT_OP_DEP(trap_setup, true));
+
+static void
+perfmon_check_event_args(unsigned int type, unsigned int id, int flags)
+{
+ (void)type;
+ (void)id;
+ (void)flags;
+ assert((type == PERFMON_ET_RAW) || (type == PERFMON_ET_GENERIC));
+ assert((type != PERFMON_ET_GENERIC) || (id < PERFMON_NR_GENERIC_EVENTS));
+ assert((flags & PERFMON_EF_MASK) == flags);
+ assert((flags & (PERFMON_EF_KERN | PERFMON_EF_USER)));
+}
+
+int
+perfmon_event_create(struct perfmon_event **eventp, unsigned int type,
+ unsigned int id, int flags)
+{
+ struct perfmon_event *event;
+
+ perfmon_check_event_args(type, id, flags);
+
+ event = kmem_alloc(sizeof(*event));
+
+ if (event == NULL) {
+ return ENOMEM;
+ }
+
+ event->count = 0;
+ list_node_init(&event->node);
+ event->flags = flags;
+ event->type = type;
+ event->id = id;
+ *eventp = event;
+ return 0;
+}
+
+void
+perfmon_event_destroy(struct perfmon_event *event)
+{
+ kmem_free(event, sizeof(*event));
+}
+
+uint64_t
+perfmon_event_read(const struct perfmon_event *event)
+{
+ return event->count;
+}
+
+#ifdef CONFIG_PERFMON_TEST
+
+int
+perfmon_event_write(struct perfmon_event *event, uint64_t value)
+{
+ if (!pmu_driver.ops.write) {
+ return ENODEV;
+ }
+ event->value = value;
+ event->set_value = true;
+
+ return 0;
+}
+
+int
+perfmon_get_pmc_width(void)
+{
+ return pmu_driver.pmc_width;
+}
+
+#endif /* CONFIG_PERFMON_TEST */
+
+void
+perfmon_event_reset(struct perfmon_event *event)
+{
+ event->count = 0;
+}
+
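+/*
+ * Update an event count from its hardware counter.
+ *
+ * A worked example of the accounting below, assuming a pmc_width of 32:
+ * if the counter previously read 0xffffff00, wrapped, and now reads 0x100
+ * with one recorded overflow (diff == 1), the event count increases by
+ * (1ULL << 32) * 1 - 0xffffff00 + 0x100 = 0x200 occurrences.
+ */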
+static void
+perfmon_event_sync(struct perfmon_cpu_pmu *cpu_pmu,
+ struct perfmon_event *event)
+{
+ struct perfmon_pmc *pmc;
+ struct perfmon_cpu_pmc *cpu_pmc;
+ uint64_t count;
+ int diff;
+
+ pmc = perfmon_pmc_from_index(event->pmc_index);
+ cpu_pmc = &cpu_pmu->pmcs[event->pmc_index];
+ count = pmu_driver.ops.read(pmc->id);
+
+ if (!pmu_driver.ops.handle_of_intr) {
+ /* Force pmc overflow status update */
+ perfmon_check_pmc_of(cpu_pmc, count);
+ }
+
+ if (unlikely(event->overflow_id != cpu_pmc->overflow_id)) {
+ assert(cpu_pmc->overflow_id > event->overflow_id);
+ diff = cpu_pmc->overflow_id - event->overflow_id;
+ /* diff is very likely 1. */
+ event->count += (1ULL << pmu_driver.pmc_width) * diff
+ - event->prev + count;
+ event->overflow_id = cpu_pmc->overflow_id;
+ } else {
+ event->count += count - event->prev;
+ }
+ event->prev = count;
+}
+
+static inline int
+perfmon_group_attached(const struct perfmon_group *group)
+{
+ return group->flags & PERFMON_GF_ATTACHED;
+}
+
+static inline int
+perfmon_group_enabled(const struct perfmon_group *group)
+{
+ return group->flags & PERFMON_GF_ENABLED;
+}
+
+static inline int
+perfmon_group_loaded(const struct perfmon_group *group)
+{
+ return group->flags & PERFMON_GF_LOADED;
+}
+
+static inline int
+perfmon_group_stopping(const struct perfmon_group *group)
+{
+ return group->flags & PERFMON_GF_PENDING_DISABLE;
+}
+
+int
+perfmon_group_create(struct perfmon_group **groupp)
+{
+ struct perfmon_group *group;
+
+ group = kmem_alloc(sizeof(*group));
+
+ if (group == NULL) {
+ return ENOMEM;
+ }
+
+ list_init(&group->events);
+ spinlock_init(&group->lock);
+ group->cpu = PERFMON_INVALID_CPU;
+ group->flags = 0;
+ group->type = PERFMON_GT_UNKNOWN;
+ *groupp = group;
+ return 0;
+}
+
+int
+perfmon_group_destroy(struct perfmon_group *group)
+{
+ struct perfmon_event *event;
+
+ if (perfmon_group_attached(group)) {
+ return EINVAL;
+ }
+ assert (!perfmon_group_enabled(group));
+
+ while (!list_empty(&group->events)) {
+ event = list_first_entry(&group->events, struct perfmon_event, node);
+ list_remove(&event->node);
+ perfmon_event_destroy(event);
+ }
+
+ kmem_free(group, sizeof(*group));
+ return 0;
+}
+
+void
+perfmon_group_add(struct perfmon_group *group, struct perfmon_event *event)
+{
+ assert(list_node_unlinked(&event->node));
+ assert(!perfmon_group_attached(group));
+
+ /* TODO: check that we do not have the same event twice. */
+
+ list_insert_tail(&group->events, &event->node);
+}
+
+/*
+ * Attach a group to the global logical PMU.
+ *
+ * For each event in the group, obtain a reference on a PMC.
+ */
+static int
+perfmon_group_attach_pmu(struct perfmon_group *group)
+{
+ struct perfmon_event *event, *tmp;
+ struct perfmon_pmc *pmc = NULL;
+ int error;
+
+ assert(!perfmon_group_attached(group));
+
+ list_for_each_entry(&group->events, event, node) {
+ error = perfmon_pmc_get(&pmc, event);
+
+ if (error) {
+ goto error_pmc;
+ }
+
+ event->pmc_index = perfmon_pmc_index(pmc);
+ }
+
+ return 0;
+
+error_pmc:
+ list_for_each_entry(&group->events, tmp, node) {
+ if (tmp == event) {
+ break;
+ }
+
+ perfmon_pmc_put(perfmon_pmc_from_index(tmp->pmc_index));
+ }
+
+ return error;
+}
+
+static void
+perfmon_group_detach_pmu(struct perfmon_group *group)
+{
+ struct perfmon_event *event;
+
+ assert(perfmon_group_attached(group));
+
+ list_for_each_entry(&group->events, event, node) {
+ perfmon_pmc_put(perfmon_pmc_from_index(event->pmc_index));
+ }
+}
+
+int
+perfmon_group_attach(struct perfmon_group *group, struct thread *thread)
+{
+ struct perfmon_grouplist *grouplist;
+ unsigned long flags;
+ int error;
+
+ assert(group->type == PERFMON_GT_UNKNOWN);
+
+ error = perfmon_group_attach_pmu(group);
+
+ if (error) {
+ return error;
+ }
+
+ thread_ref(thread);
+ group->thread = thread;
+ group->type = PERFMON_GT_THREAD;
+ group->flags |= PERFMON_GF_ATTACHED;
+
+ grouplist = thread->perfmon_groups;
+
+ spinlock_lock_intr_save(&grouplist->lock, &flags);
+ list_insert_tail(&grouplist->groups, &group->node);
+ spinlock_unlock_intr_restore(&grouplist->lock, flags);
+
+ return 0;
+}
+
+int
+perfmon_group_attach_cpu(struct perfmon_group *group, unsigned int cpu)
+{
+ int error;
+ struct perfmon_grouplist *grouplist;
+
+ assert(cpu < cpu_count());
+ assert(group->type == PERFMON_GT_UNKNOWN);
+
+ error = perfmon_group_attach_pmu(group);
+
+ if (error) {
+ return error;
+ }
+
+ group->cpu = cpu;
+ group->type = PERFMON_GT_CPU;
+ group->flags |= PERFMON_GF_ATTACHED;
+
+ grouplist = percpu_var(perfmon_cpu_grouplist, cpu);
+
+ spinlock_lock(&grouplist->lock);
+ list_insert_tail(&grouplist->groups, &group->node);
+ spinlock_unlock(&grouplist->lock);
+
+ return 0;
+}
+
+int
+perfmon_group_detach(struct perfmon_group *group)
+{
+ unsigned long flags;
+ unsigned long grouplist_flags;
+ struct perfmon_grouplist *grouplist;
+ struct thread *prev_thread;
+ unsigned int type;
+ int ret;
+
+ type = group->type;
+ grouplist_flags = 0; /* silence Wmaybe-uninitialized warning. */
+ ret = 0;
+ prev_thread = NULL;
+
+ switch (type) {
+ case PERFMON_GT_THREAD:
+ grouplist = group->thread->perfmon_groups;
+ spinlock_lock_intr_save(&grouplist->lock, &grouplist_flags);
+ prev_thread = group->thread;
+ break;
+ case PERFMON_GT_CPU:
+ grouplist = percpu_var(perfmon_cpu_grouplist, group->cpu);
+ spinlock_lock(&grouplist->lock);
+ break;
+ default:
+ panic("perfmon: invalid group type on detach");
+ }
+ spinlock_lock_intr_save(&group->lock, &flags);
+
+ if (perfmon_group_enabled(group)) {
+ ret = EINVAL;
+ goto out;
+ }
+
+ if (!perfmon_group_attached(group)) {
+ goto out;
+ }
+
+ perfmon_group_detach_pmu(group);
+ list_remove(&group->node);
+
+ group->thread = NULL;
+ group->cpu = PERFMON_INVALID_CPU;
+ group->type = PERFMON_GT_UNKNOWN;
+ group->flags &= ~PERFMON_GF_ATTACHED;
+ assert(!group->flags);
+
+out:
+ spinlock_unlock_intr_restore(&group->lock, flags);
+ switch (type) {
+ case PERFMON_GT_THREAD:
+ spinlock_unlock_intr_restore(&grouplist->lock, grouplist_flags);
+ break;
+ case PERFMON_GT_CPU:
+ spinlock_unlock(&grouplist->lock);
+ break;
+ }
+
+ if (prev_thread) {
+ /* Late unref as it might destroy the thread and lock the runq. */
+ thread_unref(prev_thread);
+ }
+
+ return ret;
+}
+
+static void
+perfmon_group_load(struct perfmon_group *group)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_event *event;
+ struct perfmon_pmc *pmc;
+ uint64_t prev;
+
+ assert(!thread_preempt_enabled());
+ assert(perfmon_group_enabled(group));
+ assert(!perfmon_group_loaded(group));
+
+ cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu);
+
+#ifdef CONFIG_PERFMON_TEST
+ /* XXX: could be done in the loading loop, but performance does not
+ * matter in the functional tests using this feature.
+ */
+ list_for_each_entry(&group->events, event, node) {
+ if (!event->set_value) {
+ continue;
+ }
+ pmc = perfmon_pmc_from_index(event->pmc_index);
+ pmu_driver.ops.write(pmc->id, event->value);
+ event->set_value = false;
+ }
+#endif
+
+ list_for_each_entry(&group->events, event, node) {
+ pmc = perfmon_pmc_from_index(event->pmc_index);
+ prev = pmu_driver.ops.read(pmc->id);
+
+ perfmon_cpu_pmu_load(cpu_pmu, event->pmc_index);
+ event->prev = prev;
+ event->overflow_id = cpu_pmu->pmcs[event->pmc_index].overflow_id;
+ }
+
+ group->cpu = cpu_id();
+ group->flags |= PERFMON_GF_LOADED;
+}
+
+static void
+perfmon_cpu_load_remote(void *arg)
+{
+ struct perfmon_group *group;
+
+ group = arg;
+ assert (group->cpu == cpu_id());
+
+ spinlock_lock(&group->lock);
+
+ perfmon_group_load(group);
+
+ spinlock_unlock(&group->lock);
+}
+
+static void
+perfmon_group_unload(struct perfmon_group *group)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_event *event;
+
+ assert(!thread_preempt_enabled());
+ assert(perfmon_group_enabled(group));
+ assert(perfmon_group_loaded(group));
+
+ cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu);
+
+ list_for_each_entry(&group->events, event, node) {
+ perfmon_cpu_pmu_unload(cpu_pmu, event->pmc_index);
+ perfmon_event_sync(cpu_pmu, event);
+ }
+
+ group->flags &= ~PERFMON_GF_LOADED;
+}
+
+static void
+perfmon_cpu_unload_remote(void *arg)
+{
+ struct perfmon_group *group;
+
+ group = arg;
+ assert (group->cpu == cpu_id());
+ assert (perfmon_group_stopping(group));
+
+ spinlock_lock(&group->lock);
+
+ perfmon_group_unload(group);
+
+ group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+ group->flags &= ~PERFMON_GF_ENABLED;
+
+ spinlock_unlock(&group->lock);
+}
+
+static void
+perfmon_thread_load_remote(void *arg)
+{
+ struct perfmon_group *group;
+ struct thread *thread;
+
+ assert (!cpu_intr_enabled());
+
+ group = arg;
+ thread = thread_self();
+
+ if (thread != group->thread) {
+ return;
+ }
+
+ spinlock_lock(&group->lock);
+
+ if (perfmon_group_enabled(group) && !perfmon_group_loaded(group)) {
+ perfmon_group_load(group);
+ }
+
+ spinlock_unlock(&group->lock);
+}
+
+static void
+perfmon_thread_unload_remote(void *arg)
+{
+ struct perfmon_group *group;
+ struct thread *thread;
+
+ assert (!cpu_intr_enabled());
+
+ group = arg;
+ thread = thread_self();
+
+ if (thread != group->thread) {
+ return;
+ }
+
+ spinlock_lock(&group->lock);
+
+ if (perfmon_group_enabled(group)) {
+ assert (perfmon_group_stopping(group));
+ if (perfmon_group_loaded(group)) {
+ perfmon_group_unload(group);
+ }
+ group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+ group->flags &= ~PERFMON_GF_ENABLED;
+ }
+
+ spinlock_unlock(&group->lock);
+}
+
+int
+perfmon_group_start(struct perfmon_group *group)
+{
+ unsigned long flags;
+ unsigned int cpu;
+ int ret;
+
+ ret = 0;
+ spinlock_lock_intr_save(&group->lock, &flags);
+
+ if (!perfmon_group_attached(group) || perfmon_group_loaded(group)) {
+ ret = EINVAL;
+ goto end;
+ }
+ assert(!perfmon_group_enabled(group));
+
+ group->flags |= PERFMON_GF_ENABLED;
+
+ if (group->type == PERFMON_GT_CPU) {
+ spinlock_unlock_intr_restore(&group->lock, flags);
+
+ xcall_call(perfmon_cpu_load_remote, group, group->cpu);
+
+ return 0;
+ } else if (group->thread == thread_self()) {
+ perfmon_group_load(group);
+ } else if (group->thread->state == THREAD_RUNNING) {
+ spinlock_unlock_intr_restore(&group->lock, flags);
+
+ cpu = thread_cpu(group->thread);
+
+ xcall_call(perfmon_thread_load_remote, group, cpu);
+
+ return 0;
+ }
+end:
+ spinlock_unlock_intr_restore(&group->lock, flags);
+
+ return ret;
+}
+
+static void
+perfmon_group_sync_local(struct perfmon_group *group)
+{
+ struct perfmon_event *event;
+ struct perfmon_cpu_pmu *cpu_pmu;
+
+ cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu);
+
+ /* The group sync duration *should be* limited, as a group may only
+ * contain a limited number of *different* events.
+ */
+ list_for_each_entry(&group->events, event, node) {
+ perfmon_event_sync(cpu_pmu, event);
+ }
+}
+
+static void
+perfmon_cpu_sync_remote(void *arg)
+{
+ struct perfmon_group *group;
+
+ group = arg;
+ assert (group->type == PERFMON_GT_CPU);
+ assert (group->cpu == cpu_id());
+
+ perfmon_group_sync_local(group);
+}
+
+static void
+perfmon_thread_sync_remote(void *arg)
+{
+ struct perfmon_group *group;
+ unsigned long flags;
+
+ group = arg;
+
+ assert (group->type == PERFMON_GT_THREAD);
+ if (thread_self() != group->thread) {
+ return;
+ }
+ spinlock_lock_intr_save(&group->lock, &flags);
+
+ perfmon_group_sync_local(group);
+
+ spinlock_unlock_intr_restore(&group->lock, flags);
+}
+
+void
+perfmon_group_update(struct perfmon_group *group)
+{
+ unsigned long flags;
+ unsigned int cpu;
+
+ assert(perfmon_group_enabled(group));
+
+ spinlock_lock_intr_save(&group->lock, &flags);
+
+ assert(perfmon_group_attached(group));
+ assert(perfmon_group_enabled(group));
+
+ if (!perfmon_group_loaded(group)) {
+ goto end;
+ }
+
+ if (group->type == PERFMON_GT_CPU) {
+ if (group->cpu == cpu_id()) {
+ perfmon_group_sync_local(group);
+ } else {
+ xcall_call(perfmon_cpu_sync_remote, group, group->cpu);
+ }
+ } else {
+ if (group->thread == thread_self()) {
+ assert (perfmon_group_loaded(group));
+ perfmon_group_sync_local(group);
+ } else if (group->thread->state == THREAD_RUNNING) {
+ spinlock_unlock_intr_restore(&group->lock, flags);
+ cpu = thread_cpu(group->thread);
+ xcall_call(perfmon_thread_sync_remote, group, cpu);
+ return;
+ }
+ }
+end:
+ spinlock_unlock_intr_restore(&group->lock, flags);
+}
+
+int
+perfmon_group_stop(struct perfmon_group *group)
+{
+ int ret;
+ unsigned long flags;
+ unsigned int cpu;
+
+ ret = 0;
+ spinlock_lock_intr_save(&group->lock, &flags);
+
+ if (!perfmon_group_attached(group) || !perfmon_group_enabled(group)) {
+ ret = EINVAL;
+ goto end;
+ }
+
+ if (!perfmon_group_loaded(group)) {
+ goto disable;
+ }
+
+ group->flags |= PERFMON_GF_PENDING_DISABLE;
+
+ if (group->type == PERFMON_GT_CPU) {
+ spinlock_unlock_intr_restore(&group->lock, flags);
+
+ xcall_call(perfmon_cpu_unload_remote, group, group->cpu);
+ return 0;
+ } else if (group->thread == thread_self()) {
+ perfmon_group_unload(group);
+ } else {
+ /* If the thread is not running (but still loaded), the unload is
+ * (probably) performed as we release the group lock, but we still
+ * need a blocking xcall to guarantee the group is disabled when the
+ * function returns.
+ */
+ spinlock_unlock_intr_restore(&group->lock, flags);
+
+ cpu = thread_cpu(group->thread);
+
+ xcall_call(perfmon_thread_unload_remote, group, cpu);
+ return 0;
+ }
+
+disable:
+ group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+ group->flags &= ~PERFMON_GF_ENABLED;
+
+end:
+ spinlock_unlock_intr_restore(&group->lock, flags);
+ return ret;
+}
+
+int
+perfmon_thread_init(struct thread *thread)
+{
+ struct perfmon_grouplist *grouplist;
+
+ grouplist = perfmon_grouplist_create();
+
+ if (grouplist == NULL) {
+ return ENOMEM;
+ }
+
+ thread->perfmon_groups = grouplist;
+ return 0;
+}
+
+void
+perfmon_thread_destroy(struct thread *thread)
+{
+ perfmon_grouplist_destroy(thread->perfmon_groups);
+}
+
+void
+perfmon_thread_load(struct thread *thread)
+{
+ struct perfmon_grouplist *grouplist;
+ struct perfmon_group *group;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ grouplist = thread->perfmon_groups;
+
+ spinlock_lock(&grouplist->lock);
+
+ list_for_each_entry(&grouplist->groups, group, node) {
+ spinlock_lock(&group->lock);
+
+ if (perfmon_group_enabled(group) && !perfmon_group_loaded(group)) {
+ perfmon_group_load(group);
+ }
+
+ spinlock_unlock(&group->lock);
+ }
+
+ spinlock_unlock(&grouplist->lock);
+}
+
+void
+perfmon_thread_unload(struct thread *thread)
+{
+ struct perfmon_grouplist *grouplist;
+ struct perfmon_group *group;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ grouplist = thread->perfmon_groups;
+
+ spinlock_lock(&grouplist->lock);
+
+ list_for_each_entry(&grouplist->groups, group, node) {
+ spinlock_lock(&group->lock);
+ /* TODO: we may want to prevent long looping over the groups.
+ * One way to do this would be to maintain an event mapping in the
+ * grouplist in order to have a finite operation upon scheduling.
+ */
+
+ if (perfmon_group_loaded(group)) {
+ perfmon_group_unload(group);
+ if (perfmon_group_stopping(group)) {
+ group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+ group->flags &= ~PERFMON_GF_ENABLED;
+ }
+ }
+
+ spinlock_unlock(&group->lock);
+ }
+
+ spinlock_unlock(&grouplist->lock);
+}
diff --git a/kern/perfmon.h b/kern/perfmon.h
new file mode 100644
index 0000000..b1da4ec
--- /dev/null
+++ b/kern/perfmon.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Performance monitoring based on hardware performance counters.
+ */
+
+#ifndef KERN_PERFMON_H
+#define KERN_PERFMON_H
+
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/thread.h>
+
+/*
+ * Performance event types.
+ */
+#define PERFMON_ET_GENERIC 0
+#define PERFMON_ET_RAW 1
+
+/*
+ * IDs of generic performance events.
+ */
+#define PERFMON_EV_CYCLE 0
+#define PERFMON_EV_REF_CYCLE 1
+#define PERFMON_EV_INSTRUCTION 2
+#define PERFMON_EV_CACHE_REF 3
+#define PERFMON_EV_CACHE_MISS 4
+#define PERFMON_EV_BRANCH 5
+#define PERFMON_EV_BRANCH_MISS 6
+#define PERFMON_NR_GENERIC_EVENTS 7
+
+/*
+ * Event flags.
+ */
+#define PERFMON_EF_KERN 0x1 /* Monitor events in kernel mode */
+#define PERFMON_EF_USER 0x2 /* Monitor events in user mode */
+#define PERFMON_EF_MASK (PERFMON_EF_KERN | PERFMON_EF_USER)
+
+/*
+ * PMU operations.
+ *
+ * Set by calling perfmon_pmu_register().
+ */
+struct perfmon_pmu_ops {
+ void (*info)(void);
+ int (*translate)(unsigned int *raw_event_idp, unsigned int event_id);
+ int (*alloc)(unsigned int *pmc_idp, unsigned int raw_event_id);
+ void (*free)(unsigned int pmc_id);
+ void (*start)(unsigned int pmc_id, unsigned int raw_event_id);
+ void (*stop)(unsigned int pmc_id);
+ uint64_t (*read)(unsigned int pmc_id);
+ void (*write)(unsigned int pmc_id, uint64_t value);
+ /* If set, of_max_ticks should be set to 0. */
+ void (*handle_of_intr)(void);
+};
+
+/*
+ * PMU device description.
+ */
+struct perfmon_pmu_driver {
+ uint8_t pmc_width; /* width in bits of a pmc */
+ /*
+ * Maximum number of clock ticks between two overflow checks.
+ * Should be set to 0 if handle_of_intr is set.
+ */
+ uint64_t of_max_ticks;
+ struct perfmon_pmu_ops ops;
+};
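+
+/*
+ * A minimal registration sketch for a hypothetical polling driver (the
+ * pmu_foo_* names are placeholders, not an existing driver):
+ *
+ *   static struct perfmon_pmu_driver pmu_foo_driver = {
+ *       .pmc_width = 48,
+ *       .of_max_ticks = 1000,
+ *       .ops = {
+ *           .info = pmu_foo_info,
+ *           .translate = pmu_foo_translate,
+ *           .alloc = pmu_foo_alloc,
+ *           .free = pmu_foo_free,
+ *           .start = pmu_foo_start,
+ *           .stop = pmu_foo_stop,
+ *           .read = pmu_foo_read,
+ *       },
+ *   };
+ *
+ *   perfmon_pmu_register(&pmu_foo_driver);
+ *
+ * A polling driver leaves handle_of_intr unset; an interrupt-driven driver
+ * sets it and uses an of_max_ticks of 0 instead.
+ */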
+
+/*
+ * Performance monitoring event.
+ *
+ * An event describes a single, well-defined state and records its
+ * occurrences over a period of time. It must be added to exactly
+ * one group before being used.
+ */
+struct perfmon_event;
+
+/*
+ * Group of performance monitoring events.
+ *
+ * A group must be attached to either a thread or a processor, and abstracts
+ * all operations on hardware counters.
+ *
+ * Until a group is actually attached, it is assumed there is only one
+ * reference on it, owned by the caller.
+ *
+ * For a thread-attached group, it is the user's responsibility to make sure
+ * that perfmon_group_stop() is always called before the monitored thread is
+ * deleted.
+ */
+struct perfmon_group;
+
+/*
+ * Create an event.
+ */
+int perfmon_event_create(struct perfmon_event **eventp, unsigned int type,
+ unsigned int id, int flags);
+
+/*
+ * Destroy an event.
+ *
+ * Once an event is added to a group, it can only be destroyed by destroying
+ * the group.
+ */
+void perfmon_event_destroy(struct perfmon_event *event);
+
+/*
+ * Obtain the number of occurrences of an event.
+ *
+ * Events are updated at specific points in time, which means the value
+ * returned by this function can be outdated.
+ *
+ * See perfmon_group_update() and perfmon_group_stop().
+ */
+uint64_t perfmon_event_read(const struct perfmon_event *event);
+
+/*
+ * Reset the number of occurrences of an event to 0.
+ *
+ * The group containing the given event should be stopped when calling
+ * this function.
+ */
+void perfmon_event_reset(struct perfmon_event *event);
+
+/*
+ * Create an event group.
+ *
+ * Events must be added to the group, which must then be attached to a
+ * processor or a thread.
+ */
+int perfmon_group_create(struct perfmon_group **groupp);
+
+/*
+ * Destroy a group and all its events.
+ *
+ * A group can only be destroyed once stopped and detached.
+ *
+ * Will return EINVAL if the group is not detached.
+ */
+int perfmon_group_destroy(struct perfmon_group *group);
+
+/*
+ * Add an event into a group.
+ *
+ * Events can only be added when a group isn't attached.
+ */
+void perfmon_group_add(struct perfmon_group *group,
+ struct perfmon_event *event);
+
+/*
+ * Attach a group to, respectively, a thread or a processor, reserving the
+ * associated logical counters.
+ *
+ * A group can only be attached to one thread or processor at a time.
+ */
+int perfmon_group_attach(struct perfmon_group *group, struct thread *thread);
+int perfmon_group_attach_cpu(struct perfmon_group *group, unsigned int cpu);
+
+/*
+ * Detach a group from a thread or a processor.
+ *
+ * This frees the associated logical counters.
+ *
+ * Returns EINVAL if the group is still enabled (not stopped).
+ */
+int perfmon_group_detach(struct perfmon_group *group);
+
+/*
+ * Start performance monitoring.
+ *
+ * A group must be attached before being started.
+ */
+int perfmon_group_start(struct perfmon_group *group);
+
+/*
+ * Update all events in the given group.
+ */
+void perfmon_group_update(struct perfmon_group *group);
+
+/*
+ * Stop performance monitoring.
+ *
+ * A group can't be detached before it's stopped. Events are implicitly
+ * updated when calling this function.
+ */
+int perfmon_group_stop(struct perfmon_group *group);
+
+/*
+ * Initialize perfmon thread-specific data for the given thread.
+ */
+int perfmon_thread_init(struct thread *thread);
+
+/*
+ * Destroy perfmon thread-specific data for the given thread.
+ */
+void perfmon_thread_destroy(struct thread *thread);
+
+/*
+ * Load/unload the events associated to a thread on the current processor.
+ *
+ * These functions should only be used by the scheduler during context switch.
+ * Interrupts and preemption must be disabled when calling them.
+ */
+void perfmon_thread_load(struct thread *thread);
+void perfmon_thread_unload(struct thread *thread);
+
+/*
+ * This init operation provides:
+ * - perfmon_thread_init()
+ */
+INIT_OP_DECLARE(perfmon_bootstrap);
+
+/*
+ * This init operation provides:
+ * - module fully initialized
+ */
+INIT_OP_DECLARE(perfmon_setup);
+
+/*
+ * Handle overflow interrupt.
+ */
+void perfmon_of_intr(void);
+
+/*
+ * Register an architecture-specific driver.
+ */
+int perfmon_pmu_register(struct perfmon_pmu_driver *driver);
+
+/*
+ * Signal an overflow for the given PMC.
+ *
+ * Should be called from a PMU driver's custom overflow interrupt handler.
+ */
+void perfmon_cpu_on_pmc_of(unsigned int pmc_id);
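+
+/*
+ * A hypothetical interrupt-driven driver (handle_of_intr set, of_max_ticks
+ * of 0) would call it from its handler, along these lines (pseudocode,
+ * hardware access is driver-specific):
+ *
+ *   static void
+ *   pmu_foo_handle_of_intr(void)
+ *   {
+ *       for each pmc_id with its hardware overflow status bit set {
+ *           perfmon_cpu_on_pmc_of(pmc_id);
+ *           clear the overflow status bit;
+ *       }
+ *   }
+ */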
+
+#endif /* KERN_PERFMON_H */
diff --git a/kern/perfmon_i.h b/kern/perfmon_i.h
new file mode 100644
index 0000000..3072171
--- /dev/null
+++ b/kern/perfmon_i.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Internal functions of the performance monitoring module.
+ */
+
+#ifndef KERN_PERFMON_I_H
+#define KERN_PERFMON_I_H
+
+#include <kern/perfmon.h>
+
+#ifdef CONFIG_PERFMON_TEST
+
+/*
+ * Set a running event's hardware counter value, for overflow test purposes.
+ *
+ * Beware, this will affect all events associated with the same hardware
+ * counter.
+ */
+int perfmon_event_write(struct perfmon_event *event, uint64_t value);
+
+/*
+ * Returns the bit width of the register used in perfmon.
+ */
+int perfmon_get_pmc_width(void);
+
+#endif /* CONFIG_PERFMON_TEST */
+
+#endif /* KERN_PERFMON_I_H */
diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h
new file mode 100644
index 0000000..6f9be0b
--- /dev/null
+++ b/kern/perfmon_types.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definitions used to avoid circular inclusion dependencies.
+ */
+
+#ifndef KERN_PERFMON_TYPES_H
+#define KERN_PERFMON_TYPES_H
+
+struct perfmon_grouplist;
+
+#endif /* KERN_PERFMON_TYPES_H */
diff --git a/kern/thread.c b/kern/thread.c
index 85e557d..77960ec 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -100,6 +100,7 @@
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/percpu.h>
+#include <kern/perfmon.h>
#include <kern/rcu.h>
#include <kern/shell.h>
#include <kern/sleepq.h>
@@ -605,9 +606,22 @@ thread_runq_wakeup_balancer(struct thread_runq *runq)
}
static void
-thread_runq_schedule_prepare(struct thread *thread)
+thread_runq_schedule_load(struct thread *thread)
{
pmap_load(thread->task->map->pmap);
+#ifdef CONFIG_PERFMON
+ perfmon_thread_load(thread);
+#endif
+}
+
+static void
+thread_runq_schedule_unload(struct thread *thread)
+{
+#ifdef CONFIG_PERFMON
+ perfmon_thread_unload(thread);
+#else
+ (void)thread;
+#endif
}
static struct thread_runq *
@@ -639,6 +653,8 @@ thread_runq_schedule(struct thread_runq *runq)
assert(next->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL);
if (likely(prev != next)) {
+ thread_runq_schedule_unload(prev);
+
rcu_report_context_switch(thread_rcu_reader(prev));
spinlock_transfer_owner(&runq->lock, next);
@@ -660,10 +676,10 @@ thread_runq_schedule(struct thread_runq *runq)
* - The current thread may have been migrated to another processor.
*/
barrier();
+ thread_runq_schedule_load(prev);
+
next = NULL;
runq = thread_runq_local();
-
- thread_runq_schedule_prepare(prev);
} else {
next = NULL;
}
@@ -1750,7 +1766,7 @@ thread_main(void (*fn)(void *), void *arg)
assert(!thread_preempt_enabled());
thread = thread_self();
- thread_runq_schedule_prepare(thread);
+ thread_runq_schedule_load(thread);
spinlock_unlock(&thread_runq_local()->lock);
cpu_intr_enable();
@@ -1847,6 +1863,14 @@ thread_init(struct thread *thread, void *stack,
thread->flags |= THREAD_DETACHED;
}
+#ifdef CONFIG_PERFMON
+ error = perfmon_thread_init(thread);
+
+ if (error) {
+ goto error_perfmon;
+ }
+#endif /* CONFIG_PERFMON */
+
error = tcb_build(&thread->tcb, stack, fn, arg);
if (error) {
@@ -1858,6 +1882,10 @@ thread_init(struct thread *thread, void *stack,
return 0;
error_tcb:
+#ifdef CONFIG_PERFMON
+ perfmon_thread_destroy(thread);
+error_perfmon:
+#endif /* CONFIG_PERFMON */
thread_destroy_tsd(thread);
turnstile_destroy(thread->priv_turnstile);
error_turnstile:
@@ -1977,6 +2005,9 @@ thread_destroy(struct thread *thread)
/* See task_info() */
task_remove_thread(thread->task, thread);
+#ifdef CONFIG_PERFMON
+ perfmon_thread_destroy(thread);
+#endif
thread_destroy_tsd(thread);
turnstile_destroy(thread->priv_turnstile);
sleepq_destroy(thread->priv_sleepq);
@@ -2309,6 +2340,13 @@ thread_setup(void)
#define THREAD_STACK_GUARD_INIT_OP_DEPS
#endif /* CONFIG_THREAD_STACK_GUARD */
+#ifdef CONFIG_PERFMON
+#define THREAD_PERFMON_INIT_OP_DEPS \
+ INIT_OP_DEP(perfmon_bootstrap, true),
+#else /* CONFIG_PERFMON */
+#define THREAD_PERFMON_INIT_OP_DEPS
+#endif /* CONFIG_PERFMON */
+
INIT_OP_DEFINE(thread_setup,
INIT_OP_DEP(cpumap_setup, true),
INIT_OP_DEP(kmem_setup, true),
@@ -2317,6 +2355,7 @@ INIT_OP_DEFINE(thread_setup,
INIT_OP_DEP(task_setup, true),
INIT_OP_DEP(thread_bootstrap, true),
INIT_OP_DEP(turnstile_setup, true),
+ THREAD_PERFMON_INIT_OP_DEPS
THREAD_STACK_GUARD_INIT_OP_DEPS
);
@@ -2696,6 +2735,13 @@ thread_report_periodic_event(void)
spinlock_unlock(&runq->lock);
}
+unsigned int
+thread_cpu(const struct thread *thread)
+{
+ assert(thread->runq);
+ return thread->runq->cpu;
+}
+
char
thread_state_to_chr(const struct thread *thread)
{
diff --git a/kern/thread.h b/kern/thread.h
index 4bead75..787ccf5 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -283,6 +283,14 @@ void thread_setscheduler(struct thread *thread, unsigned char policy,
void thread_pi_setscheduler(struct thread *thread, unsigned char policy,
unsigned short priority);
+/*
+ * Return the last CPU on which the thread has been scheduled.
+ *
+ * This call is not synchronized with respect to migration. The caller
+ * may obtain an outdated value.
+ */
+unsigned int thread_cpu(const struct thread *thread);
+
static inline void
thread_ref(struct thread *thread)
{
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 0be1e77..9c24d3a 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -24,6 +24,7 @@
#include <kern/atomic.h>
#include <kern/cpumap.h>
#include <kern/list_types.h>
+#include <kern/perfmon_types.h>
#include <kern/rcu_types.h>
#include <kern/spinlock_types.h>
#include <kern/turnstile_types.h>
@@ -185,6 +186,10 @@ struct thread {
struct list task_node; /* (T) */
void *stack; /* (-) */
char name[THREAD_NAME_SIZE]; /* ( ) */
+
+#ifdef CONFIG_PERFMON
+ struct perfmon_grouplist *perfmon_groups;
+#endif
};
#define THREAD_ATTR_DETACHED 0x1