author    | Richard Braun <rbraun@sceen.net> | 2018-06-25 21:56:01 +0200
committer | Richard Braun <rbraun@sceen.net> | 2018-06-25 21:56:01 +0200
commit    | 30dd97fb786ef5f7ca28049684b17bdc2ee7a718 (patch)
tree      | 330f0514edcfaaa4e3266edb1191c90a39edcb80 /kern
parent    | 0a7c73d2e06172a1210e2bbdfba5718040f4f007 (diff)
parent    | 7686bfcb703049db5d3711e59133ca4b2259e1f1 (diff)
Merge branch 'perfmon'
Diffstat (limited to 'kern')
-rw-r--r-- | kern/Kconfig         |   13
-rw-r--r-- | kern/Makefile        |    2
-rw-r--r-- | kern/percpu.c        |   30
-rw-r--r-- | kern/percpu.h        |   31
-rw-r--r-- | kern/perfmon.c       | 1443
-rw-r--r-- | kern/perfmon.h       |  221
-rw-r--r-- | kern/perfmon_types.h |  102
-rw-r--r-- | kern/task.c          |    2
-rw-r--r-- | kern/thread.c        |   69
-rw-r--r-- | kern/thread.h        |   31
-rw-r--r-- | kern/thread_i.h      |   17
11 files changed, 1936 insertions, 25 deletions
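
Before the diff itself, a minimal usage sketch of the event interface this merge introduces, based on the declarations added in kern/perfmon.h below and assuming CONFIG_PERFMON is enabled. The wrapper function and the choice of CPU 0 are illustrative only (not part of the patch), error handling is reduced to early returns, and an ordinary thread context is assumed.

/*
 * Illustrative caller: count retired instructions on CPU 0 using the
 * interface declared in kern/perfmon.h.
 */
#include <stdint.h>
#include <stdio.h>

#include <kern/perfmon.h>

static void
perfmon_example(void)
{
    struct perfmon_event event;
    uint64_t count;
    int error;

    /* Generic event, counted in both kernel and user mode */
    error = perfmon_event_init(&event, PERFMON_EV_INSTRUCTION,
                               PERFMON_EF_KERN | PERFMON_EF_USER);

    if (error) {
        return;
    }

    /* Allocates a PMC system-wide and starts counting on CPU 0 */
    error = perfmon_event_attach_cpu(&event, 0);

    if (error) {
        return;
    }

    /* ... run the workload to be measured ... */

    count = perfmon_event_read(&event);
    printf("instructions: %llu\n", (unsigned long long)count);

    perfmon_event_detach(&event);
}

Attaching to a thread instead of a processor follows the same flow, with perfmon_event_attach(&event, thread) in place of perfmon_event_attach_cpu().
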
diff --git a/kern/Kconfig b/kern/Kconfig index 7dd04a6a..ea61937f 100644 --- a/kern/Kconfig +++ b/kern/Kconfig @@ -94,6 +94,19 @@ config THREAD_STACK_GUARD If unsure, disable. +config PERFMON + def_bool n + +config PERFMON_MAX_PMCS + int "Number of performance monitoring counters" + default 8 + depends on PERFMON + ---help--- + Number of performance monitoring counters. + + This value affects the minimum duration of some critical sections + that run with interrupts disabled. + endmenu menu "Debugging" diff --git a/kern/Makefile b/kern/Makefile index ab7d6b59..5b04fcb3 100644 --- a/kern/Makefile +++ b/kern/Makefile @@ -41,3 +41,5 @@ x15_SOURCES-$(CONFIG_SHELL) += kern/shell.c x15_SOURCES-$(CONFIG_MUTEX_ADAPTIVE) += kern/mutex/mutex_adaptive.c x15_SOURCES-$(CONFIG_MUTEX_PLAIN) += kern/mutex/mutex_plain.c + +x15_SOURCES-$(CONFIG_PERFMON) += kern/perfmon.c diff --git a/kern/percpu.c b/kern/percpu.c index 53861a30..f344bd70 100644 --- a/kern/percpu.c +++ b/kern/percpu.c @@ -26,6 +26,7 @@ #include <kern/macros.h> #include <kern/panic.h> #include <kern/percpu.h> +#include <kern/slist.h> #include <machine/cpu.h> #include <vm/vm_kmem.h> #include <vm/vm_page.h> @@ -36,6 +37,14 @@ static void *percpu_area_content __initdata; static size_t percpu_area_size __initdata; static int percpu_skip_warning __initdata; +static struct slist percpu_ops __initdata; + +static void __init +percpu_op_run(const struct percpu_op *op) +{ + op->fn(); +} + static int __init percpu_bootstrap(void) { @@ -51,6 +60,8 @@ percpu_setup(void) struct vm_page *page; unsigned int order; + slist_init(&percpu_ops); + percpu_area_size = &_percpu_end - &_percpu; log_info("percpu: max_cpus: %u, section size: %zuk", CONFIG_MAX_CPUS, percpu_area_size >> 10); @@ -76,6 +87,15 @@ INIT_OP_DEFINE(percpu_setup, INIT_OP_DEP(percpu_bootstrap, true), INIT_OP_DEP(vm_page_setup, true)); +void __init +percpu_register_op(struct percpu_op *op) +{ + slist_insert_tail(&percpu_ops, &op->node); + + /* Run on BSP */ + percpu_op_run(op); +} + int __init percpu_add(unsigned int cpu) { @@ -116,6 +136,16 @@ out: return 0; } +void __init +percpu_ap_setup(void) +{ + struct percpu_op *op; + + slist_for_each_entry(&percpu_ops, op, node) { + percpu_op_run(op); + } +} + static int __init percpu_cleanup(void) { diff --git a/kern/percpu.h b/kern/percpu.h index 96f706ea..f77e7fd8 100644 --- a/kern/percpu.h +++ b/kern/percpu.h @@ -59,10 +59,26 @@ #include <kern/init.h> #include <kern/macros.h> +#include <kern/slist_types.h> #define PERCPU_SECTION .percpu #define __percpu __section(QUOTE(PERCPU_SECTION)) +typedef void (*percpu_op_fn_t)(void); + +/* + * Per-CPU operation. + * + * These operations allow initialization code to register functions to be run + * on APs when they're started. + */ +struct percpu_op { + struct slist_node node; + percpu_op_fn_t fn; +}; + +#define PERCPU_OP_INITIALIZER(op_fn) { .fn = op_fn } + /* * Boundaries of the percpu section. * @@ -96,6 +112,15 @@ percpu_area(unsigned int cpu) } /* + * Register a percpu operation to be run on all processors when + * they're started. + * + * The operation is run on the BSP when it's registered. It's run as late as + * possible on APs, normally right before scheduling is enabled. + */ +void percpu_register_op(struct percpu_op *op); + +/* * Register a processor. * * This function creates a percpu area from kernel virtual memory for the @@ -105,6 +130,11 @@ percpu_area(unsigned int cpu) int percpu_add(unsigned int cpu); /* + * Run registered percpu operations on an AP. 
+ */ +void percpu_ap_setup(void); + +/* * This init operation provides : * - access to percpu variables on processor 0 */ @@ -112,6 +142,7 @@ INIT_OP_DECLARE(percpu_bootstrap); /* * This init operation provides : + * - percpu operations can be registered * - new percpu areas can be created * * The dependency that provides access to percpu variables on all processors diff --git a/kern/perfmon.c b/kern/perfmon.c new file mode 100644 index 00000000..6fd319e8 --- /dev/null +++ b/kern/perfmon.c @@ -0,0 +1,1443 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Locking order : + * + * thread_runq -+ + * | + * event -+-> interrupts -+-> td + * | + * +-> pmu + * + * TODO Kernel/user mode seggregation. + */ + +#include <assert.h> +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include <kern/clock.h> +#include <kern/init.h> +#include <kern/list.h> +#include <kern/log.h> +#include <kern/macros.h> +#include <kern/percpu.h> +#include <kern/perfmon.h> +#include <kern/perfmon_types.h> +#include <kern/spinlock.h> +#include <kern/syscnt.h> +#include <kern/thread.h> +#include <kern/timer.h> +#include <kern/xcall.h> +#include <machine/boot.h> +#include <machine/cpu.h> + +/* + * Minimum hardware counter poll interval, in milliseconds. + * + * The main purpose of polling hardware counters is to detect overflows + * when the driver is unable to reliably use overflow interrupts. + */ +#define PERFMON_MIN_POLL_INTERVAL 50 + +/* + * Internal event flags. + */ +#define PERFMON_EF_TYPE_CPU 0x100 +#define PERFMON_EF_ATTACHED 0x200 +#define PERFMON_EF_PUBLIC_MASK (PERFMON_EF_KERN \ + | PERFMON_EF_USER \ + | PERFMON_EF_RAW) + +/* + * Per-CPU performance monitoring counter. + * + * When an event is attached to a processor, the matching per-CPU PMC get + * referenced. When a per-CPU PMC is referenced, its underlying hardware + * counter is active. + * + * Interrupts and preemption must be disabled on access. + */ +struct perfmon_cpu_pmc { + unsigned int nr_refs; + unsigned int pmc_id; + unsigned int raw_event_id; + uint64_t raw_value; + uint64_t value; +}; + +/* + * Per-CPU performance monitoring unit. + * + * Per-CPU PMCs are indexed the same way as global PMCs. + * + * Interrupts and preemption must be disabled on access. + */ +struct perfmon_cpu_pmu { + struct perfmon_dev *dev; + unsigned int cpu; + struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS]; + struct timer poll_timer; + struct syscnt sc_nr_overflows; +}; + +/* + * Performance monitoring counter. + * + * When a PMC is used, it maps a raw event to a hardware counter. + * A PMC is used if and only if its reference counter isn't zero. + */ +struct perfmon_pmc { + unsigned int nr_refs; + unsigned int pmc_id; + unsigned int raw_event_id; +}; + +/* + * Performance monitoring unit. 
+ * + * There is a single system-wide logical PMU, used to globally allocate + * PMCs. Reserving a counter across the entire system ensures thread + * migration isn't hindered by performance monitoring. + * + * Locking the global PMU is only required when allocating or releasing + * a PMC. Once allocated, the PMC may safely be accessed without hodling + * the lock. + */ +struct perfmon_pmu { + struct perfmon_dev *dev; + struct spinlock lock; + struct perfmon_pmc pmcs[PERFMON_MAX_PMCS]; +}; + +static struct perfmon_pmu perfmon_pmu; +static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu; + +static struct perfmon_pmu * +perfmon_get_pmu(void) +{ + return &perfmon_pmu; +} + +static struct perfmon_cpu_pmu * +perfmon_get_local_cpu_pmu(void) +{ + assert(!thread_preempt_enabled()); + return cpu_local_ptr(perfmon_cpu_pmu); +} + +static struct perfmon_cpu_pmu * +perfmon_get_cpu_pmu(unsigned int cpu) +{ + return percpu_ptr(perfmon_cpu_pmu, cpu); +} + +static void __init +perfmon_pmc_init(struct perfmon_pmc *pmc) +{ + pmc->nr_refs = 0; +} + +static bool +perfmon_pmc_used(const struct perfmon_pmc *pmc) +{ + return pmc->nr_refs != 0; +} + +static unsigned int +perfmon_pmc_id(const struct perfmon_pmc *pmc) +{ + return pmc->pmc_id; +} + +static unsigned int +perfmon_pmc_raw_event_id(const struct perfmon_pmc *pmc) +{ + return pmc->raw_event_id; +} + +static void +perfmon_pmc_use(struct perfmon_pmc *pmc, unsigned int pmc_id, + unsigned int raw_event_id) +{ + assert(!perfmon_pmc_used(pmc)); + + pmc->nr_refs = 1; + pmc->pmc_id = pmc_id; + pmc->raw_event_id = raw_event_id; +} + +static void +perfmon_pmc_ref(struct perfmon_pmc *pmc) +{ + assert(perfmon_pmc_used(pmc)); + pmc->nr_refs++; +} + +static void +perfmon_pmc_unref(struct perfmon_pmc *pmc) +{ + assert(perfmon_pmc_used(pmc)); + pmc->nr_refs--; +} + +static unsigned int +perfmon_pmu_get_pmc_index(const struct perfmon_pmu *pmu, + const struct perfmon_pmc *pmc) +{ + size_t pmc_index; + + pmc_index = pmc - pmu->pmcs; + assert(pmc_index < ARRAY_SIZE(pmu->pmcs)); + return pmc_index; +} + +static struct perfmon_pmc * +perfmon_pmu_get_pmc(struct perfmon_pmu *pmu, unsigned int index) +{ + assert(index < ARRAY_SIZE(pmu->pmcs)); + return &pmu->pmcs[index]; +} + +static void __init +perfmon_pmu_init(struct perfmon_pmu *pmu) +{ + pmu->dev = NULL; + spinlock_init(&pmu->lock); + + for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) { + perfmon_pmc_init(perfmon_pmu_get_pmc(pmu, i)); + } +} + +static void __init +perfmon_pmu_set_dev(struct perfmon_pmu *pmu, struct perfmon_dev *dev) +{ + assert(dev); + assert(!pmu->dev); + pmu->dev = dev; +} + +static struct perfmon_dev * +perfmon_pmu_get_dev(const struct perfmon_pmu *pmu) +{ + return pmu->dev; +} + +static void +perfmon_pmu_handle_overflow_intr(const struct perfmon_pmu *pmu) +{ + pmu->dev->ops->handle_overflow_intr(); +} + +static int +perfmon_pmu_translate(const struct perfmon_pmu *pmu, + unsigned int *raw_event_id, + unsigned int event_id) +{ + if (!pmu->dev) { + return ENODEV; + } + + return pmu->dev->ops->translate(raw_event_id, event_id); +} + +static int +perfmon_pmu_alloc_pmc_id(const struct perfmon_pmu *pmu, + unsigned int *pmc_idp, + unsigned int pmc_index, + unsigned int raw_event_id) +{ + unsigned int pmc_id; + int error; + + if (!pmu->dev) { + return ENODEV; + } + + error = pmu->dev->ops->alloc(&pmc_id, pmc_index, raw_event_id); + + if (error) { + return error; + } + + *pmc_idp = pmc_id; + return 0; +} + +static void +perfmon_pmu_free_pmc_id(const struct perfmon_pmu *pmu, unsigned int pmc_id) +{ + 
assert(pmu->dev); + pmu->dev->ops->free(pmc_id); +} + +static struct perfmon_pmc * +perfmon_pmu_find_unused_pmc(struct perfmon_pmu *pmu) +{ + struct perfmon_pmc *pmc; + + for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) { + pmc = perfmon_pmu_get_pmc(pmu, i); + + if (!perfmon_pmc_used(pmc)) { + return pmc; + } + } + + return NULL; +} + +static int +perfmon_pmu_alloc_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp, + unsigned int raw_event_id) +{ + unsigned int pmc_id = 0, pmc_index; + struct perfmon_pmc *pmc; + int error; + + pmc = perfmon_pmu_find_unused_pmc(pmu); + + if (!pmc) { + return EAGAIN; + } + + pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc); + error = perfmon_pmu_alloc_pmc_id(pmu, &pmc_id, pmc_index, raw_event_id); + + if (error) { + return error; + } + + perfmon_pmc_use(pmc, pmc_id, raw_event_id); + *pmcp = pmc; + return 0; +} + +static void +perfmon_pmu_free_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc) +{ + unsigned int pmc_id; + + assert(!perfmon_pmc_used(pmc)); + pmc_id = perfmon_pmc_id(pmc); + perfmon_pmu_free_pmc_id(pmu, pmc_id); +} + +static struct perfmon_pmc * +perfmon_pmu_get_pmc_by_raw_event_id(struct perfmon_pmu *pmu, + unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + + for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) { + pmc = perfmon_pmu_get_pmc(pmu, i); + + if (!perfmon_pmc_used(pmc)) { + continue; + } + + if (perfmon_pmc_raw_event_id(pmc) == raw_event_id) { + return pmc; + } + } + + return NULL; +} + +static int +perfmon_pmu_take_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp, + unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + int error; + + spinlock_lock(&pmu->lock); + + pmc = perfmon_pmu_get_pmc_by_raw_event_id(pmu, raw_event_id); + + if (pmc) { + perfmon_pmc_ref(pmc); + error = 0; + } else { + error = perfmon_pmu_alloc_pmc(pmu, &pmc, raw_event_id); + + if (error) { + pmc = NULL; + } + } + + spinlock_unlock(&pmu->lock); + + if (error) { + return error; + } + + *pmcp = pmc; + return 0; +} + +static void +perfmon_pmu_put_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc) +{ + spinlock_lock(&pmu->lock); + + perfmon_pmc_unref(pmc); + + if (!perfmon_pmc_used(pmc)) { + perfmon_pmu_free_pmc(pmu, pmc); + } + + spinlock_unlock(&pmu->lock); +} + +static int +perfmon_check_event_args(unsigned int id, unsigned int flags) +{ + if (!((flags & PERFMON_EF_PUBLIC_MASK) == flags) + || !((flags & PERFMON_EF_RAW) || (id < PERFMON_NR_GENERIC_EVENTS)) + || !((flags & (PERFMON_EF_KERN | PERFMON_EF_USER)))) { + return EINVAL; + } + + return 0; +} + +int +perfmon_event_init(struct perfmon_event *event, unsigned int id, + unsigned int flags) +{ + int error; + + error = perfmon_check_event_args(id, flags); + + if (error) { + return error; + } + + spinlock_init(&event->lock); + event->flags = flags; + event->id = id; + event->value = 0; + return 0; +} + +static bool +perfmon_event_type_cpu(const struct perfmon_event *event) +{ + return event->flags & PERFMON_EF_TYPE_CPU; +} + +static void +perfmon_event_set_type_cpu(struct perfmon_event *event) +{ + event->flags |= PERFMON_EF_TYPE_CPU; +} + +static void +perfmon_event_clear_type_cpu(struct perfmon_event *event) +{ + event->flags &= ~PERFMON_EF_TYPE_CPU; +} + +static bool +perfmon_event_attached(const struct perfmon_event *event) +{ + return event->flags & PERFMON_EF_ATTACHED; +} + +static unsigned int +perfmon_event_pmc_index(const struct perfmon_event *event) +{ + assert(perfmon_event_attached(event)); + return event->pmc_index; +} + +static void __init 
+perfmon_cpu_pmc_init(struct perfmon_cpu_pmc *cpu_pmc) +{ + cpu_pmc->nr_refs = 0; +} + +static bool +perfmon_cpu_pmc_used(const struct perfmon_cpu_pmc *cpu_pmc) +{ + return cpu_pmc->nr_refs != 0; +} + +static void +perfmon_cpu_pmc_use(struct perfmon_cpu_pmc *cpu_pmc, unsigned int pmc_id, + unsigned int raw_event_id, uint64_t raw_value) +{ + assert(!perfmon_cpu_pmc_used(cpu_pmc)); + + cpu_pmc->nr_refs = 1; + cpu_pmc->pmc_id = pmc_id; + cpu_pmc->raw_event_id = raw_event_id; + cpu_pmc->raw_value = raw_value; + cpu_pmc->value = 0; +} + +static void +perfmon_cpu_pmc_ref(struct perfmon_cpu_pmc *cpu_pmc) +{ + assert(perfmon_cpu_pmc_used(cpu_pmc)); + cpu_pmc->nr_refs++; +} + +static void +perfmon_cpu_pmc_unref(struct perfmon_cpu_pmc *cpu_pmc) +{ + assert(perfmon_cpu_pmc_used(cpu_pmc)); + cpu_pmc->nr_refs--; +} + +static unsigned int +perfmon_cpu_pmc_id(const struct perfmon_cpu_pmc *cpu_pmc) +{ + return cpu_pmc->pmc_id; +} + +static bool +perfmon_cpu_pmc_update(struct perfmon_cpu_pmc *cpu_pmc, uint64_t raw_value, + unsigned int pmc_width) +{ + bool overflowed; + uint64_t delta; + + delta = raw_value - cpu_pmc->raw_value; + + if (pmc_width == 64) { + overflowed = false; + } else { + if (raw_value >= cpu_pmc->raw_value) { + overflowed = false; + } else { + overflowed = true; + delta += (uint64_t)1 << pmc_width; + } + } + + cpu_pmc->value += delta; + cpu_pmc->raw_value = raw_value; + return overflowed; +} + +static uint64_t +perfmon_cpu_pmc_get_value(const struct perfmon_cpu_pmc *cpu_pmc) +{ + return cpu_pmc->value; +} + +static struct perfmon_cpu_pmc * +perfmon_cpu_pmu_get_pmc(struct perfmon_cpu_pmu *cpu_pmu, unsigned int index) +{ + assert(index < ARRAY_SIZE(cpu_pmu->pmcs)); + return &cpu_pmu->pmcs[index]; +} + +static void +perfmon_cpu_pmu_start(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id, + unsigned int raw_event_id) +{ + cpu_pmu->dev->ops->start(pmc_id, raw_event_id); +} + +static void +perfmon_cpu_pmu_stop(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id) +{ + cpu_pmu->dev->ops->stop(pmc_id); +} + +static uint64_t +perfmon_cpu_pmu_read(const struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id) +{ + return cpu_pmu->dev->ops->read(pmc_id); +} + +static void +perfmon_cpu_pmu_use_pmc(struct perfmon_cpu_pmu *cpu_pmu, + struct perfmon_cpu_pmc *cpu_pmc, + unsigned int pmc_id, + unsigned int raw_event_id) +{ + uint64_t raw_value; + + perfmon_cpu_pmu_start(cpu_pmu, pmc_id, raw_event_id); + raw_value = perfmon_cpu_pmu_read(cpu_pmu, pmc_id); + perfmon_cpu_pmc_use(cpu_pmc, pmc_id, raw_event_id, raw_value); +} + +static void +perfmon_cpu_pmu_update_pmc(struct perfmon_cpu_pmu *cpu_pmu, + struct perfmon_cpu_pmc *cpu_pmc) +{ + uint64_t raw_value; + bool overflowed; + + raw_value = perfmon_cpu_pmu_read(cpu_pmu, perfmon_cpu_pmc_id(cpu_pmc)); + overflowed = perfmon_cpu_pmc_update(cpu_pmc, raw_value, + cpu_pmu->dev->pmc_width); + + if (overflowed) { + syscnt_inc(&cpu_pmu->sc_nr_overflows); + } +} + +static void +perfmon_cpu_pmu_check_overflow(void *arg) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + + cpu_pmu = arg; + assert(cpu_pmu->cpu == cpu_id()); + + for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) { + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, i); + + if (!perfmon_cpu_pmc_used(cpu_pmc)) { + continue; + } + + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + } +} + +static void +perfmon_cpu_pmu_poll(struct timer *timer) +{ + struct perfmon_cpu_pmu *cpu_pmu; + + cpu_pmu = structof(timer, struct perfmon_cpu_pmu, poll_timer); + 
xcall_call(perfmon_cpu_pmu_check_overflow, cpu_pmu, cpu_pmu->cpu); + timer_schedule(timer, timer_get_time(timer) + cpu_pmu->dev->poll_interval); +} + +static void __init +perfmon_cpu_pmu_init(struct perfmon_cpu_pmu *cpu_pmu, unsigned int cpu, + struct perfmon_dev *dev) +{ + char name[SYSCNT_NAME_SIZE]; + + cpu_pmu->dev = dev; + cpu_pmu->cpu = cpu; + + for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) { + perfmon_cpu_pmc_init(perfmon_cpu_pmu_get_pmc(cpu_pmu, i)); + } + + if (dev->ops->handle_overflow_intr == NULL) { + assert(dev->poll_interval != 0); + + /* + * XXX Ideally, this would be an interrupt timer instead of a high + * priority one, but it can't be because the handler performs + * cross-calls to remote processors, which requires that interrupts + * be enabled. This is one potential user of CPU-bound timers. + */ + timer_init(&cpu_pmu->poll_timer, perfmon_cpu_pmu_poll, TIMER_HIGH_PRIO); + timer_schedule(&cpu_pmu->poll_timer, dev->poll_interval); + } + + snprintf(name, sizeof(name), "perfmon_nr_overflows/%u", cpu); + syscnt_register(&cpu_pmu->sc_nr_overflows, name); +} + +static uint64_t +perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index, + unsigned int pmc_id, unsigned int raw_event_id) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + + if (perfmon_cpu_pmc_used(cpu_pmc)) { + perfmon_cpu_pmc_ref(cpu_pmc); + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + } else { + perfmon_cpu_pmu_use_pmc(cpu_pmu, cpu_pmc, pmc_id, raw_event_id); + } + + return perfmon_cpu_pmc_get_value(cpu_pmc); +} + +static uint64_t +perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + unsigned int pmc_id; + uint64_t value; + + assert(!cpu_intr_enabled()); + + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + pmc_id = perfmon_cpu_pmc_id(cpu_pmc); + + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + value = perfmon_cpu_pmc_get_value(cpu_pmc); + + perfmon_cpu_pmc_unref(cpu_pmc); + + if (!perfmon_cpu_pmc_used(cpu_pmc)) { + perfmon_cpu_pmu_stop(cpu_pmu, pmc_id); + } + + return value; +} + +static uint64_t +perfmon_cpu_pmu_sync(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + return perfmon_cpu_pmc_get_value(cpu_pmc); +} + +static void +perfmon_td_pmc_init(struct perfmon_td_pmc *td_pmc) +{ + td_pmc->nr_refs = 0; + td_pmc->loaded = false; + td_pmc->value = 0; +} + +static bool +perfmon_td_pmc_used(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->nr_refs != 0; +} + +static void +perfmon_td_pmc_use(struct perfmon_td_pmc *td_pmc, unsigned int pmc_id, + unsigned int raw_event_id) +{ + assert(!perfmon_td_pmc_used(td_pmc)); + + td_pmc->nr_refs = 1; + td_pmc->loaded = false; + td_pmc->pmc_id = pmc_id; + td_pmc->raw_event_id = raw_event_id; + td_pmc->value = 0; +} + +static unsigned int +perfmon_td_pmc_id(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->pmc_id; +} + +static unsigned int +perfmon_td_pmc_raw_event_id(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->raw_event_id; +} + +static void +perfmon_td_pmc_ref(struct perfmon_td_pmc *td_pmc) +{ + assert(perfmon_td_pmc_used(td_pmc)); + td_pmc->nr_refs++; +} + +static void +perfmon_td_pmc_unref(struct perfmon_td_pmc *td_pmc) +{ + assert(perfmon_td_pmc_used(td_pmc)); + 
td_pmc->nr_refs--; +} + +static bool +perfmon_td_pmc_loaded(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->loaded; +} + +static void +perfmon_td_pmc_load(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value) +{ + assert(!perfmon_td_pmc_loaded(td_pmc)); + + td_pmc->cpu_pmc_value = cpu_pmc_value; + td_pmc->loaded = true; +} + +static void +perfmon_td_pmc_update(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value) +{ + uint64_t delta; + + assert(perfmon_td_pmc_loaded(td_pmc)); + + delta = cpu_pmc_value - td_pmc->cpu_pmc_value; + td_pmc->cpu_pmc_value = cpu_pmc_value; + td_pmc->value += delta; +} + +static void +perfmon_td_pmc_unload(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value) +{ + perfmon_td_pmc_update(td_pmc, cpu_pmc_value); + td_pmc->loaded = false; +} + +static uint64_t +perfmon_td_pmc_read(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->value; +} + +static unsigned int +perfmon_td_get_pmc_index(const struct perfmon_td *td, + const struct perfmon_td_pmc *td_pmc) +{ + size_t pmc_index; + + pmc_index = td_pmc - td->pmcs; + assert(pmc_index < ARRAY_SIZE(td->pmcs)); + return pmc_index; +} + +static struct perfmon_td_pmc * +perfmon_td_get_pmc(struct perfmon_td *td, unsigned int index) +{ + assert(index < ARRAY_SIZE(td->pmcs)); + return &td->pmcs[index]; +} + +void +perfmon_td_init(struct perfmon_td *td) +{ + spinlock_init(&td->lock); + + for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) { + perfmon_td_pmc_init(perfmon_td_get_pmc(td, i)); + } +} + +static void +perfmon_td_load_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc) +{ + unsigned int pmc_index, pmc_id, raw_event_id; + struct perfmon_cpu_pmu *cpu_pmu; + uint64_t cpu_pmc_value; + + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + pmc_id = perfmon_td_pmc_id(td_pmc); + raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index, + pmc_id, raw_event_id); + perfmon_td_pmc_load(td_pmc, cpu_pmc_value); +} + +static void +perfmon_td_unload_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc) +{ + struct perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index); + perfmon_td_pmc_unload(td_pmc, cpu_pmc_value); +} + +static void +perfmon_td_update_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc) +{ + struct perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index); + perfmon_td_pmc_update(td_pmc, cpu_pmc_value); +} + +void +perfmon_td_load(struct perfmon_td *td) +{ + unsigned int pmc_index, pmc_id, raw_event_id; + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_td_pmc *td_pmc; + uint64_t cpu_pmc_value; + + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + cpu_pmu = perfmon_get_local_cpu_pmu(); + + spinlock_lock(&td->lock); + + for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) { + td_pmc = perfmon_td_get_pmc(td, i); + + if (!perfmon_td_pmc_used(td_pmc) || perfmon_td_pmc_loaded(td_pmc)) { + continue; + } + + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + pmc_id = perfmon_td_pmc_id(td_pmc); + raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index, + pmc_id, 
raw_event_id); + perfmon_td_pmc_load(td_pmc, cpu_pmc_value); + } + + spinlock_unlock(&td->lock); +} + +void +perfmon_td_unload(struct perfmon_td *td) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_td_pmc *td_pmc; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + cpu_pmu = perfmon_get_local_cpu_pmu(); + + spinlock_lock(&td->lock); + + for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) { + td_pmc = perfmon_td_get_pmc(td, i); + + if (!perfmon_td_pmc_loaded(td_pmc)) { + continue; + } + + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index); + perfmon_td_pmc_unload(td_pmc, cpu_pmc_value); + } + + spinlock_unlock(&td->lock); +} + +static void +perfmon_event_load(struct perfmon_event *event, uint64_t pmc_value) +{ + event->pmc_value = pmc_value; +} + +static void +perfmon_event_update(struct perfmon_event *event, uint64_t pmc_value) +{ + uint64_t delta; + + delta = pmc_value - event->pmc_value; + event->value += delta; + event->pmc_value = pmc_value; +} + +static void +perfmon_event_load_cpu_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_cpu_pmu *cpu_pmu; + const struct perfmon_pmc *pmc; + struct perfmon_pmu *pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + event = arg; + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmu = perfmon_get_pmu(); + pmc_index = perfmon_event_pmc_index(event); + pmc = perfmon_pmu_get_pmc(pmu, pmc_index); + cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index, + perfmon_pmc_id(pmc), + perfmon_pmc_raw_event_id(pmc)); + perfmon_event_load(event, cpu_pmc_value); +} + +static void +perfmon_event_load_cpu(struct perfmon_event *event, unsigned int cpu) +{ + perfmon_event_set_type_cpu(event); + event->cpu = cpu; + xcall_call(perfmon_event_load_cpu_remote, event, cpu); +} + +static void +perfmon_event_load_thread_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + uint64_t td_pmc_value; + + event = arg; + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock(&td->lock); + + if (thread_self() == event->thread) { + + if (perfmon_td_pmc_loaded(td_pmc)) { + perfmon_td_update_pmc(td, td_pmc); + } else { + perfmon_td_load_pmc(td, td_pmc); + } + } + + td_pmc_value = perfmon_td_pmc_read(td_pmc); + + spinlock_unlock(&td->lock); + + perfmon_event_load(event, td_pmc_value); +} + +static void +perfmon_event_load_thread(struct perfmon_event *event, struct thread *thread) +{ + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + struct perfmon_pmu *pmu; + const struct perfmon_pmc *pmc; + unsigned int pmc_index; + unsigned long flags; + + pmu = perfmon_get_pmu(); + + thread_ref(thread); + event->thread = thread; + + pmc_index = perfmon_event_pmc_index(event); + pmc = perfmon_pmu_get_pmc(pmu, pmc_index); + td = thread_get_perfmon_td(thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock_intr_save(&td->lock, &flags); + + if (perfmon_td_pmc_used(td_pmc)) { + perfmon_td_pmc_ref(td_pmc); + } else { + perfmon_td_pmc_use(td_pmc, perfmon_pmc_id(pmc), + perfmon_pmc_raw_event_id(pmc)); + } + + spinlock_unlock_intr_restore(&td->lock, flags); + + xcall_call(perfmon_event_load_thread_remote, event, thread_cpu(thread)); +} + +static void +perfmon_event_unload_cpu_remote(void *arg) +{ + struct perfmon_event *event; + struct 
perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + event = arg; + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_event_pmc_index(event); + cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index); + perfmon_event_update(event, cpu_pmc_value); +} + +static void +perfmon_event_unload_cpu(struct perfmon_event *event) +{ + xcall_call(perfmon_event_unload_cpu_remote, event, event->cpu); + perfmon_event_clear_type_cpu(event); +} + +static void +perfmon_event_unload_thread_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + uint64_t td_pmc_value; + + event = arg; + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock(&td->lock); + + if ((thread_self() == event->thread) && perfmon_td_pmc_loaded(td_pmc)) { + if (perfmon_td_pmc_used(td_pmc)) { + perfmon_td_update_pmc(td, td_pmc); + } else { + perfmon_td_unload_pmc(td, td_pmc); + } + } + + td_pmc_value = perfmon_td_pmc_read(td_pmc); + + spinlock_unlock(&td->lock); + + perfmon_event_update(event, td_pmc_value); +} + +static void +perfmon_event_unload_thread(struct perfmon_event *event) +{ + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + unsigned long flags; + + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock_intr_save(&td->lock, &flags); + perfmon_td_pmc_unref(td_pmc); + spinlock_unlock_intr_restore(&td->lock, flags); + + xcall_call(perfmon_event_unload_thread_remote, event, + thread_cpu(event->thread)); + + thread_unref(event->thread); + event->thread = NULL; +} + +static void +perfmon_event_sync_cpu_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + event = arg; + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_event_pmc_index(event); + cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index); + perfmon_event_update(event, cpu_pmc_value); +} + +static void +perfmon_event_sync_cpu(struct perfmon_event *event) +{ + xcall_call(perfmon_event_sync_cpu_remote, event, event->cpu); +} + +static void +perfmon_event_sync_thread_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + uint64_t td_pmc_value; + + event = arg; + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock(&td->lock); + + if (thread_self() == event->thread) { + perfmon_td_update_pmc(td, td_pmc); + } + + td_pmc_value = perfmon_td_pmc_read(td_pmc); + + spinlock_unlock(&td->lock); + + perfmon_event_update(event, td_pmc_value); +} + +static void +perfmon_event_sync_thread(struct perfmon_event *event) +{ + xcall_call(perfmon_event_sync_thread_remote, event, + thread_cpu(event->thread)); +} + +static int +perfmon_event_attach_pmu(struct perfmon_event *event) +{ + unsigned int raw_event_id = 0; + struct perfmon_pmu *pmu; + struct perfmon_pmc *pmc; + int error; + + pmu = perfmon_get_pmu(); + + if (!(event->flags & PERFMON_EF_RAW)) { + error = perfmon_pmu_translate(pmu, &raw_event_id, event->id); + + if (error) { + return error; + } + } + + error = perfmon_pmu_take_pmc(pmu, &pmc, raw_event_id); + + if (error) { + return error; + } + + 
event->pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc); + event->flags |= PERFMON_EF_ATTACHED; + event->value = 0; + return 0; +} + +static void +perfmon_event_detach_pmu(struct perfmon_event *event) +{ + struct perfmon_pmu *pmu; + struct perfmon_pmc *pmc; + + pmu = perfmon_get_pmu(); + pmc = perfmon_pmu_get_pmc(pmu, perfmon_event_pmc_index(event)); + perfmon_pmu_put_pmc(pmu, pmc); + event->flags &= ~PERFMON_EF_ATTACHED; +} + +int +perfmon_event_attach(struct perfmon_event *event, struct thread *thread) +{ + int error; + + spinlock_lock(&event->lock); + + if (perfmon_event_attached(event)) { + error = EINVAL; + goto error; + } + + error = perfmon_event_attach_pmu(event); + + if (error) { + goto error; + } + + perfmon_event_load_thread(event, thread); + + spinlock_unlock(&event->lock); + + return 0; + +error: + spinlock_unlock(&event->lock); + + return error; +} + +int +perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu) +{ + int error; + + if (cpu >= cpu_count()) { + return EINVAL; + } + + spinlock_lock(&event->lock); + + if (perfmon_event_attached(event)) { + error = EINVAL; + goto out; + } + + error = perfmon_event_attach_pmu(event); + + if (error) { + goto out; + } + + perfmon_event_load_cpu(event, cpu); + error = 0; + +out: + spinlock_unlock(&event->lock); + + return error; +} + +int +perfmon_event_detach(struct perfmon_event *event) +{ + int error; + + spinlock_lock(&event->lock); + + if (!perfmon_event_attached(event)) { + error = EINVAL; + goto out; + } + + if (perfmon_event_type_cpu(event)) { + perfmon_event_unload_cpu(event); + } else { + perfmon_event_unload_thread(event); + } + + perfmon_event_detach_pmu(event); + error = 0; + +out: + spinlock_unlock(&event->lock); + + return error; +} + +uint64_t +perfmon_event_read(struct perfmon_event *event) +{ + uint64_t value; + + spinlock_lock(&event->lock); + + if (perfmon_event_attached(event)) { + if (perfmon_event_type_cpu(event)) { + perfmon_event_sync_cpu(event); + } else { + perfmon_event_sync_thread(event); + } + } + + value = event->value; + + spinlock_unlock(&event->lock); + + return value; +} + +static uint64_t __init +perfmon_compute_poll_interval(uint64_t pmc_width) +{ + uint64_t cycles, time; + + if (pmc_width == 64) { + cycles = (uint64_t)-1; + } else { + cycles = (uint64_t)1 << pmc_width; + } + + /* + * Assume an unrealistically high upper bound on the number of + * events per cycle to otbain a comfortable margin of safety. 
+ */ + cycles /= 100; + time = cycles / (cpu_get_freq() / 1000); + + if (time < PERFMON_MIN_POLL_INTERVAL) { + log_warning("perfmon: invalid poll interval %llu, forced to %llu", + (unsigned long long)time, + (unsigned long long)PERFMON_MIN_POLL_INTERVAL); + time = PERFMON_MIN_POLL_INTERVAL; + } + + return clock_ticks_from_ms(time); +} + +void __init +perfmon_register(struct perfmon_dev *dev) +{ + const struct perfmon_dev_ops *ops; + + ops = dev->ops; + assert(ops->translate && ops->alloc && ops->free + && ops->start && ops->stop && ops->read); + assert(dev->pmc_width <= 64); + + if ((dev->ops->handle_overflow_intr == NULL) && (dev->poll_interval == 0)) { + dev->poll_interval = perfmon_compute_poll_interval(dev->pmc_width); + } + + perfmon_pmu_set_dev(perfmon_get_pmu(), dev); +} + +void +perfmon_overflow_intr(void) +{ + perfmon_pmu_handle_overflow_intr(perfmon_get_pmu()); +} + +void +perfmon_report_overflow(unsigned int pmc_index) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + cpu_pmu = perfmon_get_local_cpu_pmu(); + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); +} + +static int __init +perfmon_bootstrap(void) +{ + perfmon_pmu_init(perfmon_get_pmu()); + return 0; +} + +INIT_OP_DEFINE(perfmon_bootstrap, + INIT_OP_DEP(log_setup, true), + INIT_OP_DEP(spinlock_setup, true)); + +static int __init +perfmon_setup(void) +{ + struct perfmon_dev *dev; + + dev = perfmon_pmu_get_dev(perfmon_get_pmu()); + + if (!dev) { + return ENODEV; + } + + for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) { + perfmon_cpu_pmu_init(perfmon_get_cpu_pmu(cpu), cpu, dev); + } + + return 0; +} + +INIT_OP_DEFINE(perfmon_setup, + INIT_OP_DEP(boot_setup_pmu, true), + INIT_OP_DEP(cpu_mp_probe, true), + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(percpu_setup, true), + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(spinlock_setup, true), + INIT_OP_DEP(syscnt_setup, true)); diff --git a/kern/perfmon.h b/kern/perfmon.h new file mode 100644 index 00000000..0c17752c --- /dev/null +++ b/kern/perfmon.h @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Performance monitoring based on hardware performance counters. + * + * The hardware layer is represented by a performance monitoring unit (PMU), + * which provides performance monitoring counters (PMCs). + */ + +#ifndef KERN_PERFMON_H +#define KERN_PERFMON_H + +#include <stdint.h> + +#include <kern/init.h> +#include <kern/perfmon_types.h> +#include <kern/thread.h> + +/* + * IDs of generic performance monitoring events. 
+ */ +#define PERFMON_EV_CYCLE 0 +#define PERFMON_EV_REF_CYCLE 1 +#define PERFMON_EV_INSTRUCTION 2 +#define PERFMON_EV_CACHE_REF 3 +#define PERFMON_EV_CACHE_MISS 4 +#define PERFMON_EV_BRANCH 5 +#define PERFMON_EV_BRANCH_MISS 6 +#define PERFMON_NR_GENERIC_EVENTS 7 + +/* + * Event flags. + */ +#define PERFMON_EF_KERN 0x1 /* Monitor events in kernel mode */ +#define PERFMON_EF_USER 0x2 /* Monitor events in user mode */ +#define PERFMON_EF_RAW 0x4 /* Raw event ID, generic if unset */ + +/* + * Performance monitoring operations. + * + * This is a public structure. + * + * All operations are either global but serialized by the caller, or + * processor-local and called with interrupts and preemption disabled. + * + * If the hardware doesn't efficiently support overflow interrupts, the + * handler must be set to NULL, making the perfmon module perdiocally + * check the raw value of the hardware counters. + */ +struct perfmon_dev_ops { + /* + * Convert a generic event ID into a raw event ID. + * + * Global operation. + */ + int (*translate)(unsigned int *raw_event_idp, unsigned int event_id); + + /* + * Allocate a performance monitoring counter globally for the given + * raw event ID, and return the counter ID through the given pointer. + * The range of IDs must start from 0 and increase contiguously. + * + * The PMC index is to be used by the driver when reporting overflows, + * if a custom overflow interrupt handler. + * + * Global operation. + */ + int (*alloc)(unsigned int *pmc_idp, unsigned int pmc_index, + unsigned int raw_event_id); + + /* + * Free an allocated performance monitoring counter. + * + * Global operation. + */ + void (*free)(unsigned int pmc_id); + + /* + * Start a performance monitoring counter for the given raw event ID. + * + * Processor-local operation. + */ + void (*start)(unsigned int pmc_id, unsigned int raw_event_id); + + /* + * Stop a performance monitoring counter. + * + * Processor-local operation. + */ + void (*stop)(unsigned int pmc_id); + + /* + * Read the value of a performance monitoring counter. + * + * Processor-local operation. + */ + uint64_t (*read)(unsigned int pmc_id); + + /* + * Custom overflow interrupt handler. + * + * Processor-local operation. + */ + void (*handle_overflow_intr)(void); +}; + +/* + * Performance monitoring device. + * + * This is a public structure. + * + * The PMC width is expressed in bits. + * + * If the driver doesn't provide an overflow interrupt handler, it may set + * the poll interval, in ticks, to a duration that safely allows the detection + * of a single overflow. A value of 0 lets the perfmon module compute a poll + * interval itself. + */ +struct perfmon_dev { + const struct perfmon_dev_ops *ops; + unsigned int pmc_width; + uint64_t poll_interval; +}; + +/* + * Performance monitoring thread data. + */ +struct perfmon_td; + +/* + * Performance monitoring event. + * + * An event describes a single, well-defined hardware condition and tracks + * its occurrences over a period of time. + */ +struct perfmon_event; + +/* + * Initialize thread-specific data. + */ +void perfmon_td_init(struct perfmon_td *td); + +/* + * Load/unload events attached to a thread on the current processor. + * + * These functions should only be used by the scheduler on a context switch. + * Interrupts and preemption must be disabled when calling these functions. + */ +void perfmon_td_load(struct perfmon_td *td); +void perfmon_td_unload(struct perfmon_td *td); + +/* + * Initialize an event. 
+ */ +int perfmon_event_init(struct perfmon_event *event, unsigned int id, + unsigned int flags); + +/* + * Attach/detach an event to/from a thread or a processor. + * + * Attaching an event allocates hardware resources and enables monitoring. + * The number of occurrences for the given event is reset. + * + * An event can only be attached to one thread or processor at a time. + */ +int perfmon_event_attach(struct perfmon_event *event, struct thread *thread); +int perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu); +int perfmon_event_detach(struct perfmon_event *event); + +/* + * Obtain the number of occurrences of an event. + */ +uint64_t perfmon_event_read(struct perfmon_event *event); + +/* + * Register a PMU device. + * + * Currently, there can only be a single system-wide PMU device, which + * assumes the driver is the same for all processors. + */ +void perfmon_register(struct perfmon_dev *dev); + +/* + * Handle an overflow interrupt. + * + * This function must be called in interrupt context. + */ +void perfmon_overflow_intr(void); + +/* + * Report a PMC overflow. + * + * This function is intended to be used by PMU drivers using a custom + * overflow interrupt handler. + * + * This function must be called in interrupt context. + */ +void perfmon_report_overflow(unsigned int pmc_index); + +/* + * This init operation provides : + * - PMU device registration + */ +INIT_OP_DECLARE(perfmon_bootstrap); + +#endif /* KERN_PERFMON_H */ diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h new file mode 100644 index 00000000..c316312a --- /dev/null +++ b/kern/perfmon_types.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Isolated type definitions used to avoid inclusion circular dependencies. + */ + +#ifndef KERN_PERFMON_TYPES_H +#define KERN_PERFMON_TYPES_H + +#ifdef CONFIG_PERFMON + +#include <stdbool.h> +#include <stdint.h> + +#include <kern/spinlock_types.h> + +/* + * Maximum number of supported hardware counters. + */ +#define PERFMON_MAX_PMCS CONFIG_PERFMON_MAX_PMCS + +/* + * Performance monitoring event. + * + * An event may be unattached, attached to a thread, or attached to a CPU. + * When it is loaded, the current value of the underlying PMC is saved. + * When it is updated, the delta between the current and saved PMC values + * is added to the event value. + */ +struct perfmon_event { + struct spinlock lock; + unsigned int flags; + unsigned int id; + uint64_t pmc_value; + uint64_t value; + + union { + struct thread *thread; + unsigned int cpu; + }; + + unsigned int pmc_index; +}; + +/* + * Per-thread performance monitoring counter. + * + * Per-thread PMCs are indexed the same way as global PMCs. + * + * A per-thread PMC is referenced when an event is attached to a thread. 
+ * The PMC may only be loaded if the thread is running on a processor, + * as a result of an event being attached to the thread, or the thread + * being dispatched by the scheduler. Note that this allows a transient + * state to be seen where a per-thread PMC is both unused and loaded. + * This happens after detaching an event from a thread, resulting in + * the underlying per-thread PMC to become unused, but if the thread + * is running concurrently, the counter is still loaded. The implementation + * resolves the situation by unloading the counter, which is either + * done by an explicit unload cross-call, or when the scheduler preempts + * the thread and unloads its thread data. + * + * When a per-thread PMC is loaded, the current value of the underlying + * PMC is saved, and when it's updated, the delta between the current + * and saved PMC values is added to the per-thread PMC value. + */ +struct perfmon_td_pmc { + unsigned int nr_refs; + bool loaded; + unsigned int pmc_id; + unsigned int raw_event_id; + uint64_t cpu_pmc_value; + uint64_t value; +}; + +/* + * Per-thread performance monitoring data. + * + * Interrupts must be disabled when locking thread data. + */ +struct perfmon_td { + struct spinlock lock; + struct perfmon_td_pmc pmcs[PERFMON_MAX_PMCS]; +}; + +#endif /* CONFIG_PERFMON */ + +#endif /* KERN_PERFMON_TYPES_H */ diff --git a/kern/task.c b/kern/task.c index 5df72251..3ad863bd 100644 --- a/kern/task.c +++ b/kern/task.c @@ -257,7 +257,7 @@ task_info(struct task *task) printf(TASK_INFO_ADDR_FMT " %c %8s:" TASK_INFO_ADDR_FMT " %.2s:%02hu %02u %s\n", (unsigned long)thread, - thread_state_to_chr(thread), + thread_state_to_chr(thread_state(thread)), thread_wchan_desc(thread), (unsigned long)thread_wchan_addr(thread), thread_sched_class_to_str(thread_user_sched_class(thread)), diff --git a/kern/thread.c b/kern/thread.c index e79ef311..a8f58b39 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -100,6 +100,7 @@ #include <kern/macros.h> #include <kern/panic.h> #include <kern/percpu.h> +#include <kern/perfmon.h> #include <kern/rcu.h> #include <kern/shell.h> #include <kern/sleepq.h> @@ -600,14 +601,28 @@ thread_runq_wakeup_balancer(struct thread_runq *runq) } thread_clear_wchan(runq->balancer); - runq->balancer->state = THREAD_RUNNING; + atomic_store(&runq->balancer->state, THREAD_RUNNING, ATOMIC_RELAXED); thread_runq_wakeup(runq, runq->balancer); } static void -thread_runq_schedule_prepare(struct thread *thread) +thread_runq_schedule_load(struct thread *thread) { pmap_load(thread->task->map->pmap); + +#ifdef CONFIG_PERFMON + perfmon_td_load(thread_get_perfmon_td(thread)); +#endif +} + +static void +thread_runq_schedule_unload(struct thread *thread) +{ +#ifdef CONFIG_PERFMON + perfmon_td_unload(thread_get_perfmon_td(thread)); +#else + (void)thread; +#endif } static struct thread_runq * @@ -639,6 +654,8 @@ thread_runq_schedule(struct thread_runq *runq) assert(next->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL); if (likely(prev != next)) { + thread_runq_schedule_unload(prev); + rcu_report_context_switch(thread_rcu_reader(prev)); spinlock_transfer_owner(&runq->lock, next); @@ -660,10 +677,10 @@ thread_runq_schedule(struct thread_runq *runq) * - The current thread may have been migrated to another processor. 
*/ barrier(); + thread_runq_schedule_load(prev); + next = NULL; runq = thread_runq_local(); - - thread_runq_schedule_prepare(prev); } else { next = NULL; } @@ -1750,7 +1767,7 @@ thread_main(void (*fn)(void *), void *arg) assert(!thread_preempt_enabled()); thread = thread_self(); - thread_runq_schedule_prepare(thread); + thread_runq_schedule_load(thread); spinlock_unlock(&thread_runq_local()->lock); cpu_intr_enable(); @@ -1843,6 +1860,10 @@ thread_init(struct thread *thread, void *stack, thread->stack = stack; strlcpy(thread->name, attr->name, sizeof(thread->name)); +#ifdef CONFIG_PERFMON + perfmon_td_init(thread_get_perfmon_td(thread)); +#endif + if (attr->flags & THREAD_ATTR_DETACHED) { thread->flags |= THREAD_DETACHED; } @@ -1989,8 +2010,9 @@ static void thread_join_common(struct thread *thread) { struct thread_runq *runq; - unsigned long flags, state; struct thread *self; + unsigned long flags; + unsigned int state; self = thread_self(); assert(thread != self); @@ -2060,7 +2082,7 @@ thread_balance(void *arg) for (;;) { runq->idle_balance_ticks = THREAD_IDLE_BALANCE_TICKS; thread_set_wchan(self, runq, "runq"); - self->state = THREAD_SLEEPING; + atomic_store(&self->state, THREAD_SLEEPING, ATOMIC_RELAXED); runq = thread_runq_schedule(runq); assert(runq == arg); @@ -2309,6 +2331,13 @@ thread_setup(void) #define THREAD_STACK_GUARD_INIT_OP_DEPS #endif /* CONFIG_THREAD_STACK_GUARD */ +#ifdef CONFIG_PERFMON +#define THREAD_PERFMON_INIT_OP_DEPS \ + INIT_OP_DEP(perfmon_bootstrap, true), +#else /* CONFIG_PERFMON */ +#define THREAD_PERFMON_INIT_OP_DEPS +#endif /* CONFIG_PERFMON */ + INIT_OP_DEFINE(thread_setup, INIT_OP_DEP(cpumap_setup, true), INIT_OP_DEP(kmem_setup, true), @@ -2318,6 +2347,7 @@ INIT_OP_DEFINE(thread_setup, INIT_OP_DEP(thread_bootstrap, true), INIT_OP_DEP(turnstile_setup, true), THREAD_STACK_GUARD_INIT_OP_DEPS + THREAD_PERFMON_INIT_OP_DEPS ); void __init @@ -2421,7 +2451,7 @@ thread_exit(void) runq = thread_runq_local(); spinlock_lock_intr_save(&runq->lock, &flags); - thread->state = THREAD_DEAD; + atomic_store(&thread->state, THREAD_DEAD, ATOMIC_RELAXED); thread_runq_schedule(runq); panic("thread: dead thread walking"); @@ -2461,7 +2491,7 @@ thread_wakeup_common(struct thread *thread, int error) } thread_clear_wchan(thread); - thread->state = THREAD_RUNNING; + atomic_store(&thread->state, THREAD_RUNNING, ATOMIC_RELAXED); thread_unlock_runq(runq, flags); } @@ -2532,7 +2562,7 @@ thread_sleep_common(struct spinlock *interlock, const void *wchan_addr, } thread_set_wchan(thread, wchan_addr, wchan_desc); - thread->state = THREAD_SLEEPING; + atomic_store(&thread->state, THREAD_SLEEPING, ATOMIC_RELAXED); runq = thread_runq_schedule(runq); assert(thread->state == THREAD_RUNNING); @@ -2699,9 +2729,9 @@ thread_report_periodic_event(void) } char -thread_state_to_chr(const struct thread *thread) +thread_state_to_chr(unsigned int state) { - switch (thread->state) { + switch (state) { case THREAD_RUNNING: return 'R'; case THREAD_SLEEPING: @@ -2906,6 +2936,21 @@ thread_key_create(unsigned int *keyp, thread_dtor_fn_t dtor) *keyp = key; } +unsigned int +thread_cpu(const struct thread *thread) +{ + const struct thread_runq *runq; + + runq = atomic_load(&thread->runq, ATOMIC_RELAXED); + return runq->cpu; +} + +unsigned int +thread_state(const struct thread *thread) +{ + return atomic_load(&thread->state, ATOMIC_RELAXED); +} + bool thread_is_running(const struct thread *thread) { diff --git a/kern/thread.h b/kern/thread.h index 6e696fc7..5b5729ce 100644 --- a/kern/thread.h +++ b/kern/thread.h @@ -75,6 
+75,13 @@ struct thread_sched_data { #define THREAD_KERNEL_PREFIX KERNEL_NAME "_" /* + * Thread states. + */ +#define THREAD_RUNNING 0 +#define THREAD_SLEEPING 1 +#define THREAD_DEAD 2 + +/* * Scheduling policies. * * The idle policy is reserved for the per-CPU idle threads. @@ -323,7 +330,7 @@ thread_wchan_desc(const struct thread *thread) /* * Return a character representation of the state of a thread. */ -char thread_state_to_chr(const struct thread *thread); +char thread_state_to_chr(unsigned int state); static inline const struct thread_sched_data * thread_get_user_sched_data(const struct thread *thread) @@ -705,6 +712,28 @@ thread_get_specific(unsigned int key) return thread_tsd_get(thread_self(), key); } +#ifdef CONFIG_PERFMON +static inline struct perfmon_td * +thread_get_perfmon_td(struct thread *thread) +{ + return &thread->perfmon_td; +} +#endif /* CONFIG_PERFMON */ + +/* + * Return the last CPU on which the thread has been scheduled. + * + * This call isn't synchronized, and the caller may obtain an outdated value. + */ +unsigned int thread_cpu(const struct thread *thread); + +/* + * Return the current state of the given thread. + * + * This call isn't synchronized, and the caller may obtain an outdated value. + */ +unsigned int thread_state(const struct thread *thread); + /* * Return true if the given thread is running. * diff --git a/kern/thread_i.h b/kern/thread_i.h index 0be1e773..9c9a705b 100644 --- a/kern/thread_i.h +++ b/kern/thread_i.h @@ -24,6 +24,7 @@ #include <kern/atomic.h> #include <kern/cpumap.h> #include <kern/list_types.h> +#include <kern/perfmon_types.h> #include <kern/rcu_types.h> #include <kern/spinlock_types.h> #include <kern/turnstile_types.h> @@ -45,16 +46,6 @@ struct thread_fs_runq; #define THREAD_DETACHED 0x2UL /* Resources automatically released on exit */ /* - * Thread states. - * - * Threads in the running state may not be on a run queue if they're being - * awaken. - */ -#define THREAD_RUNNING 0 -#define THREAD_SLEEPING 1 -#define THREAD_DEAD 2 - -/* * Scheduling data for a real-time thread. */ struct thread_rt_data { @@ -113,7 +104,7 @@ struct thread { const void *wchan_addr; /* (r) */ const char *wchan_desc; /* (r) */ int wakeup_error; /* (r) */ - unsigned short state; /* (r) */ + unsigned int state; /* (a,r) */ /* Sleep queue available for lending */ struct sleepq *priv_sleepq; /* (-) */ @@ -185,6 +176,10 @@ struct thread { struct list task_node; /* (T) */ void *stack; /* (-) */ char name[THREAD_NAME_SIZE]; /* ( ) */ + +#ifdef CONFIG_PERFMON + struct perfmon_td perfmon_td; /* ( ) */ +#endif }; #define THREAD_ATTR_DETACHED 0x1 |
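
To make the driver-side contract concrete, here is a minimal sketch of a PMU driver registering itself through the perfmon_dev/perfmon_dev_ops interface added above. Everything named pmu_fake_* is hypothetical, and the init operation is a simplification: in the actual tree, the driver is provided by the machine layer and ordered through the boot_setup_pmu init operation that perfmon_setup depends on. Since the sketch provides no overflow interrupt handler and leaves poll_interval at 0, the perfmon module computes a polling period from the declared 48-bit counter width, as described in perfmon_compute_poll_interval().

/*
 * Hypothetical machine-layer PMU driver, for illustration only.
 * A real driver programs actual hardware counters in these callbacks.
 */
#include <stdint.h>

#include <kern/init.h>
#include <kern/perfmon.h>

static int
pmu_fake_translate(unsigned int *raw_event_idp, unsigned int event_id)
{
    /* This stub maps generic event IDs directly to raw event IDs */
    *raw_event_idp = event_id;
    return 0;
}

static int
pmu_fake_alloc(unsigned int *pmc_idp, unsigned int pmc_index,
               unsigned int raw_event_id)
{
    /* Reuse the global PMC index as the hardware counter ID */
    (void)raw_event_id;
    *pmc_idp = pmc_index;
    return 0;
}

static void
pmu_fake_free(unsigned int pmc_id)
{
    /* Nothing to release in this stub */
    (void)pmc_id;
}

static void
pmu_fake_start(unsigned int pmc_id, unsigned int raw_event_id)
{
    /* A real driver programs and enables the hardware counter here */
    (void)pmc_id;
    (void)raw_event_id;
}

static void
pmu_fake_stop(unsigned int pmc_id)
{
    (void)pmc_id;
}

static uint64_t
pmu_fake_read(unsigned int pmc_id)
{
    /* A real driver returns the current raw counter value */
    (void)pmc_id;
    return 0;
}

static const struct perfmon_dev_ops pmu_fake_ops = {
    .translate = pmu_fake_translate,
    .alloc     = pmu_fake_alloc,
    .free      = pmu_fake_free,
    .start     = pmu_fake_start,
    .stop      = pmu_fake_stop,
    .read      = pmu_fake_read,
    /* No overflow interrupt: the perfmon module polls the counters */
    .handle_overflow_intr = NULL,
};

static struct perfmon_dev pmu_fake_dev = {
    .ops = &pmu_fake_ops,
    .pmc_width = 48,
    .poll_interval = 0, /* 0 lets perfmon compute a safe poll interval */
};

static int __init
pmu_fake_setup(void)
{
    perfmon_register(&pmu_fake_dev);
    return 0;
}

INIT_OP_DEFINE(pmu_fake_setup,
               INIT_OP_DEP(perfmon_bootstrap, true));
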