-rw-r--r-- | arch/x86/Makefile | 3
-rw-r--r-- | arch/x86/machine/cpu.c | 59
-rw-r--r-- | arch/x86/machine/cpu.h | 54
-rw-r--r-- | arch/x86/machine/lapic.c | 19
-rw-r--r-- | arch/x86/machine/lapic.h | 1
-rw-r--r-- | arch/x86/machine/pmu.h | 32
-rw-r--r-- | arch/x86/machine/pmu_amd.c | 249
-rw-r--r-- | arch/x86/machine/pmu_intel.c | 383
-rw-r--r-- | arch/x86/machine/trap.c | 2
-rw-r--r-- | arch/x86/machine/trap.h | 1
-rw-r--r-- | kern/Kconfig | 6
-rw-r--r-- | kern/Makefile | 2
-rw-r--r-- | kern/perfmon.c | 1388
-rw-r--r-- | kern/perfmon.h | 251
-rw-r--r-- | kern/perfmon_i.h | 42
-rw-r--r-- | kern/perfmon_types.h | 26
-rw-r--r-- | kern/thread.c | 54
-rw-r--r-- | kern/thread.h | 8
-rw-r--r-- | kern/thread_i.h | 5
-rw-r--r-- | test/Kconfig | 16
-rw-r--r-- | test/Makefile | 3
-rw-r--r-- | test/test_perfmon_cpu.c | 158
-rw-r--r-- | test/test_perfmon_thread.c | 257
-rw-r--r-- | test/test_perfmon_torture.c | 281
-rwxr-xr-x | tools/build_configs.py | 1 |
25 files changed, 3284 insertions, 17 deletions
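As a rough usage sketch of the interface this commit introduces (hypothetical caller code based on the kern/perfmon.h declarations below; error checking omitted, and the monitored CPU picked arbitrarily):

#include <kern/log.h>
#include <kern/perfmon.h>

/* Count cycles and instructions on CPU 0, then report them. */
static void
perfmon_usage_sketch(void)
{
    struct perfmon_group *group;
    struct perfmon_event *cycles, *instructions;

    perfmon_group_create(&group);
    perfmon_event_create(&cycles, PERFMON_ET_GENERIC, PERFMON_EV_CYCLE,
                         PERFMON_EF_KERN);
    perfmon_event_create(&instructions, PERFMON_ET_GENERIC,
                         PERFMON_EV_INSTRUCTION, PERFMON_EF_KERN);
    perfmon_group_add(group, cycles);
    perfmon_group_add(group, instructions);

    perfmon_group_attach_cpu(group, 0); /* reserves compatible PMCs globally */
    perfmon_group_start(group);

    /* ... run the workload to monitor ... */

    perfmon_group_update(group);        /* refresh counts while still running */
    log_info("cycles: %llu instructions: %llu",
             (unsigned long long)perfmon_event_read(cycles),
             (unsigned long long)perfmon_event_read(instructions));

    perfmon_group_stop(group);
    perfmon_group_detach(group);
    perfmon_group_destroy(group);       /* also destroys the added events */
}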
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9866d93..002ed44 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -61,3 +61,6 @@ x15_SOURCES-y += \ arch/x86/machine/trap_asm.S \ arch/x86/machine/trap.c \ arch/x86/machine/uart.c + +x15_SOURCES-$(CONFIG_PERFMON) += arch/x86/machine/pmu_amd.c +x15_SOURCES-$(CONFIG_PERFMON) += arch/x86/machine/pmu_intel.c diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c index 98d3680..6cbe168 100644 --- a/arch/x86/machine/cpu.c +++ b/arch/x86/machine/cpu.c @@ -69,6 +69,11 @@ #define CPU_INVALID_APIC_ID ((unsigned int)-1) +struct cpu_vendor { + unsigned int id; + const char *str; +}; + /* * MP related CMOS ports, registers and values. */ @@ -155,6 +160,12 @@ static alignas(8) struct cpu_gate_desc cpu_idt[CPU_IDT_SIZE] __read_mostly; static unsigned long cpu_double_fault_handler; static alignas(CPU_DATA_ALIGN) char cpu_double_fault_stack[TRAP_STACK_SIZE]; +uint64_t +cpu_get_freq(void) +{ + return cpu_freq; +} + void cpu_delay(unsigned long usecs) { @@ -173,6 +184,11 @@ cpu_delay(unsigned long usecs) } while (total > 0); } +static const struct cpu_vendor cpu_vendors[] = { + { CPU_VENDOR_INTEL, "GenuineIntel" }, + { CPU_VENDOR_AMD, "AuthenticAMD" }, +}; + void * __init cpu_get_boot_stack(void) { @@ -182,10 +198,9 @@ cpu_get_boot_stack(void) static void __init cpu_preinit(struct cpu *cpu, unsigned int id, unsigned int apic_id) { + memset(cpu, 0, sizeof(*cpu)); cpu->id = id; cpu->apic_id = apic_id; - cpu->state = CPU_STATE_OFF; - cpu->boot_stack = NULL; } static void @@ -430,6 +445,32 @@ cpu_load_idt(const void *idt, size_t size) asm volatile("lidt %0" : : "m" (idtr)); } +static const struct cpu_vendor * +cpu_vendor_lookup(const char *str) +{ + for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); i++) { + if (strcmp(str, cpu_vendors[i].str) == 0) { + return &cpu_vendors[i]; + } + } + + return NULL; +} + +static void __init +cpu_init_vendor_id(struct cpu *cpu) +{ + const struct cpu_vendor *vendor; + + vendor = cpu_vendor_lookup(cpu->vendor_str); + + if (vendor == NULL) { + return; + } + + cpu->vendor_id = vendor->id; +} + /* * Initialize the given cpu structure for the current processor. 
*/ @@ -456,10 +497,12 @@ cpu_init(struct cpu *cpu) eax = 0; cpu_cpuid(&eax, &ebx, &ecx, &edx); max_basic = eax; - memcpy(cpu->vendor_id, &ebx, sizeof(ebx)); - memcpy(cpu->vendor_id + 4, &edx, sizeof(edx)); - memcpy(cpu->vendor_id + 8, &ecx, sizeof(ecx)); - cpu->vendor_id[sizeof(cpu->vendor_id) - 1] = '\0'; + cpu->cpuid_max_basic = max_basic; + memcpy(cpu->vendor_str, &ebx, sizeof(ebx)); + memcpy(cpu->vendor_str + 4, &edx, sizeof(edx)); + memcpy(cpu->vendor_str + 8, &ecx, sizeof(ecx)); + cpu->vendor_str[sizeof(cpu->vendor_str) - 1] = '\0'; + cpu_init_vendor_id(cpu); /* Some fields are only initialized if supported by the processor */ cpu->model_name[0] = '\0'; @@ -498,6 +541,8 @@ cpu_init(struct cpu *cpu) max_extended = eax; } + cpu->cpuid_max_extended = max_extended; + if (max_extended < 0x80000001) { cpu->features3 = 0; cpu->features4 = 0; @@ -617,7 +662,7 @@ void cpu_log_info(const struct cpu *cpu) { log_info("cpu%u: %s, type %u, family %u, model %u, stepping %u", - cpu->id, cpu->vendor_id, cpu->type, cpu->family, cpu->model, + cpu->id, cpu->vendor_str, cpu->type, cpu->family, cpu->model, cpu->stepping); if (strlen(cpu->model_name) > 0) { diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h index 8f9b23c..2464117 100644 --- a/arch/x86/machine/cpu.h +++ b/arch/x86/machine/cpu.h @@ -218,9 +218,13 @@ struct cpu_tss { uint16_t iobp_base; } __packed; -#define CPU_VENDOR_ID_SIZE 13 +#define CPU_VENDOR_STR_SIZE 13 #define CPU_MODEL_NAME_SIZE 49 +#define CPU_VENDOR_UNKNOWN 0 +#define CPU_VENDOR_INTEL 1 +#define CPU_VENDOR_AMD 2 + /* * CPU states. */ @@ -230,8 +234,11 @@ struct cpu_tss { struct cpu { unsigned int id; unsigned int apic_id; - char vendor_id[CPU_VENDOR_ID_SIZE]; + char vendor_str[CPU_VENDOR_STR_SIZE]; char model_name[CPU_MODEL_NAME_SIZE]; + unsigned int cpuid_max_basic; + unsigned int cpuid_max_extended; + unsigned int vendor_id; unsigned int type; unsigned int family; unsigned int model; @@ -537,16 +544,48 @@ cpu_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, : : "memory"); } +/* + * Implies a compiler barrier. + */ static __always_inline void cpu_get_msr(uint32_t msr, uint32_t *high, uint32_t *low) { - asm volatile("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr)); + asm volatile("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr) : "memory"); } -static __always_inline void +/* + * Implies a compiler barrier. + */ +static __always_inline uint64_t +cpu_get_msr64(uint32_t msr) +{ + uint32_t high, low; + + cpu_get_msr(msr, &high, &low); + return (((uint64_t)high << 32) | low); +} + +/* + * Implies a full memory barrier. + */ +static inline void cpu_set_msr(uint32_t msr, uint32_t high, uint32_t low) { - asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high)); + asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high) : "memory"); +} + +/* + * Implies a full memory barrier. + */ +static inline void +cpu_set_msr64(uint32_t msr, uint64_t value) +{ + uint32_t low, high; + + low = value & 0xffffffff; + high = value >> 32; + + cpu_set_msr(msr, high, low); } static __always_inline uint64_t @@ -607,6 +646,11 @@ cpu_tlb_flush_va(unsigned long va) } /* + * Get cpu frequency in Hz. + */ +uint64_t cpu_get_freq(void); + +/* * Busy-wait for a given amount of time, in microseconds. 
*/ void cpu_delay(unsigned long usecs); diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c index 3f6d0c2..d74e676 100644 --- a/arch/x86/machine/lapic.c +++ b/arch/x86/machine/lapic.c @@ -25,6 +25,7 @@ #include <kern/log.h> #include <kern/macros.h> #include <kern/panic.h> +#include <kern/perfmon.h> #include <machine/cpu.h> #include <machine/lapic.h> #include <machine/pmap.h> @@ -159,7 +160,7 @@ struct lapic_map { struct lapic_register icr_high; struct lapic_register lvt_timer; const struct lapic_register reserved14; /* Thermal sensor register */ - const struct lapic_register reserved15; /* Performance counters register */ + struct lapic_register lvt_pmc; /* Performance counters register */ struct lapic_register lvt_lint0; struct lapic_register lvt_lint1; struct lapic_register lvt_error; @@ -239,6 +240,7 @@ lapic_setup_registers(void) lapic_write(&lapic_map->lvt_error, TRAP_LAPIC_ERROR); lapic_write(&lapic_map->timer_dcr, LAPIC_TIMER_DCR_DIV1); lapic_write(&lapic_map->timer_icr, lapic_bus_freq / CLOCK_FREQ); + lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF); } void __init @@ -334,6 +336,21 @@ lapic_ipi_broadcast(uint32_t vector) } void +lapic_pmc_of_intr(struct trap_frame *frame) +{ + (void)frame; + +#ifdef CONFIG_PERFMON + perfmon_of_intr(); + + /* Reset the interrupt, as it is automatically cleared when it fires */ + lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF); +#endif + + lapic_eoi(); +} + +void lapic_timer_intr(struct trap_frame *frame) { (void)frame; diff --git a/arch/x86/machine/lapic.h b/arch/x86/machine/lapic.h index 6355da4..4b8385c 100644 --- a/arch/x86/machine/lapic.h +++ b/arch/x86/machine/lapic.h @@ -54,6 +54,7 @@ void lapic_ipi_broadcast(uint32_t vector); /* * Interrupt handlers. */ +void lapic_pmc_of_intr(struct trap_frame *frame); void lapic_timer_intr(struct trap_frame *frame); void lapic_error_intr(struct trap_frame *frame); void lapic_spurious_intr(struct trap_frame *frame); diff --git a/arch/x86/machine/pmu.h b/arch/x86/machine/pmu.h new file mode 100644 index 0000000..009aac5 --- /dev/null +++ b/arch/x86/machine/pmu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2018 Remy Noel. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * PMU driver modules. + */ + +#ifndef X86_PMU_H +#define X86_PMU_H + +#include <kern/init.h> + +/* + * PMU init operations: + * - module fully initialized + */ +INIT_OP_DECLARE(pmu_intel_setup); +INIT_OP_DECLARE(pmu_amd_setup); + +#endif /* X86_PMU_H */ diff --git a/arch/x86/machine/pmu_amd.c b/arch/x86/machine/pmu_amd.c new file mode 100644 index 0000000..8e56bfa --- /dev/null +++ b/arch/x86/machine/pmu_amd.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014 Richard Braun.
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <stdint.h> + +#include <kern/init.h> +#include <kern/clock.h> +#include <kern/log.h> +#include <kern/perfmon.h> +#include <machine/cpu.h> +#include <machine/pmu.h> + +/* + * AMD raw event IDs. + */ +#define PMU_AMD_RE_CYCLE 0 +#define PMU_AMD_RE_INSTRUCTION 1 +#define PMU_AMD_RE_CACHE_REF 2 +#define PMU_AMD_RE_CACHE_MISS 3 +#define PMU_AMD_RE_BRANCH 4 +#define PMU_AMD_RE_BRANCH_MISS 5 +#define PMU_AMD_RE_DCACHE_REF 6 +#define PMU_AMD_RE_DCACHE_MISS 7 +#define PMU_AMD_RE_IFETCH_STALL 8 +#define PMU_AMD_RE_INVALID ((unsigned int)-1) + +/* + * PMU MSR addresses + */ +#define PMU_AMD_MSR_PERFEVTSEL0 0xc0010000 +#define PMU_AMD_MSR_PERCTR0 0xc0010004 + +/* + * Event Select Register bits + */ +#define PMU_AMD_EVTSEL_USR 0x00010000 +#define PMU_AMD_EVTSEL_OS 0x00020000 +#define PMU_AMD_EVTSEL_INT 0x00100000 +#define PMU_AMD_EVTSEL_EN 0x00400000 + +/* + * XXX These properties have the minimum values required by the architecture. + * TODO Per-family/model event availability database. + */ +#define PMU_AMD_NR_PMCS 4 +#define PMU_AMD_PMC_WIDTH 48 + +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. + */ +struct pmu_amd { + unsigned int pmc_bm; +}; + +static struct pmu_amd pmu_amd; + +struct pmu_amd_event_code { + unsigned short event_select; + unsigned short umask; +}; + +/* + * TODO Per-family/model event availability database.
+ */ +static const struct pmu_amd_event_code pmu_amd_event_codes[] = { + [PMU_AMD_RE_CYCLE] = { 0x76, 0x00 }, + [PMU_AMD_RE_INSTRUCTION] = { 0xc0, 0x00 }, + [PMU_AMD_RE_CACHE_REF] = { 0x80, 0x00 }, + [PMU_AMD_RE_CACHE_MISS] = { 0x81, 0x00 }, + [PMU_AMD_RE_BRANCH] = { 0xc2, 0x00 }, + [PMU_AMD_RE_BRANCH_MISS] = { 0xc3, 0x00 }, + [PMU_AMD_RE_DCACHE_REF] = { 0x40, 0x00 }, + [PMU_AMD_RE_DCACHE_MISS] = { 0x41, 0x00 }, + [PMU_AMD_RE_IFETCH_STALL] = { 0x87, 0x00 }, +}; + +static const unsigned int pmu_amd_generic_events[] = { + [PERFMON_EV_CYCLE] = PMU_AMD_RE_CYCLE, + [PERFMON_EV_REF_CYCLE] = PMU_AMD_RE_INVALID, + [PERFMON_EV_INSTRUCTION] = PMU_AMD_RE_INSTRUCTION, + [PERFMON_EV_CACHE_REF] = PMU_AMD_RE_CACHE_REF, + [PERFMON_EV_CACHE_MISS] = PMU_AMD_RE_CACHE_MISS, + [PERFMON_EV_BRANCH] = PMU_AMD_RE_BRANCH, + [PERFMON_EV_BRANCH_MISS] = PMU_AMD_RE_BRANCH_MISS, +}; + +static struct pmu_amd * +pmu_amd_get(void) +{ + return &pmu_amd; +} + +static void +pmu_amd_info(void) +{ + log_info("pmu: driver: amd, nr_pmcs: %u, pmc_width: %u\n", + PMU_AMD_NR_PMCS, PMU_AMD_PMC_WIDTH); +} + +static int +pmu_amd_translate(unsigned int *raw_event_idp, unsigned int event_id) +{ + assert(event_id < ARRAY_SIZE(pmu_amd_generic_events)); + + *raw_event_idp = pmu_amd_generic_events[event_id]; + return 0; +} + +static int +pmu_amd_alloc(unsigned int *pmc_idp, unsigned int raw_event_id) +{ + struct pmu_amd *pmu; + unsigned int pmc_id; + + /* TODO Per-family/model event availability database */ + (void)raw_event_id; + + pmu = pmu_amd_get(); + + if (pmu->pmc_bm == 0) { + return EAGAIN; + } + + pmc_id = __builtin_ffs(pmu->pmc_bm) - 1; + pmu->pmc_bm &= ~(1U << pmc_id); + *pmc_idp = pmc_id; + + return 0; +} + +static void +pmu_amd_free(unsigned int pmc_id) +{ + struct pmu_amd *pmu; + unsigned int mask; + + assert(pmc_id < PMU_AMD_NR_PMCS); + + pmu = pmu_amd_get(); + mask = (1U << pmc_id); + assert(!(pmu->pmc_bm & mask)); + pmu->pmc_bm |= mask; +} + +static void +pmu_amd_start(unsigned int pmc_id, unsigned int raw_event_id) +{ + const struct pmu_amd_event_code *code; + uint32_t high, low; + + assert(pmc_id < PMU_AMD_NR_PMCS); + assert(raw_event_id < ARRAY_SIZE(pmu_amd_event_codes)); + + code = &pmu_amd_event_codes[raw_event_id]; + + /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */ + high = code->event_select >> 8; + low = PMU_AMD_EVTSEL_EN + | PMU_AMD_EVTSEL_OS + | PMU_AMD_EVTSEL_USR + | (code->umask << 8) + | (code->event_select & 0xff); + cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, high, low); +} + +static void +pmu_amd_stop(unsigned int pmc_id) +{ + assert(pmc_id < PMU_AMD_NR_PMCS); + + cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, 0, 0); +} + +static uint64_t +pmu_amd_read(unsigned int pmc_id) +{ + assert(pmc_id < PMU_AMD_NR_PMCS); + + return cpu_get_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id); +} + +static void +pmu_amd_write(unsigned int pmc_id, uint64_t value) +{ + cpu_set_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id, value); +} + +static int __init +pmu_amd_setup(void) +{ + const struct cpu *cpu; + struct pmu_amd *pmu; + struct perfmon_pmu_driver pmu_driver; + + cpu = cpu_current(); + + if (cpu->vendor_id != CPU_VENDOR_AMD) { + return 0; + } + + /* Support AMD Family 10h processors and later */ + if (cpu->family < 16) { + return ENODEV; + } + + pmu = pmu_amd_get(); + pmu->pmc_bm = (1U << PMU_AMD_NR_PMCS) - 1; + + pmu_driver.pmc_width = PMU_AMD_PMC_WIDTH; + /* Set of_max_ticks so that an overflow check runs before a counter incrementing once per CPU cycle can cover half its range.
*/ + pmu_driver.of_max_ticks = + (1UL << (pmu_driver.pmc_width - 1)) / (cpu_get_freq() / CLOCK_FREQ); + + pmu_driver.ops.info = pmu_amd_info; + pmu_driver.ops.translate = pmu_amd_translate; + pmu_driver.ops.alloc = pmu_amd_alloc; + pmu_driver.ops.free = pmu_amd_free; + pmu_driver.ops.start = pmu_amd_start; + pmu_driver.ops.stop = pmu_amd_stop; + pmu_driver.ops.read = pmu_amd_read; + pmu_driver.ops.write = pmu_amd_write; + + return perfmon_pmu_register(&pmu_driver); +} + +INIT_OP_DEFINE(pmu_amd_setup, + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(log_setup, true)); diff --git a/arch/x86/machine/pmu_intel.c b/arch/x86/machine/pmu_intel.c new file mode 100644 index 0000000..6fbdbae --- /dev/null +++ b/arch/x86/machine/pmu_intel.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <stdint.h> + +#include <kern/init.h> +#include <kern/clock.h> +#include <kern/log.h> +#include <kern/perfmon.h> +#include <machine/cpu.h> +#include <machine/pmu.h> + +/* + * Intel raw event IDs. + */ +#define PMU_INTEL_RE_CYCLE 0 +#define PMU_INTEL_RE_REF_CYCLE 1 +#define PMU_INTEL_RE_INSTRUCTION 2 +#define PMU_INTEL_RE_CACHE_REF 3 +#define PMU_INTEL_RE_CACHE_MISS 4 +#define PMU_INTEL_RE_BRANCH 5 +#define PMU_INTEL_RE_BRANCH_MISS 6 + +/* + * PMU MSR addresses + */ +#define PMU_INTEL_MSR_PMC0 0x0c1 +#define PMU_INTEL_MSR_EVTSEL0 0x186 + +/* + * V2 MSR addresses + */ +#define PMU_INTEL_MSR_FIXED_CTR0 0x0309 +#define PMU_INTEL_MSR_FIXED_CTR1 0x030a +#define PMU_INTEL_MSR_FIXED_CTR2 0x030b +#define PMU_INTEL_MSR_FIXED_CTR_CTRL 0x038d +#define PMU_INTEL_MSR_GLOBAL_STATUS 0x038e +#define PMU_INTEL_MSR_GLOBAL_CTRL 0x038f +#define PMU_INTEL_MSR_GLOBAL_OVF_CTRL 0x0390 + +/* + * Event Select Register bits + */ +#define PMU_INTEL_EVTSEL_USR 0x00010000 +#define PMU_INTEL_EVTSEL_OS 0x00020000 +#define PMU_INTEL_EVTSEL_EDGE 0x00040000 +#define PMU_INTEL_EVTSEL_PC 0x00080000 +#define PMU_INTEL_EVTSEL_INT 0x00100000 +#define PMU_INTEL_EVTSEL_EN 0x00400000 +#define PMU_INTEL_EVTSEL_INV 0x00800000 + +#define PMU_INTEL_ID_VERSION_MASK 0x000000ff +#define PMU_INTEL_ID_NR_PMCS_MASK 0x0000ff00 +#define PMU_INTEL_ID_NR_PMCS_OFFSET 8 +#define PMU_INTEL_ID_PMC_WIDTH_MASK 0x00ff0000 +#define PMU_INTEL_ID_PMC_WIDTH_OFFSET 16 +#define PMU_INTEL_ID_EVLEN_MASK 0xff000000 +#define PMU_INTEL_ID_EVLEN_OFFSET 24 +#define PMU_INTEL_ID_EVLEN_MAX 7 + +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. + */ +struct pmu_intel { + unsigned int version; + unsigned int nr_pmcs; + unsigned int pmc_bm; + unsigned int pmc_width; + unsigned int events; +}; + +static struct pmu_intel pmu_intel; + +/* + * Intel hardware events.
+ */ +#define PMU_INTEL_EVENT_CYCLE 0x01 +#define PMU_INTEL_EVENT_INSTRUCTION 0x02 +#define PMU_INTEL_EVENT_REF_CYCLE 0x04 +#define PMU_INTEL_EVENT_CACHE_REF 0x08 +#define PMU_INTEL_EVENT_CACHE_MISS 0x10 +#define PMU_INTEL_EVENT_BRANCH 0x20 +#define PMU_INTEL_EVENT_BRANCH_MISS 0x40 + +struct pmu_intel_event_code { + unsigned int hw_event_id; + unsigned short event_select; + unsigned short umask; +}; + +static const unsigned int pmu_intel_raw_events[] = { + [PERFMON_EV_CYCLE] = PMU_INTEL_RE_CYCLE, + [PERFMON_EV_REF_CYCLE] = PMU_INTEL_RE_REF_CYCLE, + [PERFMON_EV_INSTRUCTION] = PMU_INTEL_RE_INSTRUCTION, + [PERFMON_EV_CACHE_REF] = PMU_INTEL_RE_CACHE_REF, + [PERFMON_EV_CACHE_MISS] = PMU_INTEL_RE_CACHE_MISS, + [PERFMON_EV_BRANCH] = PMU_INTEL_RE_BRANCH, + [PERFMON_EV_BRANCH_MISS] = PMU_INTEL_RE_BRANCH_MISS, +}; + +static const struct pmu_intel_event_code pmu_intel_event_codes[] = { + [PMU_INTEL_RE_CYCLE] = { PMU_INTEL_EVENT_CYCLE, 0x3c, 0x00 }, + [PMU_INTEL_RE_REF_CYCLE] = { PMU_INTEL_EVENT_REF_CYCLE, 0x3c, 0x01 }, + [PMU_INTEL_RE_INSTRUCTION] = { PMU_INTEL_EVENT_INSTRUCTION, 0xc0, 0x00 }, + [PMU_INTEL_RE_CACHE_REF] = { PMU_INTEL_EVENT_CACHE_REF, 0x2e, 0x4f }, + [PMU_INTEL_RE_CACHE_MISS] = { PMU_INTEL_EVENT_CACHE_MISS, 0x2e, 0x41 }, + [PMU_INTEL_RE_BRANCH] = { PMU_INTEL_EVENT_BRANCH, 0xc4, 0x00 }, + [PMU_INTEL_RE_BRANCH_MISS] = { PMU_INTEL_EVENT_BRANCH_MISS, 0xc5, 0x00 }, +}; + +static struct pmu_intel * +pmu_intel_get(void) +{ + return &pmu_intel; +} + +static uint64_t +pmu_intel_get_status(void) +{ + return cpu_get_msr64(PMU_INTEL_MSR_GLOBAL_STATUS); +} + +static void +pmu_intel_ack_status(uint64_t status) +{ + cpu_set_msr64(PMU_INTEL_MSR_GLOBAL_OVF_CTRL, status); +} + +/* + * TODO use the compiler built-in once libgcc is linked again. + */ +static unsigned int +pmu_popcount(unsigned int bits) +{ + unsigned int count; + + count = 0; + + while (bits) { + if (bits & 1) { + count++; + } + + bits >>= 1; + } + + return count; +} + +static void +pmu_intel_info(void) +{ + const struct pmu_intel *pmu; + unsigned int nr_events; + + pmu = pmu_intel_get(); + nr_events = pmu_popcount(pmu->events); + log_info("pmu: driver: intel, architectural v%u " + "pmu: nr_pmcs: %u, pmc_width: %u, events: %#x, nr_events: %u\n", + pmu->version, pmu->nr_pmcs, pmu->pmc_width, pmu->events, + nr_events); +} + +static int +pmu_intel_translate(unsigned int *raw_event_idp, unsigned int event_id) +{ + if (event_id >= ARRAY_SIZE(pmu_intel_raw_events)) { + return EINVAL; + } + + *raw_event_idp = pmu_intel_raw_events[event_id]; + return 0; +} + +static int +pmu_intel_alloc(unsigned int *pmc_idp, unsigned int raw_event_id) +{ + struct pmu_intel *pmu; + unsigned int pmc_id; + unsigned int hw_event_id; + + assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes)); + + pmu = pmu_intel_get(); + hw_event_id = pmu_intel_event_codes[raw_event_id].hw_event_id; + + if (!(pmu->events & hw_event_id)) { + return EINVAL; + } + + if (pmu->pmc_bm == 0) { + return EAGAIN; + } + + pmc_id = __builtin_ffs(pmu->pmc_bm) - 1; + pmu->pmc_bm &= ~(1U << pmc_id); + *pmc_idp = pmc_id; + return 0; +} + +static void +pmu_intel_free(unsigned int pmc_id) +{ + struct pmu_intel *pmu; + unsigned int mask; + + pmu = pmu_intel_get(); + mask = (1U << pmc_id); + assert(!(pmu->pmc_bm & mask)); + pmu->pmc_bm |= mask; +} + +static void +pmu_intel_start(unsigned int pmc_id, unsigned int raw_event_id) +{ + const struct pmu_intel_event_code *code; + struct pmu_intel *pmu; + uint32_t evtsel; + + assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes)); + + code =
&pmu_intel_event_codes[raw_event_id]; + pmu = pmu_intel_get(); + + /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */ + evtsel = PMU_INTEL_EVTSEL_EN + | PMU_INTEL_EVTSEL_OS + | PMU_INTEL_EVTSEL_USR + | (code->umask << 8) + | code->event_select; + if (pmu->version >= 2) { + evtsel |= PMU_INTEL_EVTSEL_INT; + } + cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, evtsel); +} + +static void +pmu_intel_stop(unsigned int pmc_id) +{ + cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, 0); +} + +static uint64_t +pmu_intel_read(unsigned int pmc_id) +{ + return cpu_get_msr64(PMU_INTEL_MSR_PMC0 + pmc_id); +} + +static void +pmu_intel_write(unsigned int pmc_id, uint64_t value) +{ + cpu_set_msr64(PMU_INTEL_MSR_PMC0 + pmc_id, value); +} + +static int +pmu_intel_consume_bits(uint64_t *bits) +{ + int bit; + + bit = __builtin_ffsll(*bits) - 1; + + if (bit < 0) { + return bit; + } + + *bits &= ~(1ULL << bit); + return bit; +} + +static void +pmu_intel_handle_of_intr_v2(void) +{ + struct pmu_intel *pmu; + uint64_t status; + int pmc_id; + + status = pmu_intel_get_status(); + + if (status == 0) { + return; + } + + pmu_intel_ack_status(status); + pmu = pmu_intel_get(); + + status &= ((1ULL << pmu->pmc_width) - 1); + + for (;;) { + pmc_id = pmu_intel_consume_bits(&status); + + if (pmc_id < 0) { + break; + } + + perfmon_cpu_on_pmc_of(pmc_id); + } +} + +static int __init +pmu_intel_setup(void) +{ + const struct cpu *cpu; + struct pmu_intel *pmu; + struct perfmon_pmu_driver pmu_driver; + unsigned int eax, ebx, ecx, edx, ev_len; + + cpu = cpu_current(); + eax = 0xa; + + if (cpu->vendor_id != CPU_VENDOR_INTEL) { + return 0; + } + + if (cpu->cpuid_max_basic < eax) { + return ENODEV; + } + + pmu = pmu_intel_get(); + cpu_cpuid(&eax, &ebx, &ecx, &edx); + pmu->version = eax & PMU_INTEL_ID_VERSION_MASK; + + if (pmu->version == 0) { + return ENODEV; + } + + pmu->nr_pmcs = (eax & PMU_INTEL_ID_NR_PMCS_MASK) + >> PMU_INTEL_ID_NR_PMCS_OFFSET; + pmu->pmc_bm = (1U << pmu->nr_pmcs) - 1; + pmu->pmc_width = (eax & PMU_INTEL_ID_PMC_WIDTH_MASK) + >> PMU_INTEL_ID_PMC_WIDTH_OFFSET; + ev_len = (eax & PMU_INTEL_ID_EVLEN_MASK) >> PMU_INTEL_ID_EVLEN_OFFSET; + + assert(ev_len <= PMU_INTEL_ID_EVLEN_MAX); + + pmu->events = ~ebx & ((1U << ev_len) - 1); + + pmu_driver.pmc_width = pmu->pmc_width; + pmu_driver.ops.info = pmu_intel_info; + pmu_driver.ops.translate = pmu_intel_translate; + pmu_driver.ops.alloc = pmu_intel_alloc; + pmu_driver.ops.free = pmu_intel_free; + pmu_driver.ops.start = pmu_intel_start; + pmu_driver.ops.stop = pmu_intel_stop; + pmu_driver.ops.read = pmu_intel_read; + pmu_driver.ops.write = pmu_intel_write; + + if (pmu->version >= 2) { + pmu_driver.ops.handle_of_intr = pmu_intel_handle_of_intr_v2; + pmu_driver.of_max_ticks = 0; + } else { + /* Set of_max_ticks so that an overflow check runs before a counter incrementing once per CPU cycle can cover half its range.
*/ + pmu_driver.ops.handle_of_intr = NULL; + pmu_driver.of_max_ticks = + (1ULL << (pmu_driver.pmc_width - 1)) / (cpu_get_freq() / CLOCK_FREQ); + } + + return perfmon_pmu_register(&pmu_driver); +} + +INIT_OP_DEFINE(pmu_intel_setup, + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(log_setup, true)); diff --git a/arch/x86/machine/trap.c b/arch/x86/machine/trap.c index 534b3f6..aaf49ea 100644 --- a/arch/x86/machine/trap.c +++ b/arch/x86/machine/trap.c @@ -210,9 +210,11 @@ trap_setup(void) trap_install(TRAP_XCALL, TRAP_HF_INTR, cpu_xcall_intr); trap_install(TRAP_THREAD_SCHEDULE, TRAP_HF_INTR, cpu_thread_schedule_intr); trap_install(TRAP_CPU_HALT, TRAP_HF_INTR, cpu_halt_intr); + trap_install(TRAP_LAPIC_PMC_OF, TRAP_HF_INTR, trap_default); trap_install(TRAP_LAPIC_TIMER, TRAP_HF_INTR, lapic_timer_intr); trap_install(TRAP_LAPIC_ERROR, TRAP_HF_INTR, lapic_error_intr); trap_install(TRAP_LAPIC_SPURIOUS, TRAP_HF_INTR, lapic_spurious_intr); + trap_install(TRAP_LAPIC_PMC_OF, TRAP_HF_INTR, lapic_pmc_of_intr); return 0; } diff --git a/arch/x86/machine/trap.h b/arch/x86/machine/trap.h index af6fd6b..c5bdc1f 100644 --- a/arch/x86/machine/trap.h +++ b/arch/x86/machine/trap.h @@ -62,6 +62,7 @@ #define TRAP_XCALL 238 #define TRAP_THREAD_SCHEDULE 239 #define TRAP_CPU_HALT 240 +#define TRAP_LAPIC_PMC_OF 252 #define TRAP_LAPIC_TIMER 253 #define TRAP_LAPIC_ERROR 254 #define TRAP_LAPIC_SPURIOUS 255 diff --git a/kern/Kconfig b/kern/Kconfig index fced67c..5e0e5eb 100644 --- a/kern/Kconfig +++ b/kern/Kconfig @@ -94,6 +94,12 @@ config THREAD_STACK_GUARD If unsure, disable. +config PERFMON + bool "Performance monitoring counters" + ---help--- + Enable the performance monitoring counters (perfmon API). While in use, + it may slightly lengthen the thread scheduling critical sections. + endmenu menu "Debugging" diff --git a/kern/Makefile b/kern/Makefile index ab7d6b5..5b04fcb 100644 --- a/kern/Makefile +++ b/kern/Makefile @@ -41,3 +41,5 @@ x15_SOURCES-$(CONFIG_SHELL) += kern/shell.c x15_SOURCES-$(CONFIG_MUTEX_ADAPTIVE) += kern/mutex/mutex_adaptive.c x15_SOURCES-$(CONFIG_MUTEX_PLAIN) += kern/mutex/mutex_plain.c + +x15_SOURCES-$(CONFIG_PERFMON) += kern/perfmon.c diff --git a/kern/perfmon.c b/kern/perfmon.c new file mode 100644 index 0000000..c910069 --- /dev/null +++ b/kern/perfmon.c @@ -0,0 +1,1388 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2015 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * The performance monitoring module manages performance monitoring through + * event groups. Each physical performance monitoring counter (pmc) may be + * referenced by perfmon events, which are themselves grouped in perfmon groups. + * Groups can then be attached to either threads or cpus via perfmon + * grouplists.
+ * + * In order to guarantee that thread migration is properly handled, event + * types are reserved on the performance monitoring units (pmu) of all cpus for + * every event of a group when it is attached. Therefore, attaching a group may + * fail if no compatible pmc is available globally. + * + * Locking order: interrupts -> thread runq -> grouplist -> group + * + * TODO API to differentiate user and kernel events. + */ + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include <kern/error.h> +#include <kern/init.h> +#include <kern/kmem.h> +#include <kern/list.h> +#include <kern/log.h> +#include <kern/macros.h> +#include <kern/panic.h> +#include <kern/percpu.h> +#include <kern/perfmon.h> +#include <kern/perfmon_i.h> +#include <kern/spinlock.h> +#include <kern/thread.h> +#include <kern/timer.h> +#include <kern/xcall.h> +#include <machine/cpu.h> +#include <machine/pmu.h> + +/* + * Performance monitoring event. + * + * When a group is attached, each of its events is associated to a PMC, + * adding a reference in the process. + */ +struct perfmon_event { + uint64_t count; + uint64_t prev; + uint64_t overflow_id; + struct list node; + int flags; + unsigned int type; + unsigned int id; + unsigned int pmc_index; +#ifdef CONFIG_PERFMON_TEST + uint64_t value; + bool set_value; +#endif +}; + +#define PERFMON_INVALID_CPU ((unsigned int)-1) + +/* + * Group types. + */ +#define PERFMON_GT_UNKNOWN 0 +#define PERFMON_GT_CPU 1 +#define PERFMON_GT_THREAD 2 + +/* + * Group state flags. + */ +#define PERFMON_GF_ATTACHED 1 +#define PERFMON_GF_ENABLED 2 +#define PERFMON_GF_LOADED 4 +#define PERFMON_GF_PENDING_DISABLE 8 + +/* + * The possible states of a group are handled through the flags attribute. + * - A group can either be unattached or attached (to a thread or a cpu). + * - An attached group may be enabled or not. + * - An enabled group may be loaded or not, i.e. have actual running + * performance counters on a CPU. + * + * When a group is attached, some resources are reserved for it so it can be + * monitored at any time. + * When a group is enabled, it gets loaded when needed: + * - Cpu groups stay loaded as long as they are enabled. + * - Thread groups are loaded while their thread is running. + * When a group is loaded, its performance counters are actively counting. + * + * The PENDING_DISABLE flag is here so that a group attached to a remote + * thread can be disabled when that thread unschedules itself. + * + * Note that an unattached group can only be referenced from the API. Since + * manipulating the same group from different threads at the same time is not + * supported, the code does not bother to lock it when the group is assumed + * unattached. + * + * About thread-type group counter synchronization: + * - The groups are synchronized when their thread is unscheduled, which + * means their counter values are updated and pending counter changes + * (like unloading) are performed. + * - Since all operations require the group to be locked, it is mandatory + * to unlock the group before xcalling any remote operation in order to + * avoid a deadlock. + * - Any remote thread operation that gets executed after the thread got + * unscheduled will have nothing to do if the current thread is not the + * target one, since the target thread has been unloaded in between. + */ +struct perfmon_group { + struct list node; + struct list events; + struct thread *thread; + struct spinlock lock; + unsigned int cpu; + short flags; + unsigned short type; +}; + +/* + * List of all groups attached to a single monitored object, either a CPU
+ */ +struct perfmon_grouplist { + struct list groups; + struct spinlock lock; +}; + +/* + * Maximum number of supported hardware counters. + */ +#define PERFMON_MAX_PMCS 64 + +/* + * Performance monitoring counter. + * + * When a PMC is valid, it maps a raw event to a hardware counter. + * A PMC is valid if and only if its reference count isn't zero. + */ +struct perfmon_pmc { + unsigned int nr_refs; + unsigned int raw_event_id; + unsigned int id; +}; + +/* + * Performance monitoring unit. + * + * There is a single system-wide logical PMU, used to globally allocate + * PMCs. Reserving a counter across the entire system ensures thread + * migration isn't hindered by performance monitoring. + */ +struct perfmon_pmu { + struct spinlock lock; + unsigned int nr_pmcs; + struct perfmon_pmc pmcs[PERFMON_MAX_PMCS]; +}; + +/* + * Per-CPU performance monitoring counter. + * + * When a group is loaded on a processor, the per-CPU PMCs of its events + * get referenced. When a per-CPU PMC is referenced, its underlying hardware + * counter is active. + * + * Preemption must be disabled on access. + */ +struct perfmon_cpu_pmc { + unsigned int nr_refs; + uint64_t prev_value; + uint64_t overflow_id; +}; + +/* + * Per-CPU performance monitoring unit. + * + * The per-CPU PMCs are indexed the same way as the global PMCs. + * + * Preemption must be disabled on access. + */ +struct perfmon_cpu_pmu { + struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS]; + struct timer of_timer; + unsigned int cpu_id; +}; + +static struct perfmon_pmu_driver pmu_driver __read_mostly; + +static struct perfmon_pmu perfmon_pmu; +static unsigned int perfmon_pmc_id_to_index[PERFMON_MAX_PMCS]; + +static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu; + +/* + * Cache of thread-specific group lists. + */ +static struct kmem_cache perfmon_grouplist_cache; + +/* + * CPU specific group lists. + */ +static struct perfmon_grouplist *perfmon_cpu_grouplist __percpu; + +static inline int +perfmon_translate(unsigned int *raw_event_idp, unsigned int event_type, + unsigned int event_id) +{ + switch (event_type) { + case PERFMON_ET_RAW: + *raw_event_idp = event_id; + return 0; + case PERFMON_ET_GENERIC: + return pmu_driver.ops.translate(raw_event_idp, event_id); + default: + panic("perfmon: unsupported event type"); + } +} + +static int +perfmon_pmc_alloc(struct perfmon_pmc **pmcp, unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + size_t i; + int error; + + if (perfmon_pmu.nr_pmcs == ARRAY_SIZE(perfmon_pmu.pmcs)) { + return EAGAIN; + } + + for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + pmc = &perfmon_pmu.pmcs[i]; + + if (pmc->nr_refs == 0) { + break; + } + } + + assert(i < ARRAY_SIZE(perfmon_pmu.pmcs)); + error = pmu_driver.ops.alloc(&pmc->id, raw_event_id); + + if (error) { + return error; + } + + pmc->raw_event_id = raw_event_id; + perfmon_pmu.nr_pmcs++; + *pmcp = pmc; + + return 0; +} + +static struct perfmon_pmc * +perfmon_pmc_lookup(unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + size_t i; + + if (perfmon_pmu.nr_pmcs == 0) { + return NULL; + } + + for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + pmc = &perfmon_pmu.pmcs[i]; + + if ((pmc->nr_refs != 0) && (pmc->raw_event_id == raw_event_id)) { + return pmc; + } + } + + return NULL; +} + +static inline unsigned int +perfmon_pmc_index(const struct perfmon_pmc *pmc) +{ + unsigned int index; + + index = pmc - perfmon_pmu.pmcs; + assert(index < ARRAY_SIZE(perfmon_pmu.pmcs)); + return index; +} + +/* + * Obtain a reference on a PMC for the given event. 
+ * + * If there is no existing PMC suitable for this event, allocate one. + */ +static int +perfmon_pmc_get(struct perfmon_pmc **pmcp, const struct perfmon_event *event) +{ + struct perfmon_pmc *pmc; + unsigned int raw_event_id; + unsigned int pmc_index; + int error; + + error = perfmon_translate(&raw_event_id, event->type, event->id); + + if (error) { + return error; + } + + spinlock_lock(&perfmon_pmu.lock); + + pmc = perfmon_pmc_lookup(raw_event_id); + + if (pmc == NULL) { + error = perfmon_pmc_alloc(&pmc, raw_event_id); + + if (error) { + goto out; + } + pmc_index = perfmon_pmc_index(pmc); + assert(perfmon_pmc_id_to_index[pmc->id] == UINT32_MAX); + perfmon_pmc_id_to_index[pmc->id] = pmc_index; + } + + pmc->nr_refs++; + +out: + spinlock_unlock(&perfmon_pmu.lock); + + if (error) { + return error; + } + *pmcp = pmc; + + return 0; +} + +/* + * Release a reference on a PMC. + */ +static void +perfmon_pmc_put(struct perfmon_pmc *pmc) +{ + spinlock_lock(&perfmon_pmu.lock); + + assert(pmc->nr_refs != 0); + pmc->nr_refs--; + + if (pmc->nr_refs == 0) { + pmu_driver.ops.free(pmc->id); + assert(perfmon_pmc_id_to_index[pmc->id] != UINT32_MAX); + perfmon_pmc_id_to_index[pmc->id] = UINT32_MAX; + } + + spinlock_unlock(&perfmon_pmu.lock); +} + +static inline struct perfmon_pmc * +perfmon_pmc_from_index(unsigned int index) +{ + assert(index < ARRAY_SIZE(perfmon_pmu.pmcs)); + return &perfmon_pmu.pmcs[index]; +} + +static void +perfmon_grouplist_ctor(void *arg) +{ + struct perfmon_grouplist *grouplist; + + grouplist = arg; + + list_init(&grouplist->groups); + spinlock_init(&grouplist->lock); +} + +static struct perfmon_grouplist * +perfmon_grouplist_create(void) +{ + return kmem_cache_alloc(&perfmon_grouplist_cache); +} + +static void +perfmon_grouplist_destroy(struct perfmon_grouplist *grouplist) +{ + kmem_cache_free(&perfmon_grouplist_cache, grouplist); +} + +static void perfmon_check_of(struct timer *timer); + +static void __init +perfmon_cpu_pmu_init(unsigned int cpuid) +{ + unsigned int i; + struct perfmon_cpu_pmu *cpu_pmu; + + cpu_pmu = percpu_ptr(perfmon_cpu_pmu, cpuid); + cpu_pmu->cpu_id = cpuid; + if (!pmu_driver.ops.handle_of_intr) { + /* XXX: using high prio instead of INTR because we might xcall from the + * callbacks. + */ + timer_init(&cpu_pmu->of_timer, &perfmon_check_of, TIMER_HIGH_PRIO); + timer_schedule(&cpu_pmu->of_timer, pmu_driver.of_max_ticks); + } + + for (i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) { + struct perfmon_cpu_pmc *pmc; + + pmc = &cpu_pmu->pmcs[i]; + + pmc->nr_refs = 0; + pmc->overflow_id = 0; + } +} + +static struct perfmon_cpu_pmc * +perfmon_cpu_pmu_get_pmc_from_id(unsigned int pmc_id) +{ + unsigned int pmc_index; + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_cpu_pmc *cpu_pmc; + + assert(perfmon_pmc_id_to_index[pmc_id] != UINT32_MAX); + pmc_index = perfmon_pmc_id_to_index[pmc_id]; + + /* TODO: this may be called many times in a row. We may want to have it + * passed to the function.
+ */ + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + cpu_pmc = &cpu_pmu->pmcs[pmc_index]; + + assert(cpu_pmc->nr_refs != 0); + + return cpu_pmc; +} + +void +perfmon_cpu_on_pmc_of(unsigned int pmc_id) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + cpu_pmc = perfmon_cpu_pmu_get_pmc_from_id(pmc_id); + cpu_pmc->overflow_id++; +} + +static void +perfmon_check_of_remote(void *arg) +{ + perfmon_check_of(arg); +} + +static void +perfmon_check_pmc_of(struct perfmon_cpu_pmc *cpu_pmc, uint64_t value) +{ + uint64_t prev; + + prev = cpu_pmc->prev_value; + if (prev > value) { + /* Overflow */ + cpu_pmc->overflow_id++; + } + cpu_pmc->prev_value = value; +} + +static void +perfmon_check_of(struct timer *timer) +{ + struct perfmon_pmc *pmc; + struct perfmon_cpu_pmc *cpu_pmc; + struct perfmon_cpu_pmu *cpu_pmu; + uint64_t value; + + cpu_pmu = structof(timer, struct perfmon_cpu_pmu, of_timer); + if (cpu_pmu->cpu_id != cpu_id()) + { + xcall_call(perfmon_check_of_remote, timer, cpu_pmu->cpu_id); + return; + } + + for (size_t i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + pmc = perfmon_pmc_from_index(i); + if (pmc->nr_refs == 0) { + continue; + } + + cpu_pmc = &cpu_pmu->pmcs[i]; + value = pmu_driver.ops.read(pmc->id); + + perfmon_check_pmc_of(cpu_pmc, value); + } + + timer_schedule(timer, pmu_driver.of_max_ticks); +} + +static void +perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + cpu_pmc = &cpu_pmu->pmcs[pmc_index]; + + if (cpu_pmc->nr_refs == 0) { + pmu_driver.ops.start(perfmon_pmu.pmcs[pmc_index].id, + perfmon_pmu.pmcs[pmc_index].raw_event_id); + cpu_pmc->prev_value = pmu_driver.ops.read(perfmon_pmu.pmcs[pmc_index].id); + } + + cpu_pmc->nr_refs++; +} + +static void +perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + cpu_pmc = &cpu_pmu->pmcs[pmc_index]; + assert(cpu_pmc->nr_refs != 0); + cpu_pmc->nr_refs--; + + if (cpu_pmc->nr_refs == 0) { + pmu_driver.ops.stop(perfmon_pmu.pmcs[pmc_index].id); + } +} + +void +perfmon_of_intr(void) +{ + assert(pmu_driver.ops.handle_of_intr); + pmu_driver.ops.handle_of_intr(); +} + +int +perfmon_pmu_register(struct perfmon_pmu_driver *driver) +{ + struct perfmon_pmu_ops *ops = &driver->ops; + + assert(ops->info && ops->translate && ops->alloc + && ops->free && ops->start && ops->stop); + assert(!ops->handle_of_intr != !driver->of_max_ticks); + + if (pmu_driver.ops.info) { + /* Already initialized */ + assert(0); + return EINVAL; + } + pmu_driver = *driver; + + return 0; +} + +static int __init +perfmon_bootstrap(void) +{ + kmem_cache_init(&perfmon_grouplist_cache, "perfmon_grouplist", + sizeof(struct perfmon_grouplist), 0, + perfmon_grouplist_ctor, 0); + + return 0; +} + +INIT_OP_DEFINE(perfmon_bootstrap, + INIT_OP_DEP(kmem_setup, true)); + +static int __init +perfmon_setup(void) +{ + struct perfmon_grouplist *grouplist; + unsigned int i; + + spinlock_init(&perfmon_pmu.lock); + perfmon_pmu.nr_pmcs = 0; + + for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + perfmon_pmu.pmcs[i].nr_refs = 0; + } + for (i = 0; i < ARRAY_SIZE(perfmon_pmc_id_to_index); i++) { + perfmon_pmc_id_to_index[i] = UINT32_MAX; + } + + for (i = 0; i < cpu_count(); i++) { + perfmon_cpu_pmu_init(i); + } + + for (i = 0; i < cpu_count(); i++) { + grouplist = perfmon_grouplist_create(); + + if (grouplist == NULL) { + panic("perfmon: unable to create cpu grouplists"); + } + + percpu_var(perfmon_cpu_grouplist, i) = grouplist; + } + + if (!pmu_driver.ops.info) { + log_err("unable 
to start perfmon: no compatible pmu driver available"); + return ENODEV; + } + pmu_driver.ops.info(); + + return 0; +} + +INIT_OP_DEFINE(perfmon_setup, + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(kmem_setup, true), + INIT_OP_DEP(panic_setup, true), + INIT_OP_DEP(percpu_setup, true), + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(pmu_amd_setup, false), + INIT_OP_DEP(pmu_intel_setup, false), + INIT_OP_DEP(spinlock_setup, true), + INIT_OP_DEP(thread_setup, true), + INIT_OP_DEP(trap_setup, true)); + +static void +perfmon_check_event_args(unsigned int type, unsigned int id, int flags) +{ + (void)type; + (void)id; + (void)flags; + assert((type == PERFMON_ET_RAW) || (type == PERFMON_ET_GENERIC)); + assert((type != PERFMON_ET_GENERIC) || (id < PERFMON_NR_GENERIC_EVENTS)); + assert((flags & PERFMON_EF_MASK) == flags); + assert((flags & (PERFMON_EF_KERN | PERFMON_EF_USER))); +} + +int +perfmon_event_create(struct perfmon_event **eventp, unsigned int type, + unsigned int id, int flags) +{ + struct perfmon_event *event; + + perfmon_check_event_args(type, id, flags); + + event = kmem_alloc(sizeof(*event)); + + if (event == NULL) { + return ENOMEM; + } + + event->count = 0; + list_node_init(&event->node); + event->flags = flags; + event->type = type; + event->id = id; + *eventp = event; + return 0; +} + +void +perfmon_event_destroy(struct perfmon_event *event) +{ + kmem_free(event, sizeof(*event)); +} + +uint64_t +perfmon_event_read(const struct perfmon_event *event) +{ + return event->count; +} + +#ifdef CONFIG_PERFMON_TEST + +int +perfmon_event_write(struct perfmon_event *event, uint64_t value) +{ + if (!pmu_driver.ops.write) { + return ENODEV; + } + event->value = value; + event->set_value = true; + + return 0; +} + +int +perfmon_get_pmc_width(void) +{ + return pmu_driver.pmc_width; +} + +#endif /* CONFIG_PERFMON_TEST */ + +void +perfmon_event_reset(struct perfmon_event *event) +{ + event->count = 0; +} + +static void +perfmon_event_sync(struct perfmon_cpu_pmu *cpu_pmu, + struct perfmon_event *event) +{ + struct perfmon_pmc *pmc; + struct perfmon_cpu_pmc *cpu_pmc; + uint64_t count; + int diff; + + pmc = perfmon_pmc_from_index(event->pmc_index); + cpu_pmc = &cpu_pmu->pmcs[event->pmc_index]; + count = pmu_driver.ops.read(pmc->id); + + if (!pmu_driver.ops.handle_of_intr) { + /* Force pmc overflow status update */ + perfmon_check_pmc_of(cpu_pmc, count); + } + + if (unlikely(event->overflow_id != cpu_pmc->overflow_id)) { + assert(cpu_pmc->overflow_id > event->overflow_id); + diff = cpu_pmc->overflow_id - event->overflow_id; + /* diff is very likely 1.
*/ + event->count += (1ULL << pmu_driver.pmc_width) * diff + - event->prev + count; + event->overflow_id = cpu_pmc->overflow_id; + } else { + event->count += count - event->prev; + } + event->prev = count; +} + +static inline int +perfmon_group_attached(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_ATTACHED; +} + +static inline int +perfmon_group_enabled(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_ENABLED; +} + +static inline int +perfmon_group_loaded(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_LOADED; +} + +static inline int +perfmon_group_stopping(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_PENDING_DISABLE; +} + +int +perfmon_group_create(struct perfmon_group **groupp) +{ + struct perfmon_group *group; + + group = kmem_alloc(sizeof(*group)); + + if (group == NULL) { + return ENOMEM; + } + + list_init(&group->events); + spinlock_init(&group->lock); + group->cpu = PERFMON_INVALID_CPU; + group->flags = 0; + group->type = PERFMON_GT_UNKNOWN; + *groupp = group; + return 0; +} + +int +perfmon_group_destroy(struct perfmon_group *group) +{ + struct perfmon_event *event; + + if (perfmon_group_attached(group)) { + return EINVAL; + } + assert (!perfmon_group_enabled(group)); + + while (!list_empty(&group->events)) { + event = list_first_entry(&group->events, struct perfmon_event, node); + list_remove(&event->node); + perfmon_event_destroy(event); + } + + kmem_free(group, sizeof(*group)); + return 0; +} + +void +perfmon_group_add(struct perfmon_group *group, struct perfmon_event *event) +{ + assert(list_node_unlinked(&event->node)); + assert(!perfmon_group_attached(group)); + + /* TODO: check that we do not have the same event twice. */ + + list_insert_tail(&group->events, &event->node); +} + +/* + * Attach a group to the global logical PMU. + * + * For each event in the group, obtain a reference on a PMC.
+ */ +static int +perfmon_group_attach_pmu(struct perfmon_group *group) +{ + struct perfmon_event *event, *tmp; + struct perfmon_pmc *pmc = NULL; + int error; + + assert(!perfmon_group_attached(group)); + + list_for_each_entry(&group->events, event, node) { + error = perfmon_pmc_get(&pmc, event); + + if (error) { + goto error_pmc; + } + + event->pmc_index = perfmon_pmc_index(pmc); + } + + return 0; + +error_pmc: + list_for_each_entry(&group->events, tmp, node) { + if (tmp == event) { + break; + } + + perfmon_pmc_put(perfmon_pmc_from_index(tmp->pmc_index)); + } + + return error; +} + +static void +perfmon_group_detach_pmu(struct perfmon_group *group) +{ + struct perfmon_event *event; + + assert(perfmon_group_attached(group)); + + list_for_each_entry(&group->events, event, node) { + perfmon_pmc_put(perfmon_pmc_from_index(event->pmc_index)); + } +} + +int +perfmon_group_attach(struct perfmon_group *group, struct thread *thread) +{ + struct perfmon_grouplist *grouplist; + unsigned long flags; + int error; + + assert(group->type == PERFMON_GT_UNKNOWN); + + error = perfmon_group_attach_pmu(group); + + if (error) { + return error; + } + + thread_ref(thread); + group->thread = thread; + group->type = PERFMON_GT_THREAD; + group->flags |= PERFMON_GF_ATTACHED; + + grouplist = thread->perfmon_groups; + + spinlock_lock_intr_save(&grouplist->lock, &flags); + list_insert_tail(&grouplist->groups, &group->node); + spinlock_unlock_intr_restore(&grouplist->lock, flags); + + return 0; +} + +int +perfmon_group_attach_cpu(struct perfmon_group *group, unsigned int cpu) +{ + int error; + struct perfmon_grouplist *grouplist; + + assert(cpu < cpu_count()); + assert(group->type == PERFMON_GT_UNKNOWN); + + error = perfmon_group_attach_pmu(group); + + if (error) { + return error; + } + + group->cpu = cpu; + group->type = PERFMON_GT_CPU; + group->flags |= PERFMON_GF_ATTACHED; + + grouplist = percpu_var(perfmon_cpu_grouplist, cpu); + + spinlock_lock(&grouplist->lock); + list_insert_tail(&grouplist->groups, &group->node); + spinlock_unlock(&grouplist->lock); + + return 0; +} + +int +perfmon_group_detach(struct perfmon_group *group) +{ + unsigned long flags; + unsigned long grouplist_flags; + struct perfmon_grouplist *grouplist; + struct thread *prev_thread; + unsigned int type; + int ret; + + type = group->type; + grouplist_flags = 0; /* silence Wmaybe-uninitialized warning. 
*/ + ret = 0; + prev_thread = NULL; + + switch (type) { + case PERFMON_GT_THREAD: + grouplist = group->thread->perfmon_groups; + spinlock_lock_intr_save(&grouplist->lock, &grouplist_flags); + prev_thread = group->thread; + break; + case PERFMON_GT_CPU: + grouplist = percpu_var(perfmon_cpu_grouplist, group->cpu); + spinlock_lock(&grouplist->lock); + break; + default: + panic("perfmon: invalid group type on detach"); + } + spinlock_lock_intr_save(&group->lock, &flags); + + if (perfmon_group_enabled(group)) { + ret = EINVAL; + goto out; + } + + if (!perfmon_group_attached(group)) { + goto out; + } + + perfmon_group_detach_pmu(group); + list_remove(&group->node); + + group->thread = NULL; + group->cpu = PERFMON_INVALID_CPU; + group->type = PERFMON_GT_UNKNOWN; + group->flags &= ~PERFMON_GF_ATTACHED; + assert(!group->flags); + +out: + spinlock_unlock_intr_restore(&group->lock, flags); + switch (type) { + case PERFMON_GT_THREAD: + spinlock_unlock_intr_restore(&grouplist->lock, grouplist_flags); + break; + case PERFMON_GT_CPU: + spinlock_unlock(&grouplist->lock); + break; + } + + if (prev_thread) { + /* Late unref as it might destroy the thread and lock the runq. */ + thread_unref(prev_thread); + } + + return ret; +} + +static void +perfmon_group_load(struct perfmon_group *group) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_event *event; + struct perfmon_pmc *pmc; + uint64_t prev; + + assert(!thread_preempt_enabled()); + assert(perfmon_group_enabled(group)); + assert(!perfmon_group_loaded(group)); + + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + +#ifdef CONFIG_PERFMON_TEST + /* XXX: could be done in the loading loop, but performance does not + * matter in the functional tests using this feature. + */ + list_for_each_entry(&group->events, event, node) { + if (!event->set_value) { + continue; + } + pmc = perfmon_pmc_from_index(event->pmc_index); + pmu_driver.ops.write(pmc->id, event->value); + event->set_value = false; + } +#endif + + list_for_each_entry(&group->events, event, node) { + pmc = perfmon_pmc_from_index(event->pmc_index); + prev = pmu_driver.ops.read(pmc->id); + + perfmon_cpu_pmu_load(cpu_pmu, event->pmc_index); + event->prev = prev; + event->overflow_id = cpu_pmu->pmcs[event->pmc_index].overflow_id; + } + + group->cpu = cpu_id(); + group->flags |= PERFMON_GF_LOADED; +} + +static void +perfmon_cpu_load_remote(void *arg) +{ + struct perfmon_group *group; + + group = arg; + assert (group->cpu == cpu_id()); + + spinlock_lock(&group->lock); + + perfmon_group_load(group); + + spinlock_unlock(&group->lock); +} + +static void +perfmon_group_unload(struct perfmon_group *group) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_event *event; + + assert(!thread_preempt_enabled()); + assert(perfmon_group_enabled(group)); + assert(perfmon_group_loaded(group)); + + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + + list_for_each_entry(&group->events, event, node) { + perfmon_cpu_pmu_unload(cpu_pmu, event->pmc_index); + perfmon_event_sync(cpu_pmu, event); + } + + group->flags &= ~PERFMON_GF_LOADED; +} + +static void +perfmon_cpu_unload_remote(void *arg) +{ + struct perfmon_group *group; + + group = arg; + assert (group->cpu == cpu_id()); + assert (perfmon_group_stopping(group)); + + spinlock_lock(&group->lock); + + perfmon_group_unload(group); + + group->flags &= ~PERFMON_GF_PENDING_DISABLE; + group->flags &= ~PERFMON_GF_ENABLED; + + spinlock_unlock(&group->lock); +} + +static void +perfmon_thread_load_remote(void *arg) +{ + struct perfmon_group *group; + struct thread
*thread; + + assert (!cpu_intr_enabled()); + + group = arg; + thread = thread_self(); + + if (thread != group->thread) { + return; + } + + spinlock_lock(&group->lock); + + if (perfmon_group_enabled(group) && !perfmon_group_loaded(group)) { + perfmon_group_load(group); + } + + spinlock_unlock(&group->lock); +} + +static void +perfmon_thread_unload_remote(void *arg) +{ + struct perfmon_group *group; + struct thread *thread; + + assert (!cpu_intr_enabled()); + + group = arg; + thread = thread_self(); + + if (thread != group->thread) { + return; + } + + spinlock_lock(&group->lock); + + if (perfmon_group_enabled(group)) { + assert (perfmon_group_stopping(group)); + if (perfmon_group_loaded(group)) { + perfmon_group_unload(group); + } + group->flags &= ~PERFMON_GF_PENDING_DISABLE; + group->flags &= ~PERFMON_GF_ENABLED; + } + + spinlock_unlock(&group->lock); +} + +int +perfmon_group_start(struct perfmon_group *group) +{ + unsigned long flags; + unsigned int cpu; + int ret; + + ret = 0; + spinlock_lock_intr_save(&group->lock, &flags); + + if (!perfmon_group_attached(group) || perfmon_group_loaded(group)) { + ret = EINVAL; + goto end; + } + assert(!perfmon_group_enabled(group)); + + group->flags |= PERFMON_GF_ENABLED; + + if (group->type == PERFMON_GT_CPU) { + spinlock_unlock_intr_restore(&group->lock, flags); + + xcall_call(perfmon_cpu_load_remote, group, group->cpu); + + return 0; + } else if (group->thread == thread_self()) { + perfmon_group_load(group); + } else if (group->thread->state == THREAD_RUNNING) { + spinlock_unlock_intr_restore(&group->lock, flags); + + cpu = thread_cpu(group->thread); + + xcall_call(perfmon_thread_load_remote, group, cpu); + + return 0; + } +end: + spinlock_unlock_intr_restore(&group->lock, flags); + + return ret; +} + +static void +perfmon_group_sync_local(struct perfmon_group *group) +{ + struct perfmon_event *event; + struct perfmon_cpu_pmu *cpu_pmu; + + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + + /* The group sync duration *should be* limited as a group may only have a + * limited amount of *different* events. 
+ */ + list_for_each_entry(&group->events, event, node) { + perfmon_event_sync(cpu_pmu, event); + } +} + +static void +perfmon_cpu_sync_remote(void *arg) +{ + struct perfmon_group *group; + + group = arg; + assert (group->type == PERFMON_GT_CPU); + assert (group->cpu == cpu_id()); + + perfmon_group_sync_local(group); +} + +static void +perfmon_thread_sync_remote(void *arg) +{ + struct perfmon_group *group; + unsigned long flags; + + group = arg; + + assert (group->type == PERFMON_GT_THREAD); + if (thread_self() != group->thread) { + return; + } + spinlock_lock_intr_save(&group->lock, &flags); + + perfmon_group_sync_local(group); + + spinlock_unlock_intr_restore(&group->lock, flags); +} + +void +perfmon_group_update(struct perfmon_group *group) +{ + unsigned long flags; + unsigned int cpu; + + assert(perfmon_group_enabled(group)); + + spinlock_lock_intr_save(&group->lock, &flags); + + assert(perfmon_group_attached(group)); + assert(perfmon_group_enabled(group)); + + if (!perfmon_group_loaded(group)) { + goto end; + } + + if (group->type == PERFMON_GT_CPU) { + if (group->cpu == cpu_id()) + perfmon_group_sync_local(group); + else { + xcall_call(perfmon_cpu_sync_remote, group, group->cpu); + } + } else { + if (group->thread == thread_self()) { + assert (perfmon_group_loaded(group)); + perfmon_group_sync_local(group); + } else if (group->thread->state == THREAD_RUNNING) { + spinlock_unlock_intr_restore(&group->lock, flags); + cpu = thread_cpu(group->thread); + xcall_call(perfmon_thread_sync_remote, group, cpu); + return; + } + } +end: + spinlock_unlock_intr_restore(&group->lock, flags); +} + +int +perfmon_group_stop(struct perfmon_group *group) +{ + int ret; + unsigned long flags; + unsigned int cpu; + + ret = 0; + spinlock_lock_intr_save(&group->lock, &flags); + + if (!perfmon_group_attached(group) || !perfmon_group_enabled(group)) { + ret = EINVAL; + goto end; + } + + if (!perfmon_group_loaded(group)) { + goto disable; + } + + group->flags |= PERFMON_GF_PENDING_DISABLE; + + if (group->type == PERFMON_GT_CPU) { + spinlock_unlock_intr_restore(&group->lock, flags); + + xcall_call(perfmon_cpu_unload_remote, group, group->cpu); + return 0; + } else if (group->thread == thread_self()) { + perfmon_group_unload(group); + } else { + /* If the thead is not running (but still loaded), the unload is + * (probably) getting called when we release the group lock, but we + * still need a blocking xcall to guarantee the group is disabled when + * the function returns. 
+         */
+        spinlock_unlock_intr_restore(&group->lock, flags);
+
+        cpu = thread_cpu(group->thread);
+
+        xcall_call(perfmon_thread_unload_remote, group, cpu);
+        return 0;
+    }
+
+disable:
+    group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+    group->flags &= ~PERFMON_GF_ENABLED;
+
+end:
+    spinlock_unlock_intr_restore(&group->lock, flags);
+    return ret;
+}
+
+int
+perfmon_thread_init(struct thread *thread)
+{
+    struct perfmon_grouplist *grouplist;
+
+    grouplist = perfmon_grouplist_create();
+
+    if (grouplist == NULL) {
+        return ENOMEM;
+    }
+
+    thread->perfmon_groups = grouplist;
+    return 0;
+}
+
+void
+perfmon_thread_destroy(struct thread *thread)
+{
+    perfmon_grouplist_destroy(thread->perfmon_groups);
+}
+
+void
+perfmon_thread_load(struct thread *thread)
+{
+    struct perfmon_grouplist *grouplist;
+    struct perfmon_group *group;
+
+    assert(!cpu_intr_enabled());
+    assert(!thread_preempt_enabled());
+
+    grouplist = thread->perfmon_groups;
+
+    spinlock_lock(&grouplist->lock);
+
+    list_for_each_entry(&grouplist->groups, group, node) {
+        spinlock_lock(&group->lock);
+
+        if (perfmon_group_enabled(group) && !perfmon_group_loaded(group)) {
+            perfmon_group_load(group);
+        }
+
+        spinlock_unlock(&group->lock);
+    }
+
+    spinlock_unlock(&grouplist->lock);
+}
+
+void
+perfmon_thread_unload(struct thread *thread)
+{
+    struct perfmon_grouplist *grouplist;
+    struct perfmon_group *group;
+
+    assert(!cpu_intr_enabled());
+    assert(!thread_preempt_enabled());
+
+    grouplist = thread->perfmon_groups;
+
+    spinlock_lock(&grouplist->lock);
+
+    list_for_each_entry(&grouplist->groups, group, node) {
+        spinlock_lock(&group->lock);
+
+        /* TODO: Avoid long loops over the groups. One way to do this
+         * would be to maintain an event mapping in the grouplist so that
+         * this remains a bounded operation upon scheduling.
+         */
+
+        if (perfmon_group_loaded(group)) {
+            perfmon_group_unload(group);
+
+            if (perfmon_group_stopping(group)) {
+                group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+                group->flags &= ~PERFMON_GF_ENABLED;
+            }
+        }
+
+        spinlock_unlock(&group->lock);
+    }
+
+    spinlock_unlock(&grouplist->lock);
+}
diff --git a/kern/perfmon.h b/kern/perfmon.h
new file mode 100644
index 0000000..b1da4ec
--- /dev/null
+++ b/kern/perfmon.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Performance monitoring based on hardware performance counters.
+ */
+
+#ifndef KERN_PERFMON_H
+#define KERN_PERFMON_H
+
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/thread.h>
+
+/*
+ * Performance event types.
+ */
+#define PERFMON_ET_GENERIC 0
+#define PERFMON_ET_RAW 1
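A generic event is translated into a hardware event ID by the PMU driver's
translate operation, while a raw event is meant to carry a model-specific
event ID through unchanged. A rough sketch of the difference, where the raw
value 0x3c is only a made-up example and not part of this interface:

    struct perfmon_event *event;

    /* Portable: the driver translates the generic ID for the local PMU. */
    perfmon_event_create(&event, PERFMON_ET_GENERIC, PERFMON_EV_CYCLE,
                         PERFMON_EF_KERN);

    /* Hardware-specific: the ID is a raw, model-specific event encoding. */
    perfmon_event_create(&event, PERFMON_ET_RAW, 0x3c, PERFMON_EF_KERN);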
+
+/*
+ * IDs of generic performance events.
+ */
+#define PERFMON_EV_CYCLE 0
+#define PERFMON_EV_REF_CYCLE 1
+#define PERFMON_EV_INSTRUCTION 2
+#define PERFMON_EV_CACHE_REF 3
+#define PERFMON_EV_CACHE_MISS 4
+#define PERFMON_EV_BRANCH 5
+#define PERFMON_EV_BRANCH_MISS 6
+#define PERFMON_NR_GENERIC_EVENTS 7
+
+/*
+ * Event flags.
+ */
+#define PERFMON_EF_KERN 0x1 /* Monitor events in kernel mode */
+#define PERFMON_EF_USER 0x2 /* Monitor events in user mode */
+#define PERFMON_EF_MASK (PERFMON_EF_KERN | PERFMON_EF_USER)
+
+/*
+ * PMU operations.
+ *
+ * Set by calling perfmon_pmu_register().
+ */
+struct perfmon_pmu_ops {
+    void (*info)(void);
+    int (*translate)(unsigned int *raw_event_idp, unsigned int event_id);
+    int (*alloc)(unsigned int *pmc_idp, unsigned int raw_event_id);
+    void (*free)(unsigned int pmc_id);
+    void (*start)(unsigned int pmc_id, unsigned int raw_event_id);
+    void (*stop)(unsigned int pmc_id);
+    uint64_t (*read)(unsigned int pmc_id);
+    void (*write)(unsigned int pmc_id, uint64_t value);
+    /* If set, of_max_ticks should be set to 0. */
+    void (*handle_of_intr)(void);
+};
+
+/*
+ * PMU device description.
+ */
+struct perfmon_pmu_driver {
+    uint8_t pmc_width; /* Width of a PMC, in bits */
+    /*
+     * Maximum number of clock ticks between two overflow checks.
+     * Should be set to 0 if handle_of_intr is set.
+     */
+    uint64_t of_max_ticks;
+    struct perfmon_pmu_ops ops;
+};
+
+/*
+ * Performance monitoring event.
+ *
+ * An event describes a single, well-defined state and records its
+ * occurrences over a period of time. It must be added to exactly
+ * one group before being used.
+ */
+struct perfmon_event;
+
+/*
+ * Group of performance monitoring events.
+ *
+ * A group must be attached to either a thread or a processor, and abstracts
+ * all operations on hardware counters.
+ *
+ * Until a group is actually attached, it is assumed there is only one
+ * reference on it, owned by the caller.
+ *
+ * For a thread-attached group, it is the user's responsibility to make sure
+ * that perfmon_group_stop() is always called before the monitored thread is
+ * deleted.
+ */
+struct perfmon_group;
+
+/*
+ * Create an event.
+ */
+int perfmon_event_create(struct perfmon_event **eventp, unsigned int type,
+                         unsigned int id, int flags);
+
+/*
+ * Destroy an event.
+ *
+ * Once an event is added to a group, it can only be destroyed by destroying
+ * the group.
+ */
+void perfmon_event_destroy(struct perfmon_event *event);
+
+/*
+ * Obtain the number of occurrences of an event.
+ *
+ * Events are updated at specific points in time, which means the value
+ * returned by this function can be outdated.
+ *
+ * See perfmon_group_update() and perfmon_group_stop().
+ */
+uint64_t perfmon_event_read(const struct perfmon_event *event);
+
+/*
+ * Reset the number of occurrences of an event to 0.
+ *
+ * The group containing the given event should be stopped when calling
+ * this function.
+ */
+void perfmon_event_reset(struct perfmon_event *event);
+
+/*
+ * Create an event group.
+ *
+ * Events must be added to the group, which must then be attached to a
+ * processor or a thread.
+ */
+int perfmon_group_create(struct perfmon_group **groupp);
+
+/*
+ * Destroy a group and all its events.
+ *
+ * A group can only be destroyed once stopped and detached.
+ *
+ * Returns EINVAL if the group is not detached.
+ */
+int perfmon_group_destroy(struct perfmon_group *group);
+
+/*
+ * Add an event into a group.
+ *
+ * Events can only be added while a group isn't attached.
+ */
+void perfmon_group_add(struct perfmon_group *group,
+                       struct perfmon_event *event);
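Putting the calls above and below end to end, a minimal monitoring sequence
could look as follows. This is only a sketch: error handling is elided, and
the module is assumed to be fully initialized (see perfmon_setup):

    struct perfmon_group *group;
    struct perfmon_event *event;
    uint64_t count;

    perfmon_group_create(&group);
    perfmon_event_create(&event, PERFMON_ET_GENERIC, PERFMON_EV_CYCLE,
                         PERFMON_EF_KERN);
    perfmon_group_add(group, event);      /* add before attaching */
    perfmon_group_attach_cpu(group, 0);   /* reserves logical counters */
    perfmon_group_start(group);

    /* ... run the monitored workload ... */

    perfmon_group_stop(group);            /* implicitly updates events */
    count = perfmon_event_read(event);
    perfmon_group_detach(group);
    perfmon_group_destroy(group);         /* destroys the event too */
    (void)count;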
+
+/*
+ * Attach a group to a thread or a processor, respectively, reserving
+ * the associated logical counters.
+ *
+ * A group can only be attached to one thread or processor at a time.
+ */
+int perfmon_group_attach(struct perfmon_group *group, struct thread *thread);
+int perfmon_group_attach_cpu(struct perfmon_group *group, unsigned int cpu);
+
+/*
+ * Detach a group from a thread or a processor.
+ *
+ * This frees the associated logical counters.
+ *
+ * Returns EINVAL if the group is still enabled (not stopped).
+ */
+int perfmon_group_detach(struct perfmon_group *group);
+
+/*
+ * Start performance monitoring.
+ *
+ * A group must be attached before being started.
+ */
+int perfmon_group_start(struct perfmon_group *group);
+
+/*
+ * Update all events in the given group.
+ */
+void perfmon_group_update(struct perfmon_group *group);
+
+/*
+ * Stop performance monitoring.
+ *
+ * A group can't be detached before it's stopped. Events are implicitly
+ * updated when calling this function.
+ */
+int perfmon_group_stop(struct perfmon_group *group);
+
+/*
+ * Initialize perfmon thread-specific data for the given thread.
+ */
+int perfmon_thread_init(struct thread *thread);
+
+/*
+ * Destroy perfmon thread-specific data for the given thread.
+ */
+void perfmon_thread_destroy(struct thread *thread);
+
+/*
+ * Load/unload the events associated with a thread on the current processor.
+ *
+ * These functions should only be used by the scheduler during context switch.
+ * Interrupts and preemption must be disabled when calling them.
+ */
+void perfmon_thread_load(struct thread *thread);
+void perfmon_thread_unload(struct thread *thread);
+
+/*
+ * This init operation provides:
+ *  - perfmon_thread_init()
+ */
+INIT_OP_DECLARE(perfmon_bootstrap);
+
+/*
+ * This init operation provides:
+ *  - module fully initialized
+ */
+INIT_OP_DECLARE(perfmon_setup);
+
+/*
+ * Handle an overflow interrupt.
+ */
+void perfmon_of_intr(void);
+
+/*
+ * Register an architecture-specific driver.
+ */
+int perfmon_pmu_register(struct perfmon_pmu_driver *driver);
+
+/*
+ * Signal an overflow for the given PMC.
+ *
+ * Should be called from a PMU driver's custom overflow interrupt handler.
+ */
+void perfmon_cpu_on_pmc_of(unsigned int pmc_id);
+
+#endif /* KERN_PERFMON_H */
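On the driver side, an architecture-specific PMU advertises itself through
this same header. The following is a hypothetical skeleton with no-op
example_* callbacks (the real implementations are pmu_intel.c and pmu_amd.c
in this commit); the counter width and polling interval are arbitrary
assumptions, not values mandated by the interface:

    static void example_info(void) {}
    static int example_translate(unsigned int *raw_event_idp,
                                 unsigned int event_id)
    { *raw_event_idp = event_id; return 0; }
    static int example_alloc(unsigned int *pmc_idp, unsigned int raw_event_id)
    { (void)raw_event_id; *pmc_idp = 0; return 0; }
    static void example_free(unsigned int pmc_id) { (void)pmc_id; }
    static void example_start(unsigned int pmc_id, unsigned int raw_event_id)
    { (void)pmc_id; (void)raw_event_id; }
    static void example_stop(unsigned int pmc_id) { (void)pmc_id; }
    static uint64_t example_read(unsigned int pmc_id)
    { (void)pmc_id; return 0; }
    static void example_write(unsigned int pmc_id, uint64_t value)
    { (void)pmc_id; (void)value; }

    static struct perfmon_pmu_driver example_pmu_driver = {
        .pmc_width = 48,           /* assumption: 48-bit counters */
        .of_max_ticks = 1UL << 30, /* arbitrary: polling, no overflow irq */
        .ops = {
            .info = example_info,
            .translate = example_translate,
            .alloc = example_alloc,
            .free = example_free,
            .start = example_start,
            .stop = example_stop,
            .read = example_read,
            .write = example_write,
            /* .handle_of_intr left unset, hence of_max_ticks != 0 */
        },
    };

    static int __init
    example_pmu_setup(void)
    {
        return perfmon_pmu_register(&example_pmu_driver);
    }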
diff --git a/kern/perfmon_i.h b/kern/perfmon_i.h
new file mode 100644
index 0000000..3072171
--- /dev/null
+++ b/kern/perfmon_i.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Internal functions of the performance monitoring module.
+ */
+
+#ifndef KERN_PERFMON_I_H
+#define KERN_PERFMON_I_H
+
+#include <kern/perfmon.h>
+
+#ifdef CONFIG_PERFMON_TEST
+
+/*
+ * Set the hardware counter value of a running event, for overflow test
+ * purposes.
+ *
+ * Beware, this will affect all events associated with the same hardware
+ * counter.
+ */
+int perfmon_event_write(struct perfmon_event *event, uint64_t value);
+
+/*
+ * Return the bit width of the hardware performance counters.
+ */
+int perfmon_get_pmc_width(void);
+
+#endif /* CONFIG_PERFMON_TEST */
+
+#endif /* KERN_PERFMON_I_H */
diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h
new file mode 100644
index 0000000..6f9be0b
--- /dev/null
+++ b/kern/perfmon_types.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definitions used to avoid circular inclusion dependencies.
+ */
+
+#ifndef KERN_PERFMON_TYPES_H
+#define KERN_PERFMON_TYPES_H
+
+struct perfmon_grouplist;
+
+#endif /* KERN_PERFMON_TYPES_H */
diff --git a/kern/thread.c b/kern/thread.c
index 85e557d..77960ec 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -100,6 +100,7 @@
 #include <kern/macros.h>
 #include <kern/panic.h>
 #include <kern/percpu.h>
+#include <kern/perfmon.h>
 #include <kern/rcu.h>
 #include <kern/shell.h>
 #include <kern/sleepq.h>
@@ -605,9 +606,22 @@ thread_runq_wakeup_balancer(struct thread_runq *runq)
 }
 
 static void
-thread_runq_schedule_prepare(struct thread *thread)
+thread_runq_schedule_load(struct thread *thread)
 {
     pmap_load(thread->task->map->pmap);
+#ifdef CONFIG_PERFMON
+    perfmon_thread_load(thread);
+#endif
+}
+
+static void
+thread_runq_schedule_unload(struct thread *thread)
+{
+#ifdef CONFIG_PERFMON
+    perfmon_thread_unload(thread);
+#else
+    (void)thread;
+#endif
 }
 
 static struct thread_runq *
@@ -639,6 +653,8 @@ thread_runq_schedule(struct thread_runq *runq)
     assert(next->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL);
 
     if (likely(prev != next)) {
+        thread_runq_schedule_unload(prev);
+
         rcu_report_context_switch(thread_rcu_reader(prev));
         spinlock_transfer_owner(&runq->lock, next);
@@ -660,10 +676,10 @@ thread_runq_schedule(struct thread_runq *runq)
          * - The current thread may have been migrated to another processor.
         */
        barrier();
 
+        thread_runq_schedule_load(prev);
+
         next = NULL;
         runq = thread_runq_local();
-
-        thread_runq_schedule_prepare(prev);
     } else {
         next = NULL;
     }
@@ -1750,7 +1766,7 @@ thread_main(void (*fn)(void *), void *arg)
     assert(!thread_preempt_enabled());
 
     thread = thread_self();
-    thread_runq_schedule_prepare(thread);
+    thread_runq_schedule_load(thread);
 
     spinlock_unlock(&thread_runq_local()->lock);
     cpu_intr_enable();
@@ -1847,6 +1863,14 @@ thread_init(struct thread *thread, void *stack,
         thread->flags |= THREAD_DETACHED;
     }
 
+#ifdef CONFIG_PERFMON
+    error = perfmon_thread_init(thread);
+
+    if (error) {
+        goto error_perfmon;
+    }
+#endif /* CONFIG_PERFMON */
+
     error = tcb_build(&thread->tcb, stack, fn, arg);
 
     if (error) {
@@ -1858,6 +1882,10 @@ thread_init(struct thread *thread, void *stack,
     return 0;
 
 error_tcb:
+#ifdef CONFIG_PERFMON
+    perfmon_thread_destroy(thread);
+error_perfmon:
+#endif /* CONFIG_PERFMON */
     thread_destroy_tsd(thread);
     turnstile_destroy(thread->priv_turnstile);
 error_turnstile:
@@ -1977,6 +2005,9 @@ thread_destroy(struct thread *thread)
     /* See task_info() */
     task_remove_thread(thread->task, thread);
 
+#ifdef CONFIG_PERFMON
+    perfmon_thread_destroy(thread);
+#endif
     thread_destroy_tsd(thread);
     turnstile_destroy(thread->priv_turnstile);
     sleepq_destroy(thread->priv_sleepq);
@@ -2309,6 +2340,13 @@ thread_setup(void)
 #define THREAD_STACK_GUARD_INIT_OP_DEPS
 #endif /* CONFIG_THREAD_STACK_GUARD */
 
+#ifdef CONFIG_PERFMON
+#define THREAD_PERFMON_INIT_OP_DEPS \
+    INIT_OP_DEP(perfmon_bootstrap, true),
+#else /* CONFIG_PERFMON */
+#define THREAD_PERFMON_INIT_OP_DEPS
+#endif /* CONFIG_PERFMON */
+
 INIT_OP_DEFINE(thread_setup,
                INIT_OP_DEP(cpumap_setup, true),
                INIT_OP_DEP(kmem_setup, true),
@@ -2317,6 +2355,7 @@ INIT_OP_DEFINE(thread_setup,
                INIT_OP_DEP(task_setup, true),
                INIT_OP_DEP(thread_bootstrap, true),
                INIT_OP_DEP(turnstile_setup, true),
+               THREAD_PERFMON_INIT_OP_DEPS
                THREAD_STACK_GUARD_INIT_OP_DEPS
                );
 
@@ -2696,6 +2735,13 @@ thread_report_periodic_event(void)
     spinlock_unlock(&runq->lock);
 }
 
+unsigned int
+thread_cpu(const struct thread *thread)
+{
+    assert(thread->runq);
+    return thread->runq->cpu;
+}
+
 char
 thread_state_to_chr(const struct thread *thread)
 {
diff --git a/kern/thread.h b/kern/thread.h
index 4bead75..787ccf5 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -283,6 +283,14 @@ void thread_setscheduler(struct thread *thread, unsigned char policy,
 void thread_pi_setscheduler(struct thread *thread, unsigned char policy,
                             unsigned short priority);
 
+/*
+ * Return the last CPU on which the thread has been scheduled.
+ *
+ * This call is not synchronized with respect to migration. The caller
+ * may obtain an outdated value.
+ */
+unsigned int thread_cpu(const struct thread *thread);
+
 static inline void
 thread_ref(struct thread *thread)
 {
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 0be1e77..9c24d3a 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -24,6 +24,7 @@
 #include <kern/atomic.h>
 #include <kern/cpumap.h>
 #include <kern/list_types.h>
+#include <kern/perfmon_types.h>
 #include <kern/rcu_types.h>
 #include <kern/spinlock_types.h>
 #include <kern/turnstile_types.h>
@@ -185,6 +186,10 @@ struct thread {
     struct list task_node; /* (T) */
     void *stack; /* (-) */
     char name[THREAD_NAME_SIZE]; /* ( ) */
+
+#ifdef CONFIG_PERFMON
+    struct perfmon_grouplist *perfmon_groups;
+#endif
 };
 
 #define THREAD_ATTR_DETACHED 0x1
diff --git a/test/Kconfig b/test/Kconfig
index 80679ef..6999605 100644
--- a/test/Kconfig
+++ b/test/Kconfig
@@ -19,6 +19,18 @@ config TEST_MODULE_MUTEX
 config TEST_MODULE_MUTEX_PI
         bool "mutex_pi"
 
+config TEST_MODULE_PERFMON_CPU
+        bool "perfmon_cpu"
+        depends on PERFMON
+
+config TEST_MODULE_PERFMON_THREAD
+        bool "perfmon_thread"
+        depends on PERFMON
+
+config TEST_MODULE_PERFMON_TORTURE
+        bool "perfmon_torture"
+        depends on PERFMON
+
 config TEST_MODULE_PMAP_UPDATE_MP
         bool "pmap_update_mp"
 
@@ -43,3 +55,7 @@ config TEST_MODULE_XCALL
 endchoice
 
 endif
+
+config PERFMON_TEST
+        def_bool y
+        depends on TEST_MODULE_PERFMON_THREAD || TEST_MODULE_PERFMON_CPU
diff --git a/test/Makefile b/test/Makefile
index c98d6fb..b6f2260 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,6 +1,9 @@
 x15_SOURCES-$(CONFIG_TEST_MODULE_BULLETIN) += test/test_bulletin.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX) += test/test_mutex.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX_PI) += test/test_mutex_pi.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_PMAP_UPDATE_MP) += test/test_pmap_update_mp.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_RCU_DEFER) += test/test_rcu_defer.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES) += test/test_sref_dirty_zeroes.c
diff --git a/test/test_perfmon_cpu.c b/test/test_perfmon_cpu.c
new file mode 100644
index 0000000..6f1414c
--- /dev/null
+++ b/test/test_perfmon_cpu.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2014 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Here, we test the perfmon module for cross-CPU performance monitoring.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <kern/error.h>
+#include <kern/perfmon_i.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+#include <test/test.h>
+
+#define WAIT_DELAY_USEC 1000000
+
+static volatile bool stop;
+
+static void
+test_do_nothing(void *arg)
+{
+    (void)arg;
+
+    while (!stop);
+}
+
+static void
+test_report_event(const struct perfmon_event *event, const char *name)
+{
+    unsigned long long count;
+
+    count = perfmon_event_read(event);
+    printf("test: %s: %llu\n", name, count);
+}
+
+static uint64_t
+test_get_pre_overflow_value(uint64_t value)
+{
+    uint64_t pmc_max;
+    unsigned int pmc_width;
+
+    pmc_width = perfmon_get_pmc_width();
+    pmc_max = (1ULL << pmc_width) - 1;
+    pmc_max &= 0xffffffff80000000;
+
+    /* XXX: Work around most processors not allowing full-width writes. */
+    return ((~value + 1) & 0x7fffffff) | pmc_max;
+}
+
+static void
+test_run(void *arg)
+{
+    struct perfmon_event *ev_cycle, *ev_instruction;
+    struct perfmon_group *group;
+    int error;
+    uint64_t value;
+
+    (void)arg;
+
+    error = perfmon_group_create(&group);
+    error_check(error, "perfmon_group_create");
+
+    error = perfmon_event_create(&ev_cycle, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_event_create");
+    perfmon_group_add(group, ev_cycle);
+
+    error = perfmon_event_create(&ev_instruction, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_INSTRUCTION, PERFMON_EF_KERN);
+    error_check(error, "perfmon_event_create");
+    perfmon_group_add(group, ev_instruction);
+
+    error = perfmon_group_attach_cpu(group, 1);
+    error_check(error, "perfmon_group_attach_cpu 1");
+
+    error = perfmon_group_start(group);
+    error_check(error, "perfmon_group_start");
+
+    cpu_delay(WAIT_DELAY_USEC);
+    error = perfmon_group_stop(group);
+    error_check(error, "perfmon_group_stop");
+    test_report_event(ev_cycle, "cycle");
+    test_report_event(ev_instruction, "instruction");
+
+    printf("checking with overflow ...\n");
+    value = test_get_pre_overflow_value(perfmon_event_read(ev_cycle) / 2);
+    error = perfmon_event_write(ev_cycle, value);
+    error_check(error, "perfmon_event_write");
+
+    value = test_get_pre_overflow_value(perfmon_event_read(ev_instruction) / 3);
+    error = perfmon_event_write(ev_instruction, value);
+    error_check(error, "perfmon_event_write");
+
+    perfmon_event_reset(ev_cycle);
+    perfmon_event_reset(ev_instruction);
+
+    error = perfmon_group_start(group);
+    error_check(error, "perfmon_group_start");
+
+    cpu_delay(WAIT_DELAY_USEC);
+    error = perfmon_group_stop(group);
+    error_check(error, "perfmon_group_stop");
+    test_report_event(ev_cycle, "cycle");
+    test_report_event(ev_instruction, "instruction");
+
+    error = perfmon_group_detach(group);
+    error_check(error, "perfmon_group_detach");
+
+    error = perfmon_group_destroy(group);
+    error_check(error, "perfmon_group_destroy");
+
+    stop = true;
+}
+
+void
+test_setup(void)
+{
+    struct thread_attr attr;
+    struct thread *thread0, *thread1;
+    struct cpumap *cpumap;
+    int error;
+
+    error = cpumap_create(&cpumap);
+    error_check(error, "cpumap_create 0");
+    cpumap_zero(cpumap);
+    cpumap_set(cpumap, 0);
+
+    thread_attr_init(&attr, "x15_test_run");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    error = thread_create(&thread0, &attr, test_run, NULL);
+    error_check(error, "thread_create 0");
+
+    cpumap_zero(cpumap);
+    cpumap_set(cpumap, 1);
+
+    thread_attr_init(&attr, "x15_test_do_nothing");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    error = thread_create(&thread1, &attr, test_do_nothing, NULL);
+    error_check(error, "thread_create 1");
+}
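For clarity, here is a worked instance of test_get_pre_overflow_value(),
under the assumption of a 48-bit wide PMC and value = 100:

    /* Hypothetical walk-through, assuming pmc_width = 48, value = 100. */
    pmc_max = (1ULL << 48) - 1;       /* 0x0000ffffffffffff */
    pmc_max &= 0xffffffff80000000;    /* 0x0000ffff80000000 */

    /* ~100 + 1 is the two's complement of 100; keep the low 31 bits:   */
    /* 0xffffffffffffff9c & 0x7fffffff == 0x7fffff9c                    */
    /* result: 0x0000ffff80000000 | 0x7fffff9c == 0x0000ffffffffff9c    */

The counter is thus parked 100 increments below the 48-bit wrap-around
point, while the masking keeps the written value within the 31-bit range
that most processors accept for counter writes (hence the XXX note).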
diff --git a/test/test_perfmon_thread.c b/test/test_perfmon_thread.c
new file mode 100644
index 0000000..e78fab9
--- /dev/null
+++ b/test/test_perfmon_thread.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2014 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test checks that CPU-local remote thread monitoring works properly.
+ * The test uses two threads: the main one, which is monitored, and another
+ * that acts as a control thread. The monitored counts should increase while
+ * the main thread runs and shouldn't otherwise.
+ * Initially, the main thread runs while the control one doesn't.
+ * The control thread is then scheduled, and finally the main thread is
+ * rescheduled.
+ *
+ * In order to trigger the counter slot reuse mechanism, this test also adds
+ * a CPU counter on the same CPU as the threads.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <kern/condition.h>
+#include <kern/error.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+
+#include <test/test.h>
+
+#define NR_LOOPS 1000000UL
+
+static inline void
+test_loop(void)
+{
+    volatile unsigned long i;
+
+    for (i = 0; i < NR_LOOPS; i++);
+}
+
+struct thread *test_main, *test_control;
+struct perfmon_group *thread_group;
+struct perfmon_event *thread_ev_cycle;
+bool test_monitoring = true;
+
+struct perfmon_group *cpu_group;
+struct perfmon_event *cpu_ev_cycle;
+
+struct proxy_thread_runq {
+    struct spinlock lock;
+    unsigned int cpu;
+};
+
+static void
+x15_test_main_run(void *arg)
+{
+    unsigned long long thread_count1, thread_count2;
+    unsigned long long cpu_count1, cpu_count2;
+    int error;
+
+    (void)arg;
+
+    thread_preempt_disable();
+
+    /* Create a perfmon group to monitor this CPU. */
+    error = perfmon_group_create(&cpu_group);
+    error_check(error, "perfmon_cpu_group_create");
+
+    error = perfmon_event_create(&cpu_ev_cycle, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_cpu_event_create");
+    perfmon_group_add(cpu_group, cpu_ev_cycle);
+
+    error = perfmon_group_attach_cpu(cpu_group, 0);
+    error_check(error, "perfmon_cpu_group_attach");
+
+    error = perfmon_group_start(cpu_group);
+    error_check(error, "perfmon_group_start_cpu");
+
+    perfmon_group_update(cpu_group);
+    cpu_count1 = perfmon_event_read(cpu_ev_cycle);
+
+    /* Create a perfmon group to monitor this thread. */
+    error = perfmon_group_create(&thread_group);
+    error_check(error, "perfmon_thread_group_create");
+
+    error = perfmon_event_create(&thread_ev_cycle, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_thread_event_create");
+    perfmon_group_add(thread_group, thread_ev_cycle);
+
+    error = perfmon_group_attach(thread_group, thread_self());
+    error_check(error, "perfmon_thread_group_attach");
+
+    /* Start monitoring. */
+    error = perfmon_group_start(thread_group);
+    error_check(error, "perfmon_group_start_thread");
+
+    perfmon_group_update(thread_group);
+    thread_count1 = perfmon_event_read(thread_ev_cycle);
+
+    test_loop();
+    perfmon_group_update(thread_group);
+    thread_count2 = perfmon_event_read(thread_ev_cycle);
+
+    perfmon_group_update(cpu_group);
+    cpu_count2 = perfmon_event_read(cpu_ev_cycle);
+
+    if (thread_count1 == thread_count2) {
+        panic("not monitoring thread after monitoring start\n"
+              "stayed at %llu cycles\n", thread_count1);
+    }
+
+    if (cpu_count1 == cpu_count2) {
+        panic("not monitoring cpu after monitoring start\n"
+              "stayed at %llu cycles\n", cpu_count1);
+    }
+
+    /* Let's switch to the other thread and sleep. */
+    test_monitoring = false;
+    thread_wakeup(test_control);
+    thread_sleep(NULL, &test_monitoring, "dummy sync object");
+
+    /* Waking up. */
+    if (!test_monitoring) {
+        panic("main thread woke up when it should not");
+    }
+
+    /* Check that monitoring is active again. */
+    perfmon_group_update(cpu_group);
+    cpu_count1 = perfmon_event_read(cpu_ev_cycle);
+
+    perfmon_group_update(thread_group);
+    thread_count1 = perfmon_event_read(thread_ev_cycle);
+
+    test_loop();
+
+    perfmon_group_update(thread_group);
+    thread_count2 = perfmon_event_read(thread_ev_cycle);
+
+    perfmon_group_update(cpu_group);
+    cpu_count2 = perfmon_event_read(cpu_ev_cycle);
+
+    if (thread_count1 == thread_count2) {
+        panic("not monitoring thread after thread re-scheduling\n"
+              "stayed at %llu cycles\n", thread_count1);
+    }
+
+    if (cpu_count1 == cpu_count2) {
+        panic("not monitoring cpu after thread got re-scheduled\n"
+              "stayed at %llu cycles\n", cpu_count1);
+    }
+
+    thread_preempt_enable();
+
+    error = perfmon_group_stop(thread_group);
+    error_check(error, "perfmon_group_stop_thread");
+
+    error = perfmon_group_detach(thread_group);
+    error_check(error, "perfmon_group_detach_thread");
+
+    error = perfmon_group_destroy(thread_group);
+    error_check(error, "perfmon_group_destroy_thread");
+
+    error = perfmon_group_stop(cpu_group);
+    error_check(error, "perfmon_group_stop_cpu");
+    error = perfmon_group_detach(cpu_group);
+    error_check(error, "perfmon_group_detach_cpu");
+    error = perfmon_group_destroy(cpu_group);
+    error_check(error, "perfmon_group_destroy_cpu");
+
+    printf("test perfmon thread sched finished\n");
+}
+
+static void
+x15_test_control_run(void *arg)
+{
+    unsigned long long thread_count1, thread_count2;
+    unsigned long long cpu_count1, cpu_count2;
+
+    (void)arg;
+
+    thread_preempt_disable();
+
+    /* Let the first thread run. */
+    while (test_monitoring) {
+        thread_sleep(NULL, &test_monitoring, "dummy sync object");
+    }
+
+    /* Check that this thread is not monitored (but the CPU is). */
+    perfmon_group_update(cpu_group);
+    cpu_count1 = perfmon_event_read(cpu_ev_cycle);
+
+    perfmon_group_update(thread_group);
+    thread_count1 = perfmon_event_read(thread_ev_cycle);
+
+    test_loop();
+
+    perfmon_group_update(thread_group);
+    thread_count2 = perfmon_event_read(thread_ev_cycle);
+
+    perfmon_group_update(cpu_group);
+    cpu_count2 = perfmon_event_read(cpu_ev_cycle);
+
+    if (thread_count1 != thread_count2) {
+        panic("still monitoring while thread is unscheduled\n"
+              "gone from %llu to %llu cycles\n", thread_count1, thread_count2);
+    }
+
+    if (cpu_count1 == cpu_count2) {
+        panic("not monitoring cpu after thread got unscheduled\n"
+              "stayed at %llu cycles\n", cpu_count1);
+    }
+
+    /* Wake up x15_test_main. */
+    test_monitoring = true;
+    thread_wakeup(test_main);
+    thread_preempt_enable();
+}
+
+void
+test_setup(void)
+{
+    struct thread_attr attr;
+    struct cpumap *cpumap;
+    int error;
+
+    printf("test perfmon thread sched start\n");
+
+    error = cpumap_create(&cpumap);
+    error_check(error, "cpumap_create");
+    cpumap_zero(cpumap);
+    cpumap_set(cpumap, 0);
+
+    thread_attr_init(&attr, "x15_test_main_thread");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+
+    error = thread_create(&test_main, &attr, x15_test_main_run, NULL);
+    error_check(error, "thread_create 0");
+
+    thread_attr_init(&attr, "x15_test_control_thread");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+    error = thread_create(&test_control, &attr, x15_test_control_run, NULL);
+    error_check(error, "thread_create 1");
+}
diff --git a/test/test_perfmon_torture.c b/test/test_perfmon_torture.c
new file mode 100644
index 0000000..7b54129
--- /dev/null
+++ b/test/test_perfmon_torture.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2014 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This is a stress test for perfmon thread monitoring.
+ * The goal is to exercise as many code paths of the perfmon module as
+ * possible. We therefore try to get a high migration rate by scheduling
+ * at least as many threads as CPUs.
+ * Also, we stop and restart some threads along the way in order to check
+ * whether stopped threads are properly handled.
+ *
+ * TODO: Replace thread selection with a proper pseudo-random function once
+ * we get one.
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <kern/error.h>
+#include <kern/kmem.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <test/test.h>
+
+enum test_thread_state {
+    TEST_LAUNCHED = 0,
+    TEST_RUNNING,
+    TEST_STOPPING,
+    TEST_STOPPED,
+};
+
+struct test_thread {
+    struct thread *thread;
+    struct perfmon_group *group;
+    struct perfmon_event *event;
+
+    bool monitored;
+    enum test_thread_state state;
+    unsigned long long count;
+};
+
+struct threads_stats {
+    size_t num_group_start;
+    size_t num_group_started;
+    size_t num_thread_start;
+    size_t num_thread_started;
+};
+
+static struct test_thread **test_threads;
+
+static struct thread *test_control;
+
+#define TEST_WAIT_INSTRUCT_COUNT 1000UL
+#define TEST_NUM_LOOP_STATUS_PRINT 200000
+
+static void
+test_wait(void)
+{
+    volatile unsigned long i;
+
+    /* TODO: Do something a bit more clever once timers are here. */
+
+    for (i = 0; i < TEST_WAIT_INSTRUCT_COUNT; i++);
+}
+
+static void
+test_thread_run(void *arg)
+{
+    struct test_thread *thread = arg;
+    unsigned long num_loops;
+
+    assert(thread->state == TEST_LAUNCHED);
+    num_loops = 0;
+
+    thread->state = TEST_RUNNING;
+
+    for (;;) {
+        barrier();
+
+        if (thread->state == TEST_STOPPING) {
+            break;
+        }
+
+        /* Individual threads wait twice as long as the control one in
+         * order to induce some asynchronism between the control thread
+         * and the monitored threads.
+         */
+        test_wait();
+        test_wait();
+        num_loops++;
+    }
+
+    thread->state = TEST_STOPPED;
+}
+
+static void
+test_thread_toggle_monitor(struct test_thread *thread,
+                           struct threads_stats *stats)
+{
+    int error;
+    struct perfmon_group *group;
+
+    group = thread->group;
+
+    if (!thread->monitored) {
+        error = perfmon_group_start(group);
+        error_check(error, "perfmon_group_start");
+        thread->monitored = true;
+        stats->num_group_start++;
+        stats->num_group_started++;
+    } else {
+        perfmon_group_update(group);
+        thread->count = perfmon_event_read(thread->event);
+        error = perfmon_group_stop(group);
+        error_check(error, "perfmon_group_stop");
+        thread->monitored = false;
+        stats->num_group_started--;
+    }
+}
+
+static int
+test_thread_create_monitored_thread(struct thread **thread, size_t index,
+                                    void *arg)
+{
+    struct thread_attr attr;
+    char name[THREAD_NAME_SIZE];
+
+    snprintf(name, sizeof(name), THREAD_KERNEL_PREFIX
+             "test_monitored_thread:%zu", index);
+    thread_attr_init(&attr, name);
+    thread_attr_set_detached(&attr);
+
+    return thread_create(thread, &attr, test_thread_run, arg);
+}
+
+static void
+test_thread_toggle_state(size_t index,
+                         struct threads_stats *stats)
+{
+    int error;
+    struct perfmon_group *group;
+    struct test_thread *thread;
+
+    thread = test_threads[index];
+    group = thread->group;
+
+    switch (thread->state) {
+    case TEST_RUNNING:
+        thread->state = TEST_STOPPING;
+        stats->num_thread_started--;
+        break;
+    case TEST_STOPPED:
+        /* Restart the thread and reattach the previous thread's group
+         * to the new one.
+         */
+        if (thread->monitored) {
+            test_thread_toggle_monitor(thread, stats);
+        }
+
+        error = perfmon_group_detach(group);
+        error_check(error, "perfmon_group_detach");
+        thread->state = TEST_LAUNCHED;
+        error = test_thread_create_monitored_thread(&thread->thread, index,
+                                                    thread);
+        error_check(error, "thread_recreate monitored");
+        error = perfmon_group_attach(group, thread->thread);
+        error_check(error, "perfmon_group_attach");
+        stats->num_thread_start++;
+        stats->num_thread_started++;
+        break;
+    default:
+        /* Do nothing if the thread is not in a stable state. */
+        break;
+    }
+}
+
+static void
+test_x15_test_control_run(void *arg)
+{
+    size_t selected_thread;
+    size_t stopped_thread;
+    struct test_thread *thread;
+    size_t nr_threads;
+    size_t loop_since_status;
+    struct threads_stats stats;
+
+    (void)arg;
+
+    nr_threads = MAX(cpu_count() - 1, 1);
+    selected_thread = 0;
+    stopped_thread = 0;
+    loop_since_status = 0;
+    stats.num_group_start = 0;
+    stats.num_group_started = 0;
+    stats.num_thread_start = nr_threads;
+    stats.num_thread_started = nr_threads;
+
+    printf("monitoring %zu threads\n", nr_threads);
+
+    for (;;) {
+        /* Dummy `random' thread selection. */
+        selected_thread = (selected_thread + 7) % nr_threads;
+        thread = test_threads[selected_thread];
+        test_thread_toggle_monitor(thread, &stats);
+
+        /* Only half of the threads may be stopped/restarted. */
+        stopped_thread = (stopped_thread + 11) % ((nr_threads + 1) / 2);
+        test_thread_toggle_state(stopped_thread, &stats);
+
+        test_wait();
+
+        if (!(++loop_since_status % TEST_NUM_LOOP_STATUS_PRINT)) {
+            printf("===============================\n");
+            printf("%zu groups started (%zu total)\n", stats.num_group_started,
+                   stats.num_group_start);
+            printf("%zu threads started (%zu total)\n",
+                   stats.num_thread_started, stats.num_thread_start);
+            printf("monitor value: ");
+
+            for (size_t i = 0; i < nr_threads; i++) {
+                printf("%zu: %llu, ", i, test_threads[i]->count);
+            }
+
+            printf("\n");
+        }
+    }
+}
+
+static struct test_thread *
+test_thread_create(size_t index)
+{
+    struct test_thread *thread;
+    int error;
+
+    thread = kmem_zalloc(sizeof(*thread));
+
+    if (thread == NULL) {
+        panic("thread allocation failed");
+    }
+
+    error = test_thread_create_monitored_thread(&thread->thread, index, thread);
+    error_check(error, "thread_create");
+    error = perfmon_group_create(&thread->group);
+    error_check(error, "perfmon_group_create");
+    error = perfmon_event_create(&thread->event, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_event_create");
+
+    perfmon_group_add(thread->group, thread->event);
+    error = perfmon_group_attach(thread->group, thread->thread);
+    error_check(error, "perfmon_group_attach");
+
+    return thread;
+}
+
+void
+test_setup(void)
+{
+    struct thread_attr attr;
+    size_t nr_threads;
+    size_t i;
+    int error;
+
+    nr_threads = MAX(cpu_count() - 1, 1);
+
+    test_threads = kmem_alloc(nr_threads * sizeof(*test_threads));
+
+    for (i = 0; i < nr_threads; i++) {
+        test_threads[i] = test_thread_create(i);
+    }
+
+    thread_attr_init(&attr, "x15_test_control_thread");
+    thread_attr_set_detached(&attr);
+    error = thread_create(&test_control, &attr, test_x15_test_control_run,
+                          NULL);
+    error_check(error, "thread_create control");
+}
diff --git a/tools/build_configs.py b/tools/build_configs.py
index b0674b7..ba872d4 100755
--- a/tools/build_configs.py
+++ b/tools/build_configs.py
@@ -94,6 +94,7 @@ small_options_dict = {
     'CONFIG_SMP' : ['y', 'n'],
    'CONFIG_MAX_CPUS' : ['1', '128'],
     'CONFIG_ASSERT' : ['y', 'n'],
+    'CONFIG_PERFMON' : ['y', 'n'],
 }
 
 large_options_dict = dict(small_options_dict)
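With this last hunk, the configuration generator also exercises builds with
and without the feature. For a manual build, a plausible configuration
fragment enabling the module and one of its tests (option names taken from
the Kconfig entries added in this commit) would be:

    CONFIG_PERFMON=y
    CONFIG_TEST_MODULE_PERFMON_CPU=y
    # CONFIG_PERFMON_TEST is then enabled automatically (def_bool y)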