diff options
author | Richard Braun <rbraun@sceen.net> | 2018-06-25 21:56:01 +0200 |
---|---|---|
committer | Richard Braun <rbraun@sceen.net> | 2018-06-25 21:56:01 +0200 |
commit | 30dd97fb786ef5f7ca28049684b17bdc2ee7a718 (patch) | |
tree | 330f0514edcfaaa4e3266edb1191c90a39edcb80 | |
parent | 0a7c73d2e06172a1210e2bbdfba5718040f4f007 (diff) | |
parent | 7686bfcb703049db5d3711e59133ca4b2259e1f1 (diff) |
Merge branch 'perfmon'
-rw-r--r-- | arch/x86/Kconfig | 20 | ||||
-rw-r--r-- | arch/x86/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/machine/boot.c | 30 | ||||
-rw-r--r-- | arch/x86/machine/boot.h | 6 | ||||
-rw-r--r-- | arch/x86/machine/cpu.c | 59 | ||||
-rw-r--r-- | arch/x86/machine/cpu.h | 49 | ||||
-rw-r--r-- | arch/x86/machine/lapic.c | 19 | ||||
-rw-r--r-- | arch/x86/machine/lapic.h | 1 | ||||
-rw-r--r-- | arch/x86/machine/pmu_amd.c | 240 | ||||
-rw-r--r-- | arch/x86/machine/pmu_amd.h | 33 | ||||
-rw-r--r-- | arch/x86/machine/pmu_intel.c | 391 | ||||
-rw-r--r-- | arch/x86/machine/pmu_intel.h | 33 | ||||
-rw-r--r-- | arch/x86/machine/trap.c | 3 | ||||
-rw-r--r-- | arch/x86/machine/trap.h | 1 | ||||
-rw-r--r-- | doc/intro.9.txt | 2 | ||||
-rw-r--r-- | kern/Kconfig | 13 | ||||
-rw-r--r-- | kern/Makefile | 2 | ||||
-rw-r--r-- | kern/percpu.c | 30 | ||||
-rw-r--r-- | kern/percpu.h | 31 | ||||
-rw-r--r-- | kern/perfmon.c | 1443 | ||||
-rw-r--r-- | kern/perfmon.h | 221 | ||||
-rw-r--r-- | kern/perfmon_types.h | 102 | ||||
-rw-r--r-- | kern/task.c | 2 | ||||
-rw-r--r-- | kern/thread.c | 69 | ||||
-rw-r--r-- | kern/thread.h | 31 | ||||
-rw-r--r-- | kern/thread_i.h | 17 | ||||
-rw-r--r-- | test/Kconfig | 12 | ||||
-rw-r--r-- | test/Makefile | 3 | ||||
-rw-r--r-- | test/test_perfmon_cpu.c | 225 | ||||
-rw-r--r-- | test/test_perfmon_thread.c | 383 | ||||
-rw-r--r-- | test/test_perfmon_torture.c | 346 |
31 files changed, 3781 insertions, 39 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 251c4a4c..eeb999cd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -13,6 +13,26 @@ config X86_PAE PAE allows addressing physical memory beyond 4 GiB at the cost of more pagetable lookup and memory overhead. +config X86_PMU_AMD + bool "Enable AMD PMU driver" + select PERFMON + default n + ---help--- + Enable support for the performance monitoring unit on AMD + processors. + + If unsure, disable. + +config X86_PMU_INTEL + bool "Enable Intel PMU driver" + select PERFMON + default n + ---help--- + Enable support for the performance monitoring unit on Intel + processors. + + If unsure, disable. + endmenu config X86_32 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9866d93a..226f4a90 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -61,3 +61,6 @@ x15_SOURCES-y += \ arch/x86/machine/trap_asm.S \ arch/x86/machine/trap.c \ arch/x86/machine/uart.c + +x15_SOURCES-$(CONFIG_X86_PMU_AMD) += arch/x86/machine/pmu_amd.c +x15_SOURCES-$(CONFIG_X86_PMU_INTEL) += arch/x86/machine/pmu_intel.c diff --git a/arch/x86/machine/boot.c b/arch/x86/machine/boot.c index 6934896e..d540d8d8 100644 --- a/arch/x86/machine/boot.c +++ b/arch/x86/machine/boot.c @@ -56,6 +56,7 @@ #include <kern/log.h> #include <kern/macros.h> #include <kern/panic.h> +#include <kern/percpu.h> #include <kern/thread.h> #include <machine/acpi.h> #include <machine/atcons.h> @@ -67,6 +68,8 @@ #include <machine/multiboot.h> #include <machine/page.h> #include <machine/pmap.h> +#include <machine/pmu_amd.h> +#include <machine/pmu_intel.h> #include <machine/strace.h> #include <machine/uart.h> #include <vm/vm_kmem.h> @@ -504,6 +507,7 @@ boot_ap_main(void) cpu_ap_setup(); thread_ap_setup(); pmap_ap_setup(); + percpu_ap_setup(); kernel_ap_main(); /* Never reached */ @@ -551,6 +555,32 @@ boot_setup_intr(void) INIT_OP_DEFINE(boot_setup_intr, INIT_OP_DEP(acpi_setup, true)); +#ifdef CONFIG_PERFMON +static int __init +boot_setup_pmu(void) +{ + return 0; +} + 
+#ifdef CONFIG_X86_PMU_AMD +#define BOOT_PMU_AMD_INIT_OP_DEPS \ + INIT_OP_DEP(pmu_amd_setup, false), +#else /* CONFIG_X86_PMU_AMD */ +#define BOOT_PMU_AMD_INIT_OP_DEPS +#endif /* CONFIG_X86_PMU_AMD */ + +#ifdef CONFIG_X86_PMU_INTEL +#define BOOT_PMU_INTEL_INIT_OP_DEPS \ + INIT_OP_DEP(pmu_intel_setup, false), +#else /* CONFIG_X86_PMU_INTEL */ +#define BOOT_PMU_INTEL_INIT_OP_DEPS +#endif /* CONFIG_X86_PMU_INTEL */ + +INIT_OP_DEFINE(boot_setup_pmu, + BOOT_PMU_AMD_INIT_OP_DEPS + BOOT_PMU_INTEL_INIT_OP_DEPS); +#endif /* CONFIG_PERFMON */ + static int __init boot_setup_shutdown(void) { diff --git a/arch/x86/machine/boot.h b/arch/x86/machine/boot.h index d30b3beb..087f0c2a 100644 --- a/arch/x86/machine/boot.h +++ b/arch/x86/machine/boot.h @@ -160,6 +160,12 @@ INIT_OP_DECLARE(boot_setup_intr); /* * This init operation provides : + * - all PMU drivers have probed hardware + */ +INIT_OP_DECLARE(boot_setup_pmu); + +/* + * This init operation provides : * - all shutdown operations have been registered */ INIT_OP_DECLARE(boot_setup_shutdown); diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c index 98d3680e..6cbe168a 100644 --- a/arch/x86/machine/cpu.c +++ b/arch/x86/machine/cpu.c @@ -69,6 +69,11 @@ #define CPU_INVALID_APIC_ID ((unsigned int)-1) +struct cpu_vendor { + unsigned int id; + const char *str; +}; + /* * MP related CMOS ports, registers and values. 
*/ @@ -155,6 +160,12 @@ static alignas(8) struct cpu_gate_desc cpu_idt[CPU_IDT_SIZE] __read_mostly; static unsigned long cpu_double_fault_handler; static alignas(CPU_DATA_ALIGN) char cpu_double_fault_stack[TRAP_STACK_SIZE]; +uint64_t +cpu_get_freq(void) +{ + return cpu_freq; +} + void cpu_delay(unsigned long usecs) { @@ -173,6 +184,11 @@ cpu_delay(unsigned long usecs) } while (total > 0); } +static const struct cpu_vendor cpu_vendors[] = { + { CPU_VENDOR_INTEL, "GenuineIntel" }, + { CPU_VENDOR_AMD, "AuthenticAMD" }, +}; + void * __init cpu_get_boot_stack(void) { @@ -182,10 +198,9 @@ cpu_get_boot_stack(void) static void __init cpu_preinit(struct cpu *cpu, unsigned int id, unsigned int apic_id) { + memset(cpu, 0, sizeof(*cpu)); cpu->id = id; cpu->apic_id = apic_id; - cpu->state = CPU_STATE_OFF; - cpu->boot_stack = NULL; } static void @@ -430,6 +445,32 @@ cpu_load_idt(const void *idt, size_t size) asm volatile("lidt %0" : : "m" (idtr)); } +static const struct cpu_vendor * +cpu_vendor_lookup(const char *str) +{ + for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); i++) { + if (strcmp(str, cpu_vendors[i].str) == 0) { + return &cpu_vendors[i]; + } + } + + return NULL; +} + +static void __init +cpu_init_vendor_id(struct cpu *cpu) +{ + const struct cpu_vendor *vendor; + + vendor = cpu_vendor_lookup(cpu->vendor_str); + + if (vendor == NULL) { + return; + } + + cpu->vendor_id = vendor->id; +} + /* * Initialize the given cpu structure for the current processor. 
*/ @@ -456,10 +497,12 @@ cpu_init(struct cpu *cpu) eax = 0; cpu_cpuid(&eax, &ebx, &ecx, &edx); max_basic = eax; - memcpy(cpu->vendor_id, &ebx, sizeof(ebx)); - memcpy(cpu->vendor_id + 4, &edx, sizeof(edx)); - memcpy(cpu->vendor_id + 8, &ecx, sizeof(ecx)); - cpu->vendor_id[sizeof(cpu->vendor_id) - 1] = '\0'; + cpu->cpuid_max_basic = max_basic; + memcpy(cpu->vendor_str, &ebx, sizeof(ebx)); + memcpy(cpu->vendor_str + 4, &edx, sizeof(edx)); + memcpy(cpu->vendor_str + 8, &ecx, sizeof(ecx)); + cpu->vendor_str[sizeof(cpu->vendor_str) - 1] = '\0'; + cpu_init_vendor_id(cpu); /* Some fields are only initialized if supported by the processor */ cpu->model_name[0] = '\0'; @@ -498,6 +541,8 @@ cpu_init(struct cpu *cpu) max_extended = eax; } + cpu->cpuid_max_extended = max_extended; + if (max_extended < 0x80000001) { cpu->features3 = 0; cpu->features4 = 0; @@ -617,7 +662,7 @@ void cpu_log_info(const struct cpu *cpu) { log_info("cpu%u: %s, type %u, family %u, model %u, stepping %u", - cpu->id, cpu->vendor_id, cpu->type, cpu->family, cpu->model, + cpu->id, cpu->vendor_str, cpu->type, cpu->family, cpu->model, cpu->stepping); if (strlen(cpu->model_name) > 0) { diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h index 8f9b23c4..28308042 100644 --- a/arch/x86/machine/cpu.h +++ b/arch/x86/machine/cpu.h @@ -218,9 +218,13 @@ struct cpu_tss { uint16_t iobp_base; } __packed; -#define CPU_VENDOR_ID_SIZE 13 +#define CPU_VENDOR_STR_SIZE 13 #define CPU_MODEL_NAME_SIZE 49 +#define CPU_VENDOR_UNKNOWN 0 +#define CPU_VENDOR_INTEL 1 +#define CPU_VENDOR_AMD 2 + /* * CPU states. 
*/ @@ -230,8 +234,11 @@ struct cpu_tss { struct cpu { unsigned int id; unsigned int apic_id; - char vendor_id[CPU_VENDOR_ID_SIZE]; + char vendor_str[CPU_VENDOR_STR_SIZE]; char model_name[CPU_MODEL_NAME_SIZE]; + unsigned int cpuid_max_basic; + unsigned int cpuid_max_extended; + unsigned int vendor_id; unsigned int type; unsigned int family; unsigned int model; @@ -537,16 +544,41 @@ cpu_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, : : "memory"); } -static __always_inline void +static inline void cpu_get_msr(uint32_t msr, uint32_t *high, uint32_t *low) { - asm volatile("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr)); + asm("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr)); } -static __always_inline void +static inline uint64_t +cpu_get_msr64(uint32_t msr) +{ + uint32_t high, low; + + cpu_get_msr(msr, &high, &low); + return (((uint64_t)high << 32) | low); +} + +/* + * Implies a full memory barrier. + */ +static inline void cpu_set_msr(uint32_t msr, uint32_t high, uint32_t low) { - asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high)); + asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high) : "memory"); +} + +/* + * Implies a full memory barrier. + */ +static inline void +cpu_set_msr64(uint32_t msr, uint64_t value) +{ + uint32_t low, high; + + low = value & 0xffffffff; + high = value >> 32; + cpu_set_msr(msr, high, low); } static __always_inline uint64_t @@ -607,6 +639,11 @@ cpu_tlb_flush_va(unsigned long va) } /* + * Get CPU frequency in Hz. + */ +uint64_t cpu_get_freq(void); + +/* * Busy-wait for a given amount of time, in microseconds. 
*/ void cpu_delay(unsigned long usecs); diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c index 3f6d0c22..a15bd5f1 100644 --- a/arch/x86/machine/lapic.c +++ b/arch/x86/machine/lapic.c @@ -25,6 +25,7 @@ #include <kern/log.h> #include <kern/macros.h> #include <kern/panic.h> +#include <kern/perfmon.h> #include <machine/cpu.h> #include <machine/lapic.h> #include <machine/pmap.h> @@ -159,7 +160,7 @@ struct lapic_map { struct lapic_register icr_high; struct lapic_register lvt_timer; const struct lapic_register reserved14; /* Thermal sensor register */ - const struct lapic_register reserved15; /* Performance counters register */ + struct lapic_register lvt_pmc; /* Performance counters register */ struct lapic_register lvt_lint0; struct lapic_register lvt_lint1; struct lapic_register lvt_error; @@ -239,6 +240,7 @@ lapic_setup_registers(void) lapic_write(&lapic_map->lvt_error, TRAP_LAPIC_ERROR); lapic_write(&lapic_map->timer_dcr, LAPIC_TIMER_DCR_DIV1); lapic_write(&lapic_map->timer_icr, lapic_bus_freq / CLOCK_FREQ); + lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF); } void __init @@ -333,6 +335,21 @@ lapic_ipi_broadcast(uint32_t vector) | (vector & LAPIC_ICR_VECTOR_MASK)); } +#ifdef CONFIG_PERFMON +void +lapic_pmc_overflow_intr(struct trap_frame *frame) +{ + (void)frame; + + lapic_eoi(); + + /* Reset the LVT entry as it is automatically cleared when triggered */ + lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF); + + perfmon_overflow_intr(); +} +#endif /* CONFIG_PERFMON */ + void lapic_timer_intr(struct trap_frame *frame) { diff --git a/arch/x86/machine/lapic.h b/arch/x86/machine/lapic.h index 6355da48..eac225d7 100644 --- a/arch/x86/machine/lapic.h +++ b/arch/x86/machine/lapic.h @@ -54,6 +54,7 @@ void lapic_ipi_broadcast(uint32_t vector); /* * Interrupt handlers. 
*/ +void lapic_pmc_overflow_intr(struct trap_frame *frame); void lapic_timer_intr(struct trap_frame *frame); void lapic_error_intr(struct trap_frame *frame); void lapic_spurious_intr(struct trap_frame *frame); diff --git a/arch/x86/machine/pmu_amd.c b/arch/x86/machine/pmu_amd.c new file mode 100644 index 00000000..c3e56429 --- /dev/null +++ b/arch/x86/machine/pmu_amd.c @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <stdint.h> + +#include <kern/clock.h> +#include <kern/init.h> +#include <kern/log.h> +#include <kern/macros.h> +#include <kern/perfmon.h> +#include <machine/cpu.h> +#include <machine/pmu_amd.h> + +/* + * AMD raw event IDs. 
+ */ +#define PMU_AMD_RE_CYCLE 0 +#define PMU_AMD_RE_INSTRUCTION 1 +#define PMU_AMD_RE_CACHE_REF 2 +#define PMU_AMD_RE_CACHE_MISS 3 +#define PMU_AMD_RE_BRANCH 4 +#define PMU_AMD_RE_BRANCH_MISS 5 +#define PMU_AMD_RE_DCACHE_REF 6 +#define PMU_AMD_RE_DCACHE_MISS 7 +#define PMU_AMD_RE_IFETCH_STALL 8 +#define PMU_AMD_RE_INVALID ((unsigned int)-1) + +/* + * PMU MSR addresses + */ +#define PMU_AMD_MSR_PERFEVTSEL0 0xc0010000 +#define PMU_AMD_MSR_PERCTR0 0xc0010004 + +/* + * Event Select Register addresses + */ +#define PMU_AMD_EVTSEL_USR 0x00010000 +#define PMU_AMD_EVTSEL_OS 0x00020000 +#define PMU_AMD_EVTSEL_INT 0x00100000 +#define PMU_AMD_EVTSEL_EN 0x00400000 + +/* + * XXX These properties have the minimum values required by the architecture. + * TODO Per-family/model event availability database. + */ +#define PMU_AMD_NR_PMCS 4 +#define PMU_AMD_PMC_WIDTH 48 + +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. + */ +struct pmu_amd { + unsigned int pmc_bm; +}; + +static struct pmu_amd pmu_amd; + +struct pmu_amd_event_code { + unsigned short event_select; + unsigned short umask; +}; + +/* + * TODO Per-family/model event availability database. 
+ */ +static const struct pmu_amd_event_code pmu_amd_event_codes[] = { + [PMU_AMD_RE_CYCLE] = { 0x76, 0x00 }, + [PMU_AMD_RE_INSTRUCTION] = { 0xc0, 0x00 }, + [PMU_AMD_RE_CACHE_REF] = { 0x80, 0x00 }, + [PMU_AMD_RE_CACHE_MISS] = { 0x81, 0x00 }, + [PMU_AMD_RE_BRANCH] = { 0xc2, 0x00 }, + [PMU_AMD_RE_BRANCH_MISS] = { 0xc3, 0x00 }, + [PMU_AMD_RE_DCACHE_REF] = { 0x40, 0x00 }, + [PMU_AMD_RE_DCACHE_MISS] = { 0x41, 0x00 }, + [PMU_AMD_RE_IFETCH_STALL] = { 0x87, 0x00 }, +}; + +static const unsigned int pmu_amd_generic_events[] = { + [PERFMON_EV_CYCLE] = PMU_AMD_RE_CYCLE, + [PERFMON_EV_REF_CYCLE] = PMU_AMD_RE_INVALID, + [PERFMON_EV_INSTRUCTION] = PMU_AMD_RE_INSTRUCTION, + [PERFMON_EV_CACHE_REF] = PMU_AMD_RE_CACHE_REF, + [PERFMON_EV_CACHE_MISS] = PMU_AMD_RE_CACHE_MISS, + [PERFMON_EV_BRANCH] = PMU_AMD_RE_BRANCH, + [PERFMON_EV_BRANCH_MISS] = PMU_AMD_RE_BRANCH_MISS, +}; + +static struct pmu_amd * +pmu_amd_get(void) +{ + return &pmu_amd; +} + +static int +pmu_amd_translate(unsigned int *raw_event_idp, unsigned int event_id) +{ + assert(event_id < ARRAY_SIZE(pmu_amd_generic_events)); + + *raw_event_idp = pmu_amd_generic_events[event_id]; + return 0; +} + +static int +pmu_amd_alloc(unsigned int *pmc_idp, unsigned int pmc_index, + unsigned int raw_event_id) +{ + struct pmu_amd *pmu; + unsigned int pmc_id; + + /* TODO Per-family/model event availability database */ + + (void)pmc_index; + (void)raw_event_id; + + pmu = pmu_amd_get(); + + if (pmu->pmc_bm == 0) { + return EAGAIN; + } + + pmc_id = __builtin_ffs(pmu->pmc_bm) - 1; + pmu->pmc_bm &= ~(1U << pmc_id); + *pmc_idp = pmc_id; + + return 0; +} + +static void +pmu_amd_free(unsigned int pmc_id) +{ + struct pmu_amd *pmu; + unsigned int mask; + + assert(pmc_id < PMU_AMD_NR_PMCS); + + pmu = pmu_amd_get(); + mask = (1U << pmc_id); + assert(!(pmu->pmc_bm & mask)); + pmu->pmc_bm |= mask; +} + +static void +pmu_amd_start(unsigned int pmc_id, unsigned int raw_event_id) +{ + const struct pmu_amd_event_code *code; + uint32_t high, low; + + 
assert(pmc_id < PMU_AMD_NR_PMCS); + assert(raw_event_id < ARRAY_SIZE(pmu_amd_event_codes)); + + code = &pmu_amd_event_codes[raw_event_id]; + + /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */ + high = code->event_select >> 8; + low = PMU_AMD_EVTSEL_EN + | PMU_AMD_EVTSEL_OS + | PMU_AMD_EVTSEL_USR + | (code->umask << 8) + | (code->event_select & 0xff); + cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, high, low); +} + +static void +pmu_amd_stop(unsigned int pmc_id) +{ + assert(pmc_id < PMU_AMD_NR_PMCS); + + cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, 0, 0); +} + +static uint64_t +pmu_amd_read(unsigned int pmc_id) +{ + assert(pmc_id < PMU_AMD_NR_PMCS); + + return cpu_get_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id); +} + +static const struct perfmon_dev_ops pmu_amd_ops = { + .translate = pmu_amd_translate, + .alloc = pmu_amd_alloc, + .free = pmu_amd_free, + .start = pmu_amd_start, + .stop = pmu_amd_stop, + .read = pmu_amd_read, +}; + +static struct perfmon_dev pmu_amd_dev __read_mostly; + +static int __init +pmu_amd_setup(void) +{ + const struct cpu *cpu; + struct pmu_amd *pmu; + + cpu = cpu_current(); + + if (cpu->vendor_id != CPU_VENDOR_AMD) { + return ENODEV; + } + + if (cpu->family < 0x10) { + return ENODEV; + } + + pmu = pmu_amd_get(); + pmu->pmc_bm = (1U << PMU_AMD_NR_PMCS) - 1; + + pmu_amd_dev.ops = &pmu_amd_ops; + pmu_amd_dev.pmc_width = PMU_AMD_PMC_WIDTH; + perfmon_register(&pmu_amd_dev); + log_info("pmu: amd, nr_pmcs:%u pmc_width:%u", + PMU_AMD_NR_PMCS, PMU_AMD_PMC_WIDTH); + return 0; +} + +INIT_OP_DEFINE(pmu_amd_setup, + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(log_setup, true), + INIT_OP_DEP(perfmon_bootstrap, true)); diff --git a/arch/x86/machine/pmu_amd.h b/arch/x86/machine/pmu_amd.h new file mode 100644 index 00000000..db74355c --- /dev/null +++ b/arch/x86/machine/pmu_amd.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2018 Remy Noel. + * Copyright (c) 2018 Richard Braun. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * PMU driver for AMD processors. + */ + +#ifndef X86_PMU_AMD_H +#define X86_PMU_AMD_H + +#include <kern/init.h> + +/* + * This init operation provides : + * - module fully initialized + */ +INIT_OP_DECLARE(pmu_amd_setup); + +#endif /* X86_PMU_AMD_H */ diff --git a/arch/x86/machine/pmu_intel.c b/arch/x86/machine/pmu_intel.c new file mode 100644 index 00000000..f2a26499 --- /dev/null +++ b/arch/x86/machine/pmu_intel.c @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <errno.h> +#include <stdint.h> + +#include <kern/clock.h> +#include <kern/init.h> +#include <kern/log.h> +#include <kern/perfmon.h> +#include <kern/percpu.h> +#include <machine/cpu.h> +#include <machine/pmu_intel.h> + +/* + * Intel raw event IDs. + */ +#define PMU_INTEL_RE_CYCLE 0 +#define PMU_INTEL_RE_REF_CYCLE 1 +#define PMU_INTEL_RE_INSTRUCTION 2 +#define PMU_INTEL_RE_CACHE_REF 3 +#define PMU_INTEL_RE_CACHE_MISS 4 +#define PMU_INTEL_RE_BRANCH 5 +#define PMU_INTEL_RE_BRANCH_MISS 6 + +/* + * PMU MSR addresses + */ +#define PMU_INTEL_MSR_PMC0 0x0c1 +#define PMU_INTEL_MSR_EVTSEL0 0x186 + +/* + * V2 MSR addresses + */ +#define PMU_INTEL_MSR_GLOBAL_STATUS 0x038e +#define PMU_INTEL_MSR_GLOBAL_CTRL 0x038f +#define PMU_INTEL_MSR_GLOBAL_OVF_CTRL 0x0390 + +/* + * Event Select Register addresses + */ +#define PMU_INTEL_EVTSEL_USR 0x00010000 +#define PMU_INTEL_EVTSEL_OS 0x00020000 +#define PMU_INTEL_EVTSEL_INT 0x00100000 +#define PMU_INTEL_EVTSEL_EN 0x00400000 + +#define PMU_INTEL_ID_VERSION_MASK 0x000000ff +#define PMU_INTEL_ID_NR_PMCS_MASK 0x0000ff00 +#define PMU_INTEL_ID_NR_PMCS_OFFSET 8 +#define PMU_INTEL_ID_PMC_WIDTH_MASK 0x00ff0000 +#define PMU_INTEL_ID_PMC_WIDTH_OFFSET 16 +#define PMU_INTEL_ID_EVLEN_MASK 0xff000000 +#define PMU_INTEL_ID_EVLEN_OFFSET 24 +#define PMU_INTEL_ID_EVLEN_MAX 7 + +#define PMU_INTEL_MAX_NR_PMCS 8 + +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. + */ +struct pmu_intel { + unsigned int version; + unsigned int nr_pmcs; + unsigned int pmc_bm; + unsigned int pmc_indexes[PMU_INTEL_MAX_NR_PMCS]; + unsigned int pmc_width; + unsigned int events; +}; + +static struct pmu_intel pmu_intel; + +/* + * Intel hardware events. 
+ */ +#define PMU_INTEL_EVENT_CYCLE 0x01 +#define PMU_INTEL_EVENT_INSTRUCTION 0x02 +#define PMU_INTEL_EVENT_REF_CYCLE 0x04 +#define PMU_INTEL_EVENT_CACHE_REF 0x08 +#define PMU_INTEL_EVENT_CACHE_MISS 0x10 +#define PMU_INTEL_EVENT_BRANCH 0x20 +#define PMU_INTEL_EVENT_BRANCH_MISS 0x40 + +struct pmu_intel_event_code { + unsigned int hw_event_id; + unsigned short event_select; + unsigned short umask; +}; + +static const unsigned int pmu_intel_raw_events[] = { + [PERFMON_EV_CYCLE] = PMU_INTEL_RE_CYCLE, + [PERFMON_EV_REF_CYCLE] = PMU_INTEL_RE_REF_CYCLE, + [PERFMON_EV_INSTRUCTION] = PMU_INTEL_RE_INSTRUCTION, + [PERFMON_EV_CACHE_REF] = PMU_INTEL_RE_CACHE_REF, + [PERFMON_EV_CACHE_MISS] = PMU_INTEL_RE_CACHE_MISS, + [PERFMON_EV_BRANCH] = PMU_INTEL_RE_BRANCH, + [PERFMON_EV_BRANCH_MISS] = PMU_INTEL_RE_BRANCH_MISS, +}; + +static const struct pmu_intel_event_code pmu_intel_event_codes[] = { + [PMU_INTEL_RE_CYCLE] = { PMU_INTEL_EVENT_CYCLE, 0x3c, 0x00 }, + [PMU_INTEL_RE_REF_CYCLE] = { PMU_INTEL_EVENT_REF_CYCLE, 0x3c, 0x01 }, + [PMU_INTEL_RE_INSTRUCTION] = { PMU_INTEL_EVENT_INSTRUCTION, 0xc0, 0x00 }, + [PMU_INTEL_RE_CACHE_REF] = { PMU_INTEL_EVENT_CACHE_REF, 0x2e, 0x4f }, + [PMU_INTEL_RE_CACHE_MISS] = { PMU_INTEL_EVENT_CACHE_MISS, 0x2e, 0x41 }, + [PMU_INTEL_RE_BRANCH] = { PMU_INTEL_EVENT_BRANCH, 0xc4, 0x00 }, + [PMU_INTEL_RE_BRANCH_MISS] = { PMU_INTEL_EVENT_BRANCH_MISS, 0xc5, 0x00 }, +}; + +static struct pmu_intel * +pmu_intel_get(void) +{ + return &pmu_intel; +} + +static uint64_t +pmu_intel_get_status(void) +{ + return cpu_get_msr64(PMU_INTEL_MSR_GLOBAL_STATUS); +} + +static void +pmu_intel_ack_status(uint64_t status) +{ + return cpu_set_msr64(PMU_INTEL_MSR_GLOBAL_OVF_CTRL, status); +} + +/* + * TODO Use the compiler built-in once libgcc is linked again. 
+ */ +static unsigned int +pmu_popcount(unsigned int bits) +{ + unsigned int count; + + count = 0; + + while (bits) { + if (bits & 1) { + count++; + } + + bits >>= 1; + } + + return count; +} + +static int +pmu_intel_translate(unsigned int *raw_event_idp, unsigned event_id) +{ + if (event_id >= ARRAY_SIZE(pmu_intel_raw_events)) { + return EINVAL; + } + + *raw_event_idp = pmu_intel_raw_events[event_id]; + return 0; +} + +static int +pmu_intel_alloc(unsigned int *pmc_idp, unsigned int pmc_index, + unsigned int raw_event_id) +{ + struct pmu_intel *pmu; + unsigned int pmc_id; + unsigned int hw_event_id; + + assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes)); + + pmu = pmu_intel_get(); + hw_event_id = pmu_intel_event_codes[raw_event_id].hw_event_id; + + if (!(pmu->events & hw_event_id)) { + return EINVAL; + } + + if (pmu->pmc_bm == 0) { + return EAGAIN; + } + + pmc_id = __builtin_ffs(pmu->pmc_bm) - 1; + assert(pmc_id < ARRAY_SIZE(pmu->pmc_indexes)); + pmu->pmc_indexes[pmc_id] = pmc_index; + pmu->pmc_bm &= ~(1U << pmc_id); + *pmc_idp = pmc_id; + return 0; +} + +static void +pmu_intel_free(unsigned int pmc_id) +{ + struct pmu_intel *pmu; + unsigned int mask; + + pmu = pmu_intel_get(); + mask = (1U << pmc_id); + assert(!(pmu->pmc_bm & mask)); + pmu->pmc_bm |= mask; +} + +static void +pmu_intel_start(unsigned int pmc_id, unsigned int raw_event_id) +{ + const struct pmu_intel_event_code *code; + struct pmu_intel *pmu; + uint32_t evtsel; + + assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes)); + + code = &pmu_intel_event_codes[raw_event_id]; + pmu = pmu_intel_get(); + + /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */ + evtsel = PMU_INTEL_EVTSEL_EN + | PMU_INTEL_EVTSEL_OS + | PMU_INTEL_EVTSEL_USR + | (code->umask << 8) + | code->event_select; + + if (pmu->version >= 2) { + evtsel |= PMU_INTEL_EVTSEL_INT; + } + + cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, evtsel); +} + +static void +pmu_intel_stop(unsigned int pmc_id) +{ + cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + 
pmc_id, 0, 0); +} + +static uint64_t +pmu_intel_read(unsigned int pmc_id) +{ + return cpu_get_msr64(PMU_INTEL_MSR_PMC0 + pmc_id); +} + +static int +pmu_intel_consume_bits(uint64_t *bits) +{ + int bit; + + bit = __builtin_ffsll(*bits) - 1; + + if (bit < 0) { + return bit; + } + + *bits &= ~(1U << bit); + return bit; +} + +static void +pmu_intel_handle_overflow_intr(void) +{ + struct pmu_intel *pmu; + unsigned int pmc_index; + uint64_t status; + int pmc_id; + + status = pmu_intel_get_status(); + + if (status == 0) { + return; + } + + pmu_intel_ack_status(status); + pmu = pmu_intel_get(); + + status &= ((1ULL << pmu->pmc_width) - 1); + + for (;;) { + pmc_id = pmu_intel_consume_bits(&status); + + if (pmc_id < 0) { + break; + } + + pmc_index = pmu->pmc_indexes[pmc_id]; + perfmon_report_overflow(pmc_index); + } +} + +static struct perfmon_dev_ops pmu_intel_ops __read_mostly = { + .translate = pmu_intel_translate, + .alloc = pmu_intel_alloc, + .free = pmu_intel_free, + .start = pmu_intel_start, + .stop = pmu_intel_stop, + .read = pmu_intel_read, +}; + +static struct perfmon_dev pmu_intel_dev __read_mostly; + +static void +pmu_intel_percpu_init(void) +{ + const struct pmu_intel *pmu; + uint64_t pmc_mask; + + pmu = pmu_intel_get(); + + pmc_mask = (1U << pmu->nr_pmcs) - 1; + cpu_set_msr64(PMU_INTEL_MSR_GLOBAL_CTRL, 0x700000000 | pmc_mask); +} + +static struct percpu_op pmu_intel_percpu_op = \ + PERCPU_OP_INITIALIZER(pmu_intel_percpu_init); + +static int __init +pmu_intel_setup(void) +{ + unsigned int eax, ebx, ecx, edx, ev_len; + const struct cpu *cpu; + struct pmu_intel *pmu; + + cpu = cpu_current(); + eax = 0xa; + + if (cpu->vendor_id != CPU_VENDOR_INTEL) { + return 0; + } + + if (cpu->cpuid_max_basic < eax) { + return ENODEV; + } + + pmu = pmu_intel_get(); + cpu_cpuid(&eax, &ebx, &ecx, &edx); + pmu->version = eax & PMU_INTEL_ID_VERSION_MASK; + + if (pmu->version == 0) { + return ENODEV; + } + + pmu->nr_pmcs = (eax & PMU_INTEL_ID_NR_PMCS_MASK) + >> 
PMU_INTEL_ID_NR_PMCS_OFFSET; + + if (pmu->nr_pmcs > ARRAY_SIZE(pmu->pmc_indexes)) { + log_err("pmu: invalid number of PMCs (%u)", pmu->nr_pmcs); + return ENODEV; + } + + pmu->pmc_bm = (1U << pmu->nr_pmcs ) - 1; + pmu->pmc_width = (eax & PMU_INTEL_ID_PMC_WIDTH_MASK) + >> PMU_INTEL_ID_PMC_WIDTH_OFFSET; + ev_len = (eax & PMU_INTEL_ID_EVLEN_MASK) >> PMU_INTEL_ID_EVLEN_OFFSET; + + assert(ev_len <= PMU_INTEL_ID_EVLEN_MAX); + + pmu->events = ~ebx & ((1U << ev_len) - 1); + + pmu_intel_dev.ops = &pmu_intel_ops; + pmu_intel_dev.pmc_width = pmu->pmc_width; + + if (pmu->version >= 2) { + percpu_register_op(&pmu_intel_percpu_op); + pmu_intel_ops.handle_overflow_intr = pmu_intel_handle_overflow_intr; + } + + perfmon_register(&pmu_intel_dev); + log_info("pmu: intel v%d, nr_pmcs:%u pmc_width:%u events:%#x nr_events:%u", + pmu->version, pmu->nr_pmcs, pmu->pmc_width, pmu->events, + pmu_popcount(pmu->events)); + return 0; +} + +INIT_OP_DEFINE(pmu_intel_setup, + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(log_setup, true), + INIT_OP_DEP(percpu_setup, true), + INIT_OP_DEP(perfmon_bootstrap, true)); diff --git a/arch/x86/machine/pmu_intel.h b/arch/x86/machine/pmu_intel.h new file mode 100644 index 00000000..400017c6 --- /dev/null +++ b/arch/x86/machine/pmu_intel.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2018 Remy Noel. + * Copyright (c) 2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + * + * PMU driver for Intel processors. + */ + +#ifndef X86_PMU_INTEL_H +#define X86_PMU_INTEL_H + +#include <kern/init.h> + +/* + * This init operation provides : + * - module fully initialized + */ +INIT_OP_DECLARE(pmu_intel_setup); + +#endif /* X86_PMU_INTEL_H */ diff --git a/arch/x86/machine/trap.c b/arch/x86/machine/trap.c index 534b3f6f..90c8bf66 100644 --- a/arch/x86/machine/trap.c +++ b/arch/x86/machine/trap.c @@ -210,6 +210,9 @@ trap_setup(void) trap_install(TRAP_XCALL, TRAP_HF_INTR, cpu_xcall_intr); trap_install(TRAP_THREAD_SCHEDULE, TRAP_HF_INTR, cpu_thread_schedule_intr); trap_install(TRAP_CPU_HALT, TRAP_HF_INTR, cpu_halt_intr); +#ifdef CONFIG_PERFMON + trap_install(TRAP_LAPIC_PMC_OF, TRAP_HF_INTR, lapic_pmc_overflow_intr); +#endif trap_install(TRAP_LAPIC_TIMER, TRAP_HF_INTR, lapic_timer_intr); trap_install(TRAP_LAPIC_ERROR, TRAP_HF_INTR, lapic_error_intr); trap_install(TRAP_LAPIC_SPURIOUS, TRAP_HF_INTR, lapic_spurious_intr); diff --git a/arch/x86/machine/trap.h b/arch/x86/machine/trap.h index af6fd6b5..c5bdc1f2 100644 --- a/arch/x86/machine/trap.h +++ b/arch/x86/machine/trap.h @@ -62,6 +62,7 @@ #define TRAP_XCALL 238 #define TRAP_THREAD_SCHEDULE 239 #define TRAP_CPU_HALT 240 +#define TRAP_LAPIC_PMC_OF 252 #define TRAP_LAPIC_TIMER 253 #define TRAP_LAPIC_ERROR 254 #define TRAP_LAPIC_SPURIOUS 255 diff --git a/doc/intro.9.txt b/doc/intro.9.txt index 281db50e..6fcd9618 100644 --- a/doc/intro.9.txt +++ b/doc/intro.9.txt @@ -153,6 +153,8 @@ module:kern/list:: Doubly-linked list. module:kern/macros:: Useful generic macros. +module:kern/perfmon:: + Performance monitoring. module:kern/rbtree:: Red-black tree. module:kern/rdxtree:: diff --git a/kern/Kconfig b/kern/Kconfig index 7dd04a6a..ea61937f 100644 --- a/kern/Kconfig +++ b/kern/Kconfig @@ -94,6 +94,19 @@ config THREAD_STACK_GUARD If unsure, disable. 
+config PERFMON + def_bool n + +config PERFMON_MAX_PMCS + int "Number of performance monitoring counters" + default 8 + depends on PERFMON + ---help--- + Number of performance monitoring counters. + + This value affects the minimum duration of some critical sections + that run with interrupts disabled. + endmenu menu "Debugging" diff --git a/kern/Makefile b/kern/Makefile index ab7d6b59..5b04fcb3 100644 --- a/kern/Makefile +++ b/kern/Makefile @@ -41,3 +41,5 @@ x15_SOURCES-$(CONFIG_SHELL) += kern/shell.c x15_SOURCES-$(CONFIG_MUTEX_ADAPTIVE) += kern/mutex/mutex_adaptive.c x15_SOURCES-$(CONFIG_MUTEX_PLAIN) += kern/mutex/mutex_plain.c + +x15_SOURCES-$(CONFIG_PERFMON) += kern/perfmon.c diff --git a/kern/percpu.c b/kern/percpu.c index 53861a30..f344bd70 100644 --- a/kern/percpu.c +++ b/kern/percpu.c @@ -26,6 +26,7 @@ #include <kern/macros.h> #include <kern/panic.h> #include <kern/percpu.h> +#include <kern/slist.h> #include <machine/cpu.h> #include <vm/vm_kmem.h> #include <vm/vm_page.h> @@ -36,6 +37,14 @@ static void *percpu_area_content __initdata; static size_t percpu_area_size __initdata; static int percpu_skip_warning __initdata; +static struct slist percpu_ops __initdata; + +static void __init +percpu_op_run(const struct percpu_op *op) +{ + op->fn(); +} + static int __init percpu_bootstrap(void) { @@ -51,6 +60,8 @@ percpu_setup(void) struct vm_page *page; unsigned int order; + slist_init(&percpu_ops); + percpu_area_size = &_percpu_end - &_percpu; log_info("percpu: max_cpus: %u, section size: %zuk", CONFIG_MAX_CPUS, percpu_area_size >> 10); @@ -76,6 +87,15 @@ INIT_OP_DEFINE(percpu_setup, INIT_OP_DEP(percpu_bootstrap, true), INIT_OP_DEP(vm_page_setup, true)); +void __init +percpu_register_op(struct percpu_op *op) +{ + slist_insert_tail(&percpu_ops, &op->node); + + /* Run on BSP */ + percpu_op_run(op); +} + int __init percpu_add(unsigned int cpu) { @@ -116,6 +136,16 @@ out: return 0; } +void __init +percpu_ap_setup(void) +{ + struct percpu_op *op; + + 
slist_for_each_entry(&percpu_ops, op, node) { + percpu_op_run(op); + } +} + static int __init percpu_cleanup(void) { diff --git a/kern/percpu.h b/kern/percpu.h index 96f706ea..f77e7fd8 100644 --- a/kern/percpu.h +++ b/kern/percpu.h @@ -59,10 +59,26 @@ #include <kern/init.h> #include <kern/macros.h> +#include <kern/slist_types.h> #define PERCPU_SECTION .percpu #define __percpu __section(QUOTE(PERCPU_SECTION)) +typedef void (*percpu_op_fn_t)(void); + +/* + * Per-CPU operation. + * + * These operations allow initialization code to register functions to be run + * on APs when they're started. + */ +struct percpu_op { + struct slist_node node; + percpu_op_fn_t fn; +}; + +#define PERCPU_OP_INITIALIZER(op_fn) { .fn = op_fn } + /* * Boundaries of the percpu section. * @@ -96,6 +112,15 @@ percpu_area(unsigned int cpu) } /* + * Register a percpu operation to be run on all processors when + * they're started. + * + * The operation is run on the BSP when it's registered. It's run as late as + * possible on APs, normally right before scheduling is enabled. + */ +void percpu_register_op(struct percpu_op *op); + +/* * Register a processor. * * This function creates a percpu area from kernel virtual memory for the @@ -105,6 +130,11 @@ percpu_area(unsigned int cpu) int percpu_add(unsigned int cpu); /* + * Run registered percpu operations on an AP. + */ +void percpu_ap_setup(void); + +/* * This init operation provides : * - access to percpu variables on processor 0 */ @@ -112,6 +142,7 @@ INIT_OP_DECLARE(percpu_bootstrap); /* * This init operation provides : + * - percpu operations can be registered * - new percpu areas can be created * * The dependency that provides access to percpu variables on all processors diff --git a/kern/perfmon.c b/kern/perfmon.c new file mode 100644 index 00000000..6fd319e8 --- /dev/null +++ b/kern/perfmon.c @@ -0,0 +1,1443 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Locking order : + * + * thread_runq -+ + * | + * event -+-> interrupts -+-> td + * | + * +-> pmu + * + * TODO Kernel/user mode segregation. + */ + +#include <assert.h> +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include <kern/clock.h> +#include <kern/init.h> +#include <kern/list.h> +#include <kern/log.h> +#include <kern/macros.h> +#include <kern/percpu.h> +#include <kern/perfmon.h> +#include <kern/perfmon_types.h> +#include <kern/spinlock.h> +#include <kern/syscnt.h> +#include <kern/thread.h> +#include <kern/timer.h> +#include <kern/xcall.h> +#include <machine/boot.h> +#include <machine/cpu.h> + +/* + * Minimum hardware counter poll interval, in milliseconds. + * + * The main purpose of polling hardware counters is to detect overflows + * when the driver is unable to reliably use overflow interrupts. + */ +#define PERFMON_MIN_POLL_INTERVAL 50 + +/* + * Internal event flags. + */ +#define PERFMON_EF_TYPE_CPU 0x100 +#define PERFMON_EF_ATTACHED 0x200 +#define PERFMON_EF_PUBLIC_MASK (PERFMON_EF_KERN \ + | PERFMON_EF_USER \ + | PERFMON_EF_RAW) + +/* + * Per-CPU performance monitoring counter. + * + * When an event is attached to a processor, the matching per-CPU PMC gets + * referenced. When a per-CPU PMC is referenced, its underlying hardware + * counter is active.
+ * + * Interrupts and preemption must be disabled on access. + */ +struct perfmon_cpu_pmc { + unsigned int nr_refs; + unsigned int pmc_id; + unsigned int raw_event_id; + uint64_t raw_value; + uint64_t value; +}; + +/* + * Per-CPU performance monitoring unit. + * + * Per-CPU PMCs are indexed the same way as global PMCs. + * + * Interrupts and preemption must be disabled on access. + */ +struct perfmon_cpu_pmu { + struct perfmon_dev *dev; + unsigned int cpu; + struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS]; + struct timer poll_timer; + struct syscnt sc_nr_overflows; +}; + +/* + * Performance monitoring counter. + * + * When a PMC is used, it maps a raw event to a hardware counter. + * A PMC is used if and only if its reference counter isn't zero. + */ +struct perfmon_pmc { + unsigned int nr_refs; + unsigned int pmc_id; + unsigned int raw_event_id; +}; + +/* + * Performance monitoring unit. + * + * There is a single system-wide logical PMU, used to globally allocate + * PMCs. Reserving a counter across the entire system ensures thread + * migration isn't hindered by performance monitoring. + * + * Locking the global PMU is only required when allocating or releasing + * a PMC. Once allocated, the PMC may safely be accessed without holding + * the lock.
+ */ +struct perfmon_pmu { + struct perfmon_dev *dev; + struct spinlock lock; + struct perfmon_pmc pmcs[PERFMON_MAX_PMCS]; +}; + +static struct perfmon_pmu perfmon_pmu; +static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu; + +static struct perfmon_pmu * +perfmon_get_pmu(void) +{ + return &perfmon_pmu; +} + +static struct perfmon_cpu_pmu * +perfmon_get_local_cpu_pmu(void) +{ + assert(!thread_preempt_enabled()); + return cpu_local_ptr(perfmon_cpu_pmu); +} + +static struct perfmon_cpu_pmu * +perfmon_get_cpu_pmu(unsigned int cpu) +{ + return percpu_ptr(perfmon_cpu_pmu, cpu); +} + +static void __init +perfmon_pmc_init(struct perfmon_pmc *pmc) +{ + pmc->nr_refs = 0; +} + +static bool +perfmon_pmc_used(const struct perfmon_pmc *pmc) +{ + return pmc->nr_refs != 0; +} + +static unsigned int +perfmon_pmc_id(const struct perfmon_pmc *pmc) +{ + return pmc->pmc_id; +} + +static unsigned int +perfmon_pmc_raw_event_id(const struct perfmon_pmc *pmc) +{ + return pmc->raw_event_id; +} + +static void +perfmon_pmc_use(struct perfmon_pmc *pmc, unsigned int pmc_id, + unsigned int raw_event_id) +{ + assert(!perfmon_pmc_used(pmc)); + + pmc->nr_refs = 1; + pmc->pmc_id = pmc_id; + pmc->raw_event_id = raw_event_id; +} + +static void +perfmon_pmc_ref(struct perfmon_pmc *pmc) +{ + assert(perfmon_pmc_used(pmc)); + pmc->nr_refs++; +} + +static void +perfmon_pmc_unref(struct perfmon_pmc *pmc) +{ + assert(perfmon_pmc_used(pmc)); + pmc->nr_refs--; +} + +static unsigned int +perfmon_pmu_get_pmc_index(const struct perfmon_pmu *pmu, + const struct perfmon_pmc *pmc) +{ + size_t pmc_index; + + pmc_index = pmc - pmu->pmcs; + assert(pmc_index < ARRAY_SIZE(pmu->pmcs)); + return pmc_index; +} + +static struct perfmon_pmc * +perfmon_pmu_get_pmc(struct perfmon_pmu *pmu, unsigned int index) +{ + assert(index < ARRAY_SIZE(pmu->pmcs)); + return &pmu->pmcs[index]; +} + +static void __init +perfmon_pmu_init(struct perfmon_pmu *pmu) +{ + pmu->dev = NULL; + spinlock_init(&pmu->lock); + + for (unsigned int i = 0; 
i < ARRAY_SIZE(pmu->pmcs); i++) { + perfmon_pmc_init(perfmon_pmu_get_pmc(pmu, i)); + } +} + +static void __init +perfmon_pmu_set_dev(struct perfmon_pmu *pmu, struct perfmon_dev *dev) +{ + assert(dev); + assert(!pmu->dev); + pmu->dev = dev; +} + +static struct perfmon_dev * +perfmon_pmu_get_dev(const struct perfmon_pmu *pmu) +{ + return pmu->dev; +} + +static void +perfmon_pmu_handle_overflow_intr(const struct perfmon_pmu *pmu) +{ + pmu->dev->ops->handle_overflow_intr(); +} + +static int +perfmon_pmu_translate(const struct perfmon_pmu *pmu, + unsigned int *raw_event_id, + unsigned int event_id) +{ + if (!pmu->dev) { + return ENODEV; + } + + return pmu->dev->ops->translate(raw_event_id, event_id); +} + +static int +perfmon_pmu_alloc_pmc_id(const struct perfmon_pmu *pmu, + unsigned int *pmc_idp, + unsigned int pmc_index, + unsigned int raw_event_id) +{ + unsigned int pmc_id; + int error; + + if (!pmu->dev) { + return ENODEV; + } + + error = pmu->dev->ops->alloc(&pmc_id, pmc_index, raw_event_id); + + if (error) { + return error; + } + + *pmc_idp = pmc_id; + return 0; +} + +static void +perfmon_pmu_free_pmc_id(const struct perfmon_pmu *pmu, unsigned int pmc_id) +{ + assert(pmu->dev); + pmu->dev->ops->free(pmc_id); +} + +static struct perfmon_pmc * +perfmon_pmu_find_unused_pmc(struct perfmon_pmu *pmu) +{ + struct perfmon_pmc *pmc; + + for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) { + pmc = perfmon_pmu_get_pmc(pmu, i); + + if (!perfmon_pmc_used(pmc)) { + return pmc; + } + } + + return NULL; +} + +static int +perfmon_pmu_alloc_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp, + unsigned int raw_event_id) +{ + unsigned int pmc_id = 0, pmc_index; + struct perfmon_pmc *pmc; + int error; + + pmc = perfmon_pmu_find_unused_pmc(pmu); + + if (!pmc) { + return EAGAIN; + } + + pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc); + error = perfmon_pmu_alloc_pmc_id(pmu, &pmc_id, pmc_index, raw_event_id); + + if (error) { + return error; + } + + perfmon_pmc_use(pmc, 
pmc_id, raw_event_id); + *pmcp = pmc; + return 0; +} + +static void +perfmon_pmu_free_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc) +{ + unsigned int pmc_id; + + assert(!perfmon_pmc_used(pmc)); + pmc_id = perfmon_pmc_id(pmc); + perfmon_pmu_free_pmc_id(pmu, pmc_id); +} + +static struct perfmon_pmc * +perfmon_pmu_get_pmc_by_raw_event_id(struct perfmon_pmu *pmu, + unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + + for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) { + pmc = perfmon_pmu_get_pmc(pmu, i); + + if (!perfmon_pmc_used(pmc)) { + continue; + } + + if (perfmon_pmc_raw_event_id(pmc) == raw_event_id) { + return pmc; + } + } + + return NULL; +} + +static int +perfmon_pmu_take_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp, + unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + int error; + + spinlock_lock(&pmu->lock); + + pmc = perfmon_pmu_get_pmc_by_raw_event_id(pmu, raw_event_id); + + if (pmc) { + perfmon_pmc_ref(pmc); + error = 0; + } else { + error = perfmon_pmu_alloc_pmc(pmu, &pmc, raw_event_id); + + if (error) { + pmc = NULL; + } + } + + spinlock_unlock(&pmu->lock); + + if (error) { + return error; + } + + *pmcp = pmc; + return 0; +} + +static void +perfmon_pmu_put_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc) +{ + spinlock_lock(&pmu->lock); + + perfmon_pmc_unref(pmc); + + if (!perfmon_pmc_used(pmc)) { + perfmon_pmu_free_pmc(pmu, pmc); + } + + spinlock_unlock(&pmu->lock); +} + +static int +perfmon_check_event_args(unsigned int id, unsigned int flags) +{ + if (!((flags & PERFMON_EF_PUBLIC_MASK) == flags) + || !((flags & PERFMON_EF_RAW) || (id < PERFMON_NR_GENERIC_EVENTS)) + || !((flags & (PERFMON_EF_KERN | PERFMON_EF_USER)))) { + return EINVAL; + } + + return 0; +} + +int +perfmon_event_init(struct perfmon_event *event, unsigned int id, + unsigned int flags) +{ + int error; + + error = perfmon_check_event_args(id, flags); + + if (error) { + return error; + } + + spinlock_init(&event->lock); + event->flags = flags; 
+ event->id = id; + event->value = 0; + return 0; +} + +static bool +perfmon_event_type_cpu(const struct perfmon_event *event) +{ + return event->flags & PERFMON_EF_TYPE_CPU; +} + +static void +perfmon_event_set_type_cpu(struct perfmon_event *event) +{ + event->flags |= PERFMON_EF_TYPE_CPU; +} + +static void +perfmon_event_clear_type_cpu(struct perfmon_event *event) +{ + event->flags &= ~PERFMON_EF_TYPE_CPU; +} + +static bool +perfmon_event_attached(const struct perfmon_event *event) +{ + return event->flags & PERFMON_EF_ATTACHED; +} + +static unsigned int +perfmon_event_pmc_index(const struct perfmon_event *event) +{ + assert(perfmon_event_attached(event)); + return event->pmc_index; +} + +static void __init +perfmon_cpu_pmc_init(struct perfmon_cpu_pmc *cpu_pmc) +{ + cpu_pmc->nr_refs = 0; +} + +static bool +perfmon_cpu_pmc_used(const struct perfmon_cpu_pmc *cpu_pmc) +{ + return cpu_pmc->nr_refs != 0; +} + +static void +perfmon_cpu_pmc_use(struct perfmon_cpu_pmc *cpu_pmc, unsigned int pmc_id, + unsigned int raw_event_id, uint64_t raw_value) +{ + assert(!perfmon_cpu_pmc_used(cpu_pmc)); + + cpu_pmc->nr_refs = 1; + cpu_pmc->pmc_id = pmc_id; + cpu_pmc->raw_event_id = raw_event_id; + cpu_pmc->raw_value = raw_value; + cpu_pmc->value = 0; +} + +static void +perfmon_cpu_pmc_ref(struct perfmon_cpu_pmc *cpu_pmc) +{ + assert(perfmon_cpu_pmc_used(cpu_pmc)); + cpu_pmc->nr_refs++; +} + +static void +perfmon_cpu_pmc_unref(struct perfmon_cpu_pmc *cpu_pmc) +{ + assert(perfmon_cpu_pmc_used(cpu_pmc)); + cpu_pmc->nr_refs--; +} + +static unsigned int +perfmon_cpu_pmc_id(const struct perfmon_cpu_pmc *cpu_pmc) +{ + return cpu_pmc->pmc_id; +} + +static bool +perfmon_cpu_pmc_update(struct perfmon_cpu_pmc *cpu_pmc, uint64_t raw_value, + unsigned int pmc_width) +{ + bool overflowed; + uint64_t delta; + + delta = raw_value - cpu_pmc->raw_value; + + if (pmc_width == 64) { + overflowed = false; + } else { + if (raw_value >= cpu_pmc->raw_value) { + overflowed = false; + } else { + overflowed = 
true; + delta += (uint64_t)1 << pmc_width; + } + } + + cpu_pmc->value += delta; + cpu_pmc->raw_value = raw_value; + return overflowed; +} + +static uint64_t +perfmon_cpu_pmc_get_value(const struct perfmon_cpu_pmc *cpu_pmc) +{ + return cpu_pmc->value; +} + +static struct perfmon_cpu_pmc * +perfmon_cpu_pmu_get_pmc(struct perfmon_cpu_pmu *cpu_pmu, unsigned int index) +{ + assert(index < ARRAY_SIZE(cpu_pmu->pmcs)); + return &cpu_pmu->pmcs[index]; +} + +static void +perfmon_cpu_pmu_start(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id, + unsigned int raw_event_id) +{ + cpu_pmu->dev->ops->start(pmc_id, raw_event_id); +} + +static void +perfmon_cpu_pmu_stop(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id) +{ + cpu_pmu->dev->ops->stop(pmc_id); +} + +static uint64_t +perfmon_cpu_pmu_read(const struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id) +{ + return cpu_pmu->dev->ops->read(pmc_id); +} + +static void +perfmon_cpu_pmu_use_pmc(struct perfmon_cpu_pmu *cpu_pmu, + struct perfmon_cpu_pmc *cpu_pmc, + unsigned int pmc_id, + unsigned int raw_event_id) +{ + uint64_t raw_value; + + perfmon_cpu_pmu_start(cpu_pmu, pmc_id, raw_event_id); + raw_value = perfmon_cpu_pmu_read(cpu_pmu, pmc_id); + perfmon_cpu_pmc_use(cpu_pmc, pmc_id, raw_event_id, raw_value); +} + +static void +perfmon_cpu_pmu_update_pmc(struct perfmon_cpu_pmu *cpu_pmu, + struct perfmon_cpu_pmc *cpu_pmc) +{ + uint64_t raw_value; + bool overflowed; + + raw_value = perfmon_cpu_pmu_read(cpu_pmu, perfmon_cpu_pmc_id(cpu_pmc)); + overflowed = perfmon_cpu_pmc_update(cpu_pmc, raw_value, + cpu_pmu->dev->pmc_width); + + if (overflowed) { + syscnt_inc(&cpu_pmu->sc_nr_overflows); + } +} + +static void +perfmon_cpu_pmu_check_overflow(void *arg) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + + cpu_pmu = arg; + assert(cpu_pmu->cpu == cpu_id()); + + for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) { + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, i); + + 
if (!perfmon_cpu_pmc_used(cpu_pmc)) { + continue; + } + + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + } +} + +static void +perfmon_cpu_pmu_poll(struct timer *timer) +{ + struct perfmon_cpu_pmu *cpu_pmu; + + cpu_pmu = structof(timer, struct perfmon_cpu_pmu, poll_timer); + xcall_call(perfmon_cpu_pmu_check_overflow, cpu_pmu, cpu_pmu->cpu); + timer_schedule(timer, timer_get_time(timer) + cpu_pmu->dev->poll_interval); +} + +static void __init +perfmon_cpu_pmu_init(struct perfmon_cpu_pmu *cpu_pmu, unsigned int cpu, + struct perfmon_dev *dev) +{ + char name[SYSCNT_NAME_SIZE]; + + cpu_pmu->dev = dev; + cpu_pmu->cpu = cpu; + + for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) { + perfmon_cpu_pmc_init(perfmon_cpu_pmu_get_pmc(cpu_pmu, i)); + } + + if (dev->ops->handle_overflow_intr == NULL) { + assert(dev->poll_interval != 0); + + /* + * XXX Ideally, this would be an interrupt timer instead of a high + * priority one, but it can't be because the handler performs + * cross-calls to remote processors, which requires that interrupts + * be enabled. This is one potential user of CPU-bound timers. 
+ */ + timer_init(&cpu_pmu->poll_timer, perfmon_cpu_pmu_poll, TIMER_HIGH_PRIO); + timer_schedule(&cpu_pmu->poll_timer, dev->poll_interval); + } + + snprintf(name, sizeof(name), "perfmon_nr_overflows/%u", cpu); + syscnt_register(&cpu_pmu->sc_nr_overflows, name); +} + +static uint64_t +perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index, + unsigned int pmc_id, unsigned int raw_event_id) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + + if (perfmon_cpu_pmc_used(cpu_pmc)) { + perfmon_cpu_pmc_ref(cpu_pmc); + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + } else { + perfmon_cpu_pmu_use_pmc(cpu_pmu, cpu_pmc, pmc_id, raw_event_id); + } + + return perfmon_cpu_pmc_get_value(cpu_pmc); +} + +static uint64_t +perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + unsigned int pmc_id; + uint64_t value; + + assert(!cpu_intr_enabled()); + + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + pmc_id = perfmon_cpu_pmc_id(cpu_pmc); + + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + value = perfmon_cpu_pmc_get_value(cpu_pmc); + + perfmon_cpu_pmc_unref(cpu_pmc); + + if (!perfmon_cpu_pmc_used(cpu_pmc)) { + perfmon_cpu_pmu_stop(cpu_pmu, pmc_id); + } + + return value; +} + +static uint64_t +perfmon_cpu_pmu_sync(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); + return perfmon_cpu_pmc_get_value(cpu_pmc); +} + +static void +perfmon_td_pmc_init(struct perfmon_td_pmc *td_pmc) +{ + td_pmc->nr_refs = 0; + td_pmc->loaded = false; + td_pmc->value = 0; +} + +static bool +perfmon_td_pmc_used(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->nr_refs != 0; +} + +static void +perfmon_td_pmc_use(struct perfmon_td_pmc *td_pmc, unsigned 
int pmc_id, + unsigned int raw_event_id) +{ + assert(!perfmon_td_pmc_used(td_pmc)); + + td_pmc->nr_refs = 1; + td_pmc->loaded = false; + td_pmc->pmc_id = pmc_id; + td_pmc->raw_event_id = raw_event_id; + td_pmc->value = 0; +} + +static unsigned int +perfmon_td_pmc_id(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->pmc_id; +} + +static unsigned int +perfmon_td_pmc_raw_event_id(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->raw_event_id; +} + +static void +perfmon_td_pmc_ref(struct perfmon_td_pmc *td_pmc) +{ + assert(perfmon_td_pmc_used(td_pmc)); + td_pmc->nr_refs++; +} + +static void +perfmon_td_pmc_unref(struct perfmon_td_pmc *td_pmc) +{ + assert(perfmon_td_pmc_used(td_pmc)); + td_pmc->nr_refs--; +} + +static bool +perfmon_td_pmc_loaded(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->loaded; +} + +static void +perfmon_td_pmc_load(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value) +{ + assert(!perfmon_td_pmc_loaded(td_pmc)); + + td_pmc->cpu_pmc_value = cpu_pmc_value; + td_pmc->loaded = true; +} + +static void +perfmon_td_pmc_update(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value) +{ + uint64_t delta; + + assert(perfmon_td_pmc_loaded(td_pmc)); + + delta = cpu_pmc_value - td_pmc->cpu_pmc_value; + td_pmc->cpu_pmc_value = cpu_pmc_value; + td_pmc->value += delta; +} + +static void +perfmon_td_pmc_unload(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value) +{ + perfmon_td_pmc_update(td_pmc, cpu_pmc_value); + td_pmc->loaded = false; +} + +static uint64_t +perfmon_td_pmc_read(const struct perfmon_td_pmc *td_pmc) +{ + return td_pmc->value; +} + +static unsigned int +perfmon_td_get_pmc_index(const struct perfmon_td *td, + const struct perfmon_td_pmc *td_pmc) +{ + size_t pmc_index; + + pmc_index = td_pmc - td->pmcs; + assert(pmc_index < ARRAY_SIZE(td->pmcs)); + return pmc_index; +} + +static struct perfmon_td_pmc * +perfmon_td_get_pmc(struct perfmon_td *td, unsigned int index) +{ + assert(index < ARRAY_SIZE(td->pmcs)); + return 
&td->pmcs[index]; +} + +void +perfmon_td_init(struct perfmon_td *td) +{ + spinlock_init(&td->lock); + + for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) { + perfmon_td_pmc_init(perfmon_td_get_pmc(td, i)); + } +} + +static void +perfmon_td_load_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc) +{ + unsigned int pmc_index, pmc_id, raw_event_id; + struct perfmon_cpu_pmu *cpu_pmu; + uint64_t cpu_pmc_value; + + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + pmc_id = perfmon_td_pmc_id(td_pmc); + raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index, + pmc_id, raw_event_id); + perfmon_td_pmc_load(td_pmc, cpu_pmc_value); +} + +static void +perfmon_td_unload_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc) +{ + struct perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index); + perfmon_td_pmc_unload(td_pmc, cpu_pmc_value); +} + +static void +perfmon_td_update_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc) +{ + struct perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index); + perfmon_td_pmc_update(td_pmc, cpu_pmc_value); +} + +void +perfmon_td_load(struct perfmon_td *td) +{ + unsigned int pmc_index, pmc_id, raw_event_id; + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_td_pmc *td_pmc; + uint64_t cpu_pmc_value; + + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + cpu_pmu = perfmon_get_local_cpu_pmu(); + + spinlock_lock(&td->lock); + + for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) { + td_pmc = perfmon_td_get_pmc(td, i); + + if (!perfmon_td_pmc_used(td_pmc) || 
perfmon_td_pmc_loaded(td_pmc)) { + continue; + } + + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + pmc_id = perfmon_td_pmc_id(td_pmc); + raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index, + pmc_id, raw_event_id); + perfmon_td_pmc_load(td_pmc, cpu_pmc_value); + } + + spinlock_unlock(&td->lock); +} + +void +perfmon_td_unload(struct perfmon_td *td) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_td_pmc *td_pmc; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + cpu_pmu = perfmon_get_local_cpu_pmu(); + + spinlock_lock(&td->lock); + + for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) { + td_pmc = perfmon_td_get_pmc(td, i); + + if (!perfmon_td_pmc_loaded(td_pmc)) { + continue; + } + + pmc_index = perfmon_td_get_pmc_index(td, td_pmc); + cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index); + perfmon_td_pmc_unload(td_pmc, cpu_pmc_value); + } + + spinlock_unlock(&td->lock); +} + +static void +perfmon_event_load(struct perfmon_event *event, uint64_t pmc_value) +{ + event->pmc_value = pmc_value; +} + +static void +perfmon_event_update(struct perfmon_event *event, uint64_t pmc_value) +{ + uint64_t delta; + + delta = pmc_value - event->pmc_value; + event->value += delta; + event->pmc_value = pmc_value; +} + +static void +perfmon_event_load_cpu_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_cpu_pmu *cpu_pmu; + const struct perfmon_pmc *pmc; + struct perfmon_pmu *pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + event = arg; + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmu = perfmon_get_pmu(); + pmc_index = perfmon_event_pmc_index(event); + pmc = perfmon_pmu_get_pmc(pmu, pmc_index); + cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index, + perfmon_pmc_id(pmc), + perfmon_pmc_raw_event_id(pmc)); + perfmon_event_load(event, cpu_pmc_value); +} + +static void +perfmon_event_load_cpu(struct 
perfmon_event *event, unsigned int cpu) +{ + perfmon_event_set_type_cpu(event); + event->cpu = cpu; + xcall_call(perfmon_event_load_cpu_remote, event, cpu); +} + +static void +perfmon_event_load_thread_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + uint64_t td_pmc_value; + + event = arg; + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock(&td->lock); + + if (thread_self() == event->thread) { + + if (perfmon_td_pmc_loaded(td_pmc)) { + perfmon_td_update_pmc(td, td_pmc); + } else { + perfmon_td_load_pmc(td, td_pmc); + } + } + + td_pmc_value = perfmon_td_pmc_read(td_pmc); + + spinlock_unlock(&td->lock); + + perfmon_event_load(event, td_pmc_value); +} + +static void +perfmon_event_load_thread(struct perfmon_event *event, struct thread *thread) +{ + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + struct perfmon_pmu *pmu; + const struct perfmon_pmc *pmc; + unsigned int pmc_index; + unsigned long flags; + + pmu = perfmon_get_pmu(); + + thread_ref(thread); + event->thread = thread; + + pmc_index = perfmon_event_pmc_index(event); + pmc = perfmon_pmu_get_pmc(pmu, pmc_index); + td = thread_get_perfmon_td(thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock_intr_save(&td->lock, &flags); + + if (perfmon_td_pmc_used(td_pmc)) { + perfmon_td_pmc_ref(td_pmc); + } else { + perfmon_td_pmc_use(td_pmc, perfmon_pmc_id(pmc), + perfmon_pmc_raw_event_id(pmc)); + } + + spinlock_unlock_intr_restore(&td->lock, flags); + + xcall_call(perfmon_event_load_thread_remote, event, thread_cpu(thread)); +} + +static void +perfmon_event_unload_cpu_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + event = arg; + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = 
perfmon_event_pmc_index(event); + cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index); + perfmon_event_update(event, cpu_pmc_value); +} + +static void +perfmon_event_unload_cpu(struct perfmon_event *event) +{ + xcall_call(perfmon_event_unload_cpu_remote, event, event->cpu); + perfmon_event_clear_type_cpu(event); +} + +static void +perfmon_event_unload_thread_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + uint64_t td_pmc_value; + + event = arg; + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock(&td->lock); + + if ((thread_self() == event->thread) && perfmon_td_pmc_loaded(td_pmc)) { + if (perfmon_td_pmc_used(td_pmc)) { + perfmon_td_update_pmc(td, td_pmc); + } else { + perfmon_td_unload_pmc(td, td_pmc); + } + } + + td_pmc_value = perfmon_td_pmc_read(td_pmc); + + spinlock_unlock(&td->lock); + + perfmon_event_update(event, td_pmc_value); +} + +static void +perfmon_event_unload_thread(struct perfmon_event *event) +{ + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + unsigned long flags; + + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock_intr_save(&td->lock, &flags); + perfmon_td_pmc_unref(td_pmc); + spinlock_unlock_intr_restore(&td->lock, flags); + + xcall_call(perfmon_event_unload_thread_remote, event, + thread_cpu(event->thread)); + + thread_unref(event->thread); + event->thread = NULL; +} + +static void +perfmon_event_sync_cpu_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_cpu_pmu *cpu_pmu; + unsigned int pmc_index; + uint64_t cpu_pmc_value; + + event = arg; + cpu_pmu = perfmon_get_local_cpu_pmu(); + pmc_index = perfmon_event_pmc_index(event); + cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index); + 
perfmon_event_update(event, cpu_pmc_value); +} + +static void +perfmon_event_sync_cpu(struct perfmon_event *event) +{ + xcall_call(perfmon_event_sync_cpu_remote, event, event->cpu); +} + +static void +perfmon_event_sync_thread_remote(void *arg) +{ + struct perfmon_event *event; + struct perfmon_td_pmc *td_pmc; + struct perfmon_td *td; + unsigned int pmc_index; + uint64_t td_pmc_value; + + event = arg; + pmc_index = perfmon_event_pmc_index(event); + td = thread_get_perfmon_td(event->thread); + td_pmc = perfmon_td_get_pmc(td, pmc_index); + + spinlock_lock(&td->lock); + + if (thread_self() == event->thread) { + perfmon_td_update_pmc(td, td_pmc); + } + + td_pmc_value = perfmon_td_pmc_read(td_pmc); + + spinlock_unlock(&td->lock); + + perfmon_event_update(event, td_pmc_value); +} + +static void +perfmon_event_sync_thread(struct perfmon_event *event) +{ + xcall_call(perfmon_event_sync_thread_remote, event, + thread_cpu(event->thread)); +} + +static int +perfmon_event_attach_pmu(struct perfmon_event *event) +{ + unsigned int raw_event_id = 0; + struct perfmon_pmu *pmu; + struct perfmon_pmc *pmc; + int error; + + pmu = perfmon_get_pmu(); + + if (!(event->flags & PERFMON_EF_RAW)) { + error = perfmon_pmu_translate(pmu, &raw_event_id, event->id); + + if (error) { + return error; + } + } + + error = perfmon_pmu_take_pmc(pmu, &pmc, raw_event_id); + + if (error) { + return error; + } + + event->pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc); + event->flags |= PERFMON_EF_ATTACHED; + event->value = 0; + return 0; +} + +static void +perfmon_event_detach_pmu(struct perfmon_event *event) +{ + struct perfmon_pmu *pmu; + struct perfmon_pmc *pmc; + + pmu = perfmon_get_pmu(); + pmc = perfmon_pmu_get_pmc(pmu, perfmon_event_pmc_index(event)); + perfmon_pmu_put_pmc(pmu, pmc); + event->flags &= ~PERFMON_EF_ATTACHED; +} + +int +perfmon_event_attach(struct perfmon_event *event, struct thread *thread) +{ + int error; + + spinlock_lock(&event->lock); + + if (perfmon_event_attached(event)) { + 
error = EINVAL; + goto error; + } + + error = perfmon_event_attach_pmu(event); + + if (error) { + goto error; + } + + perfmon_event_load_thread(event, thread); + + spinlock_unlock(&event->lock); + + return 0; + +error: + spinlock_unlock(&event->lock); + + return error; +} + +int +perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu) +{ + int error; + + if (cpu >= cpu_count()) { + return EINVAL; + } + + spinlock_lock(&event->lock); + + if (perfmon_event_attached(event)) { + error = EINVAL; + goto out; + } + + error = perfmon_event_attach_pmu(event); + + if (error) { + goto out; + } + + perfmon_event_load_cpu(event, cpu); + error = 0; + +out: + spinlock_unlock(&event->lock); + + return error; +} + +int +perfmon_event_detach(struct perfmon_event *event) +{ + int error; + + spinlock_lock(&event->lock); + + if (!perfmon_event_attached(event)) { + error = EINVAL; + goto out; + } + + if (perfmon_event_type_cpu(event)) { + perfmon_event_unload_cpu(event); + } else { + perfmon_event_unload_thread(event); + } + + perfmon_event_detach_pmu(event); + error = 0; + +out: + spinlock_unlock(&event->lock); + + return error; +} + +uint64_t +perfmon_event_read(struct perfmon_event *event) +{ + uint64_t value; + + spinlock_lock(&event->lock); + + if (perfmon_event_attached(event)) { + if (perfmon_event_type_cpu(event)) { + perfmon_event_sync_cpu(event); + } else { + perfmon_event_sync_thread(event); + } + } + + value = event->value; + + spinlock_unlock(&event->lock); + + return value; +} + +static uint64_t __init +perfmon_compute_poll_interval(uint64_t pmc_width) +{ + uint64_t cycles, time; + + if (pmc_width == 64) { + cycles = (uint64_t)-1; + } else { + cycles = (uint64_t)1 << pmc_width; + } + + /* + * Assume an unrealistically high upper bound on the number of + * events per cycle to obtain a comfortable margin of safety.
+ */ + cycles /= 100; + time = cycles / (cpu_get_freq() / 1000); + + if (time < PERFMON_MIN_POLL_INTERVAL) { + log_warning("perfmon: invalid poll interval %llu, forced to %llu", + (unsigned long long)time, + (unsigned long long)PERFMON_MIN_POLL_INTERVAL); + time = PERFMON_MIN_POLL_INTERVAL; + } + + return clock_ticks_from_ms(time); +} + +void __init +perfmon_register(struct perfmon_dev *dev) +{ + const struct perfmon_dev_ops *ops; + + ops = dev->ops; + assert(ops->translate && ops->alloc && ops->free + && ops->start && ops->stop && ops->read); + assert(dev->pmc_width <= 64); + + if ((dev->ops->handle_overflow_intr == NULL) && (dev->poll_interval == 0)) { + dev->poll_interval = perfmon_compute_poll_interval(dev->pmc_width); + } + + perfmon_pmu_set_dev(perfmon_get_pmu(), dev); +} + +void +perfmon_overflow_intr(void) +{ + perfmon_pmu_handle_overflow_intr(perfmon_get_pmu()); +} + +void +perfmon_report_overflow(unsigned int pmc_index) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_cpu_pmc *cpu_pmc; + + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + cpu_pmu = perfmon_get_local_cpu_pmu(); + cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index); + perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc); +} + +static int __init +perfmon_bootstrap(void) +{ + perfmon_pmu_init(perfmon_get_pmu()); + return 0; +} + +INIT_OP_DEFINE(perfmon_bootstrap, + INIT_OP_DEP(log_setup, true), + INIT_OP_DEP(spinlock_setup, true)); + +static int __init +perfmon_setup(void) +{ + struct perfmon_dev *dev; + + dev = perfmon_pmu_get_dev(perfmon_get_pmu()); + + if (!dev) { + return ENODEV; + } + + for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) { + perfmon_cpu_pmu_init(perfmon_get_cpu_pmu(cpu), cpu, dev); + } + + return 0; +} + +INIT_OP_DEFINE(perfmon_setup, + INIT_OP_DEP(boot_setup_pmu, true), + INIT_OP_DEP(cpu_mp_probe, true), + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(percpu_setup, true), + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(spinlock_setup, true), + 
INIT_OP_DEP(syscnt_setup, true)); diff --git a/kern/perfmon.h b/kern/perfmon.h new file mode 100644 index 00000000..0c17752c --- /dev/null +++ b/kern/perfmon.h @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Performance monitoring based on hardware performance counters. + * + * The hardware layer is represented by a performance monitoring unit (PMU), + * which provides performance monitoring counters (PMCs). + */ + +#ifndef KERN_PERFMON_H +#define KERN_PERFMON_H + +#include <stdint.h> + +#include <kern/init.h> +#include <kern/perfmon_types.h> +#include <kern/thread.h> + +/* + * IDs of generic performance monitoring events. + */ +#define PERFMON_EV_CYCLE 0 +#define PERFMON_EV_REF_CYCLE 1 +#define PERFMON_EV_INSTRUCTION 2 +#define PERFMON_EV_CACHE_REF 3 +#define PERFMON_EV_CACHE_MISS 4 +#define PERFMON_EV_BRANCH 5 +#define PERFMON_EV_BRANCH_MISS 6 +#define PERFMON_NR_GENERIC_EVENTS 7 + +/* + * Event flags. + */ +#define PERFMON_EF_KERN 0x1 /* Monitor events in kernel mode */ +#define PERFMON_EF_USER 0x2 /* Monitor events in user mode */ +#define PERFMON_EF_RAW 0x4 /* Raw event ID, generic if unset */ + +/* + * Performance monitoring operations. + * + * This is a public structure. 
+ * + * All operations are either global but serialized by the caller, or + * processor-local and called with interrupts and preemption disabled. + * + * If the hardware doesn't efficiently support overflow interrupts, the + * handler must be set to NULL, making the perfmon module periodically + * check the raw value of the hardware counters. + */ +struct perfmon_dev_ops { + /* + * Convert a generic event ID into a raw event ID. + * + * Global operation. + */ + int (*translate)(unsigned int *raw_event_idp, unsigned int event_id); + + /* + * Allocate a performance monitoring counter globally for the given + * raw event ID, and return the counter ID through the given pointer. + * The range of IDs must start from 0 and increase contiguously. + * + * The PMC index is to be used by the driver when reporting overflows, + * if a custom overflow interrupt handler is used. + * + * Global operation. + */ + int (*alloc)(unsigned int *pmc_idp, unsigned int pmc_index, + unsigned int raw_event_id); + + /* + * Free an allocated performance monitoring counter. + * + * Global operation. + */ + void (*free)(unsigned int pmc_id); + + /* + * Start a performance monitoring counter for the given raw event ID. + * + * Processor-local operation. + */ + void (*start)(unsigned int pmc_id, unsigned int raw_event_id); + + /* + * Stop a performance monitoring counter. + * + * Processor-local operation. + */ + void (*stop)(unsigned int pmc_id); + + /* + * Read the value of a performance monitoring counter. + * + * Processor-local operation. + */ + uint64_t (*read)(unsigned int pmc_id); + + /* + * Custom overflow interrupt handler. + * + * Processor-local operation. + */ + void (*handle_overflow_intr)(void); +}; + +/* + * Performance monitoring device. + * + * This is a public structure. + * + * The PMC width is expressed in bits. 
+ * + * If the driver doesn't provide an overflow interrupt handler, it may set + * the poll interval, in ticks, to a duration that safely allows the detection + * of a single overflow. A value of 0 lets the perfmon module compute a poll + * interval itself. + */ +struct perfmon_dev { + const struct perfmon_dev_ops *ops; + unsigned int pmc_width; + uint64_t poll_interval; +}; + +/* + * Performance monitoring thread data. + */ +struct perfmon_td; + +/* + * Performance monitoring event. + * + * An event describes a single, well-defined hardware condition and tracks + * its occurrences over a period of time. + */ +struct perfmon_event; + +/* + * Initialize thread-specific data. + */ +void perfmon_td_init(struct perfmon_td *td); + +/* + * Load/unload events attached to a thread on the current processor. + * + * These functions should only be used by the scheduler on a context switch. + * Interrupts and preemption must be disabled when calling these functions. + */ +void perfmon_td_load(struct perfmon_td *td); +void perfmon_td_unload(struct perfmon_td *td); + +/* + * Initialize an event. + */ +int perfmon_event_init(struct perfmon_event *event, unsigned int id, + unsigned int flags); + +/* + * Attach/detach an event to/from a thread or a processor. + * + * Attaching an event allocates hardware resources and enables monitoring. + * The number of occurrences for the given event is reset. + * + * An event can only be attached to one thread or processor at a time. + */ +int perfmon_event_attach(struct perfmon_event *event, struct thread *thread); +int perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu); +int perfmon_event_detach(struct perfmon_event *event); + +/* + * Obtain the number of occurrences of an event. + */ +uint64_t perfmon_event_read(struct perfmon_event *event); + +/* + * Register a PMU device. + * + * Currently, there can only be a single system-wide PMU device, which + * assumes the driver is the same for all processors. 
+ */ +void perfmon_register(struct perfmon_dev *dev); + +/* + * Handle an overflow interrupt. + * + * This function must be called in interrupt context. + */ +void perfmon_overflow_intr(void); + +/* + * Report a PMC overflow. + * + * This function is intended to be used by PMU drivers using a custom + * overflow interrupt handler. + * + * This function must be called in interrupt context. + */ +void perfmon_report_overflow(unsigned int pmc_index); + +/* + * This init operation provides : + * - PMU device registration + */ +INIT_OP_DECLARE(perfmon_bootstrap); + +#endif /* KERN_PERFMON_H */ diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h new file mode 100644 index 00000000..c316312a --- /dev/null +++ b/kern/perfmon_types.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Isolated type definitions used to avoid inclusion circular dependencies. + */ + +#ifndef KERN_PERFMON_TYPES_H +#define KERN_PERFMON_TYPES_H + +#ifdef CONFIG_PERFMON + +#include <stdbool.h> +#include <stdint.h> + +#include <kern/spinlock_types.h> + +/* + * Maximum number of supported hardware counters. + */ +#define PERFMON_MAX_PMCS CONFIG_PERFMON_MAX_PMCS + +/* + * Performance monitoring event. + * + * An event may be unattached, attached to a thread, or attached to a CPU. 
+ * When it is loaded, the current value of the underlying PMC is saved. + * When it is updated, the delta between the current and saved PMC values + * is added to the event value. + */ +struct perfmon_event { + struct spinlock lock; + unsigned int flags; + unsigned int id; + uint64_t pmc_value; + uint64_t value; + + union { + struct thread *thread; + unsigned int cpu; + }; + + unsigned int pmc_index; +}; + +/* + * Per-thread performance monitoring counter. + * + * Per-thread PMCs are indexed the same way as global PMCs. + * + * A per-thread PMC is referenced when an event is attached to a thread. + * The PMC may only be loaded if the thread is running on a processor, + * as a result of an event being attached to the thread, or the thread + * being dispatched by the scheduler. Note that this allows a transient + * state to be seen where a per-thread PMC is both unused and loaded. + * This happens after detaching an event from a thread, resulting in + * the underlying per-thread PMC to become unused, but if the thread + * is running concurrently, the counter is still loaded. The implementation + * resolves the situation by unloading the counter, which is either + * done by an explicit unload cross-call, or when the scheduler preempts + * the thread and unloads its thread data. + * + * When a per-thread PMC is loaded, the current value of the underlying + * PMC is saved, and when it's updated, the delta between the current + * and saved PMC values is added to the per-thread PMC value. + */ +struct perfmon_td_pmc { + unsigned int nr_refs; + bool loaded; + unsigned int pmc_id; + unsigned int raw_event_id; + uint64_t cpu_pmc_value; + uint64_t value; +}; + +/* + * Per-thread performance monitoring data. + * + * Interrupts must be disabled when locking thread data. 
+ */ +struct perfmon_td { + struct spinlock lock; + struct perfmon_td_pmc pmcs[PERFMON_MAX_PMCS]; +}; + +#endif /* CONFIG_PERFMON */ + +#endif /* KERN_PERFMON_TYPES_H */ diff --git a/kern/task.c b/kern/task.c index 5df72251..3ad863bd 100644 --- a/kern/task.c +++ b/kern/task.c @@ -257,7 +257,7 @@ task_info(struct task *task) printf(TASK_INFO_ADDR_FMT " %c %8s:" TASK_INFO_ADDR_FMT " %.2s:%02hu %02u %s\n", (unsigned long)thread, - thread_state_to_chr(thread), + thread_state_to_chr(thread_state(thread)), thread_wchan_desc(thread), (unsigned long)thread_wchan_addr(thread), thread_sched_class_to_str(thread_user_sched_class(thread)), diff --git a/kern/thread.c b/kern/thread.c index e79ef311..a8f58b39 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -100,6 +100,7 @@ #include <kern/macros.h> #include <kern/panic.h> #include <kern/percpu.h> +#include <kern/perfmon.h> #include <kern/rcu.h> #include <kern/shell.h> #include <kern/sleepq.h> @@ -600,14 +601,28 @@ thread_runq_wakeup_balancer(struct thread_runq *runq) } thread_clear_wchan(runq->balancer); - runq->balancer->state = THREAD_RUNNING; + atomic_store(&runq->balancer->state, THREAD_RUNNING, ATOMIC_RELAXED); thread_runq_wakeup(runq, runq->balancer); } static void -thread_runq_schedule_prepare(struct thread *thread) +thread_runq_schedule_load(struct thread *thread) { pmap_load(thread->task->map->pmap); + +#ifdef CONFIG_PERFMON + perfmon_td_load(thread_get_perfmon_td(thread)); +#endif +} + +static void +thread_runq_schedule_unload(struct thread *thread) +{ +#ifdef CONFIG_PERFMON + perfmon_td_unload(thread_get_perfmon_td(thread)); +#else + (void)thread; +#endif } static struct thread_runq * @@ -639,6 +654,8 @@ thread_runq_schedule(struct thread_runq *runq) assert(next->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL); if (likely(prev != next)) { + thread_runq_schedule_unload(prev); + rcu_report_context_switch(thread_rcu_reader(prev)); spinlock_transfer_owner(&runq->lock, next); @@ -660,10 +677,10 @@ thread_runq_schedule(struct 
thread_runq *runq) * - The current thread may have been migrated to another processor. */ barrier(); + thread_runq_schedule_load(prev); + next = NULL; runq = thread_runq_local(); - - thread_runq_schedule_prepare(prev); } else { next = NULL; } @@ -1750,7 +1767,7 @@ thread_main(void (*fn)(void *), void *arg) assert(!thread_preempt_enabled()); thread = thread_self(); - thread_runq_schedule_prepare(thread); + thread_runq_schedule_load(thread); spinlock_unlock(&thread_runq_local()->lock); cpu_intr_enable(); @@ -1843,6 +1860,10 @@ thread_init(struct thread *thread, void *stack, thread->stack = stack; strlcpy(thread->name, attr->name, sizeof(thread->name)); +#ifdef CONFIG_PERFMON + perfmon_td_init(thread_get_perfmon_td(thread)); +#endif + if (attr->flags & THREAD_ATTR_DETACHED) { thread->flags |= THREAD_DETACHED; } @@ -1989,8 +2010,9 @@ static void thread_join_common(struct thread *thread) { struct thread_runq *runq; - unsigned long flags, state; struct thread *self; + unsigned long flags; + unsigned int state; self = thread_self(); assert(thread != self); @@ -2060,7 +2082,7 @@ thread_balance(void *arg) for (;;) { runq->idle_balance_ticks = THREAD_IDLE_BALANCE_TICKS; thread_set_wchan(self, runq, "runq"); - self->state = THREAD_SLEEPING; + atomic_store(&self->state, THREAD_SLEEPING, ATOMIC_RELAXED); runq = thread_runq_schedule(runq); assert(runq == arg); @@ -2309,6 +2331,13 @@ thread_setup(void) #define THREAD_STACK_GUARD_INIT_OP_DEPS #endif /* CONFIG_THREAD_STACK_GUARD */ +#ifdef CONFIG_PERFMON +#define THREAD_PERFMON_INIT_OP_DEPS \ + INIT_OP_DEP(perfmon_bootstrap, true), +#else /* CONFIG_PERFMON */ +#define THREAD_PERFMON_INIT_OP_DEPS +#endif /* CONFIG_PERFMON */ + INIT_OP_DEFINE(thread_setup, INIT_OP_DEP(cpumap_setup, true), INIT_OP_DEP(kmem_setup, true), @@ -2318,6 +2347,7 @@ INIT_OP_DEFINE(thread_setup, INIT_OP_DEP(thread_bootstrap, true), INIT_OP_DEP(turnstile_setup, true), THREAD_STACK_GUARD_INIT_OP_DEPS + THREAD_PERFMON_INIT_OP_DEPS ); void __init @@ -2421,7 
+2451,7 @@ thread_exit(void) runq = thread_runq_local(); spinlock_lock_intr_save(&runq->lock, &flags); - thread->state = THREAD_DEAD; + atomic_store(&thread->state, THREAD_DEAD, ATOMIC_RELAXED); thread_runq_schedule(runq); panic("thread: dead thread walking"); @@ -2461,7 +2491,7 @@ thread_wakeup_common(struct thread *thread, int error) } thread_clear_wchan(thread); - thread->state = THREAD_RUNNING; + atomic_store(&thread->state, THREAD_RUNNING, ATOMIC_RELAXED); thread_unlock_runq(runq, flags); } @@ -2532,7 +2562,7 @@ thread_sleep_common(struct spinlock *interlock, const void *wchan_addr, } thread_set_wchan(thread, wchan_addr, wchan_desc); - thread->state = THREAD_SLEEPING; + atomic_store(&thread->state, THREAD_SLEEPING, ATOMIC_RELAXED); runq = thread_runq_schedule(runq); assert(thread->state == THREAD_RUNNING); @@ -2699,9 +2729,9 @@ thread_report_periodic_event(void) } char -thread_state_to_chr(const struct thread *thread) +thread_state_to_chr(unsigned int state) { - switch (thread->state) { + switch (state) { case THREAD_RUNNING: return 'R'; case THREAD_SLEEPING: @@ -2906,6 +2936,21 @@ thread_key_create(unsigned int *keyp, thread_dtor_fn_t dtor) *keyp = key; } +unsigned int +thread_cpu(const struct thread *thread) +{ + const struct thread_runq *runq; + + runq = atomic_load(&thread->runq, ATOMIC_RELAXED); + return runq->cpu; +} + +unsigned int +thread_state(const struct thread *thread) +{ + return atomic_load(&thread->state, ATOMIC_RELAXED); +} + bool thread_is_running(const struct thread *thread) { diff --git a/kern/thread.h b/kern/thread.h index 6e696fc7..5b5729ce 100644 --- a/kern/thread.h +++ b/kern/thread.h @@ -75,6 +75,13 @@ struct thread_sched_data { #define THREAD_KERNEL_PREFIX KERNEL_NAME "_" /* + * Thread states. + */ +#define THREAD_RUNNING 0 +#define THREAD_SLEEPING 1 +#define THREAD_DEAD 2 + +/* * Scheduling policies. * * The idle policy is reserved for the per-CPU idle threads. 
@@ -323,7 +330,7 @@ thread_wchan_desc(const struct thread *thread) /* * Return a character representation of the state of a thread. */ -char thread_state_to_chr(const struct thread *thread); +char thread_state_to_chr(unsigned int state); static inline const struct thread_sched_data * thread_get_user_sched_data(const struct thread *thread) @@ -705,6 +712,28 @@ thread_get_specific(unsigned int key) return thread_tsd_get(thread_self(), key); } +#ifdef CONFIG_PERFMON +static inline struct perfmon_td * +thread_get_perfmon_td(struct thread *thread) +{ + return &thread->perfmon_td; +} +#endif /* CONFIG_PERFMON */ + +/* + * Return the last CPU on which the thread has been scheduled. + * + * This call isn't synchronized, and the caller may obtain an outdated value. + */ +unsigned int thread_cpu(const struct thread *thread); + +/* + * Return the current state of the given thread. + * + * This call isn't synchronized, and the caller may obtain an outdated value. + */ +unsigned int thread_state(const struct thread *thread); + /* * Return true if the given thread is running. * diff --git a/kern/thread_i.h b/kern/thread_i.h index 0be1e773..9c9a705b 100644 --- a/kern/thread_i.h +++ b/kern/thread_i.h @@ -24,6 +24,7 @@ #include <kern/atomic.h> #include <kern/cpumap.h> #include <kern/list_types.h> +#include <kern/perfmon_types.h> #include <kern/rcu_types.h> #include <kern/spinlock_types.h> #include <kern/turnstile_types.h> @@ -45,16 +46,6 @@ struct thread_fs_runq; #define THREAD_DETACHED 0x2UL /* Resources automatically released on exit */ /* - * Thread states. - * - * Threads in the running state may not be on a run queue if they're being - * awaken. - */ -#define THREAD_RUNNING 0 -#define THREAD_SLEEPING 1 -#define THREAD_DEAD 2 - -/* * Scheduling data for a real-time thread. 
*/ struct thread_rt_data { @@ -113,7 +104,7 @@ struct thread { const void *wchan_addr; /* (r) */ const char *wchan_desc; /* (r) */ int wakeup_error; /* (r) */ - unsigned short state; /* (r) */ + unsigned int state; /* (a,r) */ /* Sleep queue available for lending */ struct sleepq *priv_sleepq; /* (-) */ @@ -185,6 +176,10 @@ struct thread { struct list task_node; /* (T) */ void *stack; /* (-) */ char name[THREAD_NAME_SIZE]; /* ( ) */ + +#ifdef CONFIG_PERFMON + struct perfmon_td perfmon_td; /* ( ) */ +#endif }; #define THREAD_ATTR_DETACHED 0x1 diff --git a/test/Kconfig b/test/Kconfig index 3f1c3b69..9f0faf44 100644 --- a/test/Kconfig +++ b/test/Kconfig @@ -34,6 +34,18 @@ config TEST_MODULE_MUTEX config TEST_MODULE_MUTEX_PI bool "mutex_pi" +config TEST_MODULE_PERFMON_CPU + bool "perfmon_cpu" + depends on PERFMON + +config TEST_MODULE_PERFMON_THREAD + bool "perfmon_thread" + depends on PERFMON + +config TEST_MODULE_PERFMON_TORTURE + bool "perfmon_torture" + depends on PERFMON + config TEST_MODULE_PMAP_UPDATE_MP bool "pmap_update_mp" diff --git a/test/Makefile b/test/Makefile index cdce6130..76edbf0e 100644 --- a/test/Makefile +++ b/test/Makefile @@ -2,6 +2,9 @@ x15_SOURCES-$(CONFIG_TEST_MODULE_ATOMIC) += test/test_atomic.c x15_SOURCES-$(CONFIG_TEST_MODULE_BULLETIN) += test/test_bulletin.c x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX) += test/test_mutex.c x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX_PI) += test/test_mutex_pi.c +x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c +x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c +x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c x15_SOURCES-$(CONFIG_TEST_MODULE_PMAP_UPDATE_MP) += test/test_pmap_update_mp.c x15_SOURCES-$(CONFIG_TEST_MODULE_RCU_DEFER) += test/test_rcu_defer.c x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES) += test/test_sref_dirty_zeroes.c diff --git a/test/test_perfmon_cpu.c b/test/test_perfmon_cpu.c new file mode 100644 index 
00000000..75f69d3f --- /dev/null +++ b/test/test_perfmon_cpu.c @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This test checks the behavior of performance monitoring on a CPU. + * It creates a group with two events, cycle and instruction, and attaches + * that group to CPU1, where a thread is bound and runs a tight loop to + * make sure the target CPU is never idle. After some time, the measurement + * stops and values are reported. + */ + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <kern/atomic.h> +#include <kern/clock.h> +#include <kern/cpumap.h> +#include <kern/error.h> +#include <kern/list.h> +#include <kern/log.h> +#include <kern/panic.h> +#include <kern/perfmon.h> +#include <kern/thread.h> +#include <machine/cpu.h> +#include <test/test.h> + +#define TEST_WAIT_DELAY_MS 1000 + +/* + * Using another CPU than the BSP as the monitored CPU checks that PMUs are + * correctly initialized on APs. 
+ */ +#define TEST_CONTROL_CPU 0 +#define TEST_MONITORED_CPU (TEST_CONTROL_CPU + 1) +#define TEST_MIN_CPUS (TEST_MONITORED_CPU + 1) + +#define TEST_EVENT_NAME_MAX_SIZE 32 + +struct test_event { + struct list node; + struct perfmon_event pm_event; + char name[TEST_EVENT_NAME_MAX_SIZE]; +}; + +struct test_group { + struct list events; +}; + +static unsigned int test_run_stop; + +static void +test_wait(void) +{ + thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false); +} + +static void +test_event_init(struct test_event *event, unsigned int id, const char *name) +{ + int error; + + error = perfmon_event_init(&event->pm_event, id, PERFMON_EF_KERN); + error_check(error, "perfmon_event_init"); + strlcpy(event->name, name, sizeof(event->name)); +} + +static void +test_event_report(struct test_event *event) +{ + uint64_t count; + int error; + + count = perfmon_event_read(&event->pm_event); + error = (count == 0) ? EINVAL : 0; + error_check(error, __func__); + log_info("test: %s: %llu", event->name, (unsigned long long)count); +} + +static void +test_event_attach_cpu(struct test_event *event, unsigned int cpu) +{ + int error; + + error = perfmon_event_attach_cpu(&event->pm_event, cpu); + error_check(error, "perfmon_event_attach_cpu"); +} + +static void +test_event_detach(struct test_event *event) +{ + int error; + + error = perfmon_event_detach(&event->pm_event); + error_check(error, "perfmon_event_detach"); +} + +static void +test_group_init(struct test_group *group) +{ + list_init(&group->events); +} + +static void +test_group_add(struct test_group *group, struct test_event *event) +{ + list_insert_tail(&group->events, &event->node); +} + +static void +test_group_attach_cpu(struct test_group *group, unsigned int cpu) +{ + struct test_event *event; + + list_for_each_entry(&group->events, event, node) { + test_event_attach_cpu(event, cpu); + } +} + +static void +test_group_detach(struct test_group *group) +{ + struct test_event *event; + + 
list_for_each_entry(&group->events, event, node) { + test_event_detach(event); + } +} + +static void +test_group_report(struct test_group *group) +{ + struct test_event *event; + + list_for_each_entry(&group->events, event, node) { + test_event_report(event); + } +} + +static void +test_run(void *arg) +{ + unsigned int stop; + + (void)arg; + + do { + stop = atomic_load(&test_run_stop, ATOMIC_RELAXED); + } while (!stop); +} + +static void +test_control(void *arg) +{ + struct test_event cycle, instruction; + struct test_group group; + struct thread *thread; + + thread = arg; + + test_event_init(&cycle, PERFMON_EV_CYCLE, "cycle"); + test_event_init(&instruction, PERFMON_EV_INSTRUCTION, "instruction"); + test_group_init(&group); + test_group_add(&group, &cycle); + test_group_add(&group, &instruction); + test_group_attach_cpu(&group, TEST_MONITORED_CPU); + test_wait(); + test_group_report(&group); + test_wait(); + test_group_detach(&group); + test_group_report(&group); + + atomic_store(&test_run_stop, 1, ATOMIC_RELAXED); + thread_join(thread); + log_info("test: done"); +} + +void +test_setup(void) +{ + struct thread *thread; + struct thread_attr attr; + struct cpumap *cpumap; + int error; + + if (cpu_count() < TEST_MIN_CPUS) { + panic("test: %u processors required", TEST_MIN_CPUS); + } + + error = cpumap_create(&cpumap); + error_check(error, "cpumap_create"); + + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_run"); + cpumap_zero(cpumap); + cpumap_set(cpumap, TEST_MONITORED_CPU); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(&thread, &attr, test_run, NULL); + error_check(error, "thread_create"); + + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control"); + thread_attr_set_detached(&attr); + cpumap_zero(cpumap); + cpumap_set(cpumap, TEST_CONTROL_CPU); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(NULL, &attr, test_control, thread); + error_check(error, "thread_create"); + + cpumap_destroy(cpumap); +} diff --git 
a/test/test_perfmon_thread.c b/test/test_perfmon_thread.c new file mode 100644 index 00000000..0213777b --- /dev/null +++ b/test/test_perfmon_thread.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This test checks the behavior of performance monitoring on a thread. + * It creates a group with a single event, cycle, and attaches that group to + * a runner thread. Two checks are then performed : + * - the first makes sure the number of cycles changes when the runner + * thread is running + * - the second makes sure the number of cycles doesn't change when the + * runner thread is sleeping + * + * Another group with a cycle event is created and attached to CPU0 to make + * sure that a shared event is correctly handled, and the runner thread is + * bound to CPU0 to force sharing. A third thread is created to fill CPU0 + * time with cycles so that the cycle counter of the CPU-attached group + * changes while the runner thread is sleeping. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <string.h> + +#include <kern/atomic.h> +#include <kern/clock.h> +#include <kern/condition.h> +#include <kern/cpumap.h> +#include <kern/error.h> +#include <kern/kmem.h> +#include <kern/list.h> +#include <kern/log.h> +#include <kern/mutex.h> +#include <kern/panic.h> +#include <kern/perfmon.h> +#include <kern/thread.h> +#include <test/test.h> + +#define TEST_WAIT_DELAY_MS 1000 + +#define TEST_EVENT_NAME_MAX_SIZE 32 + +struct test_event { + struct list node; + struct perfmon_event pm_event; + uint64_t last_value; + char name[TEST_EVENT_NAME_MAX_SIZE]; +}; + +struct test_group { + struct list events; +}; + +enum test_state { + TEST_STATE_RUNNING, + TEST_STATE_SUSPENDED, + TEST_STATE_TERMINATED, +}; + +static struct condition test_condition; +static struct mutex test_mutex; +static enum test_state test_state; + +static void +test_wait(void) +{ + log_info("test: controller waiting"); + thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false); + log_info("test: controller resuming"); +} + +static void +test_event_init(struct test_event *event, unsigned int id, const char *name) +{ + int error; + + error = perfmon_event_init(&event->pm_event, id, PERFMON_EF_KERN); + error_check(error, "perfmon_event_init"); + strlcpy(event->name, name, sizeof(event->name)); +} + +static void +test_event_attach(struct test_event *event, struct thread *thread) +{ + int error; + + error = perfmon_event_attach(&event->pm_event, thread); + error_check(error, "perfmon_event_attach"); +} + +static void +test_event_attach_cpu(struct test_event *event, unsigned int cpu) +{ + int error; + + error = perfmon_event_attach_cpu(&event->pm_event, cpu); + error_check(error, "perfmon_event_attach_cpu"); +} + +static void +test_event_detach(struct test_event *event) +{ + int error; + + error = perfmon_event_detach(&event->pm_event); + error_check(error, "perfmon_event_detach"); +} + +static uint64_t +test_event_read(struct 
test_event *event) +{ + uint64_t value; + + value = perfmon_event_read(&event->pm_event); + log_info("test: %s: %llu", event->name, (unsigned long long)value); + return value; +} + +static void +test_event_save(struct test_event *event) +{ + event->last_value = test_event_read(event); +} + +static void +test_event_check(struct test_event *event, bool change_expected) +{ + uint64_t value; + bool changed; + + value = test_event_read(event); + changed = (value != event->last_value); + + if (changed != change_expected) { + panic("test: invalid value"); + } + + event->last_value = value; +} + +static void +test_group_init(struct test_group *group) +{ + list_init(&group->events); +} + +static void +test_group_add(struct test_group *group, struct test_event *event) +{ + list_insert_tail(&group->events, &event->node); +} + +static void +test_group_attach(struct test_group *group, struct thread *thread) +{ + struct test_event *event; + + list_for_each_entry(&group->events, event, node) { + test_event_attach(event, thread); + } +} + +static void +test_group_attach_cpu(struct test_group *group, unsigned int cpu) +{ + struct test_event *event; + + list_for_each_entry(&group->events, event, node) { + test_event_attach_cpu(event, cpu); + } +} + +static void +test_group_detach(struct test_group *group) +{ + struct test_event *event; + + list_for_each_entry(&group->events, event, node) { + test_event_detach(event); + } +} + +static void +test_group_save(struct test_group *group) +{ + struct test_event *event; + + list_for_each_entry(&group->events, event, node) { + test_event_save(event); + } +} + +static void +test_group_check(struct test_group *group, bool change_expected) +{ + struct test_event *event; + + list_for_each_entry(&group->events, event, node) { + test_event_check(event, change_expected); + } +} + +static void +test_run(void *arg) +{ + bool report; + + (void)arg; + + report = true; + + mutex_lock(&test_mutex); + + while (test_state != TEST_STATE_TERMINATED) { + if 
(test_state == TEST_STATE_SUSPENDED) { + log_info("test: runner suspended"); + report = true; + condition_wait(&test_condition, &test_mutex); + } else { + mutex_unlock(&test_mutex); + + if (report) { + log_info("test: runner running"); + report = false; + } + + mutex_lock(&test_mutex); + } + } + + mutex_unlock(&test_mutex); +} + +static void +test_fill(void *arg) +{ + enum test_state state; + + (void)arg; + + do { + state = atomic_load(&test_state, ATOMIC_RELAXED); + } while (state != TEST_STATE_TERMINATED); +} + +static void +test_wait_state(const struct thread *thread, unsigned short state) +{ + for (;;) { + if (thread_state(thread) == state) { + break; + } + + thread_delay(1, false); + } +} + +static void +test_resume(struct thread *thread) +{ + test_wait_state(thread, THREAD_SLEEPING); + + mutex_lock(&test_mutex); + assert(test_state == TEST_STATE_SUSPENDED); + atomic_store(&test_state, TEST_STATE_RUNNING, ATOMIC_RELAXED); + condition_signal(&test_condition); + mutex_unlock(&test_mutex); + + test_wait_state(thread, THREAD_RUNNING); +} + +static void +test_suspend(struct thread *thread) +{ + test_wait_state(thread, THREAD_RUNNING); + + mutex_lock(&test_mutex); + assert(test_state == TEST_STATE_RUNNING); + atomic_store(&test_state, TEST_STATE_SUSPENDED, ATOMIC_RELAXED); + mutex_unlock(&test_mutex); + + test_wait_state(thread, THREAD_SLEEPING); +} + +static void +test_terminate(void) +{ + mutex_lock(&test_mutex); + test_state = TEST_STATE_TERMINATED; + condition_signal(&test_condition); + mutex_unlock(&test_mutex); +} + +static void +test_control(void *arg) +{ + struct test_event thread_cycle, cpu_cycle; + struct test_group thread_group, cpu_group; + struct thread *runner; + + runner = arg; + + test_event_init(&thread_cycle, PERFMON_EV_CYCLE, "thread_cycle"); + test_group_init(&thread_group); + test_group_add(&thread_group, &thread_cycle); + + test_event_init(&cpu_cycle, PERFMON_EV_CYCLE, "cpu_cycle"); + test_group_init(&cpu_group); + test_group_add(&cpu_group, 
&cpu_cycle); + + test_group_attach(&thread_group, runner); + test_group_attach_cpu(&cpu_group, 0); + + test_group_save(&thread_group); + test_group_save(&cpu_group); + test_resume(runner); + test_wait(); + test_suspend(runner); + test_group_check(&thread_group, true); + test_group_check(&cpu_group, true); + test_wait(); + test_group_check(&thread_group, false); + test_group_check(&cpu_group, true); + test_terminate(); + + test_group_detach(&cpu_group); + test_group_detach(&thread_group); + + thread_join(runner); + log_info("test: done"); +} + +void +test_setup(void) +{ + struct thread_attr attr; + struct thread *runner; + struct cpumap *cpumap; + int error; + + condition_init(&test_condition); + mutex_init(&test_mutex); + test_state = TEST_STATE_SUSPENDED; + + error = cpumap_create(&cpumap); + error_check(error, "cpumap_create"); + + cpumap_zero(cpumap); + cpumap_set(cpumap, 0); + + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_run"); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(&runner, &attr, test_run, NULL); + error_check(error, "thread_create"); + + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_fill"); + thread_attr_set_detached(&attr); + thread_attr_set_cpumap(&attr, cpumap); + thread_attr_set_priority(&attr, THREAD_SCHED_FS_PRIO_MIN); + error = thread_create(NULL, &attr, test_fill, NULL); + error_check(error, "thread_create"); + + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control"); + thread_attr_set_detached(&attr); + error = thread_create(NULL, &attr, test_control, runner); + error_check(error, "thread_create"); + + cpumap_destroy(cpumap); +} diff --git a/test/test_perfmon_torture.c b/test/test_perfmon_torture.c new file mode 100644 index 00000000..171cb99c --- /dev/null +++ b/test/test_perfmon_torture.c @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2018 Richard Braun. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This module is a stress test, expected to never terminate, of the + * performance monitoring module. It creates a control thread which + * maintains a couple of test threads running while toggling performance + * monitoring on them, attempting to produce many regular and corner + * cases. In particular, the thread pool is randomly resized by destroying + * and creating the underlying kernel threads. + * + * The control thread regularly prints some stats about the thread pool + * and the associated performance monitoring events to report that it's + * making progress. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> + +#include <kern/atomic.h> +#include <kern/clock.h> +#include <kern/error.h> +#include <kern/kmem.h> +#include <kern/log.h> +#include <kern/panic.h> +#include <kern/perfmon.h> +#include <kern/thread.h> +#include <test/test.h> + +struct test_thread { + unsigned int id; + struct thread *thread; + struct perfmon_event event; + unsigned int must_stop; + bool monitored; + unsigned long long count; +}; + +struct test_controller { + struct test_thread **threads; + unsigned int nr_threads; + unsigned int monitoring_lid; + unsigned int state_lid; + unsigned long nr_current_events; + unsigned long nr_total_events; + unsigned long nr_current_threads; + unsigned long nr_total_threads; +}; + +#define TEST_WAIT_DELAY_MS 100 +#define TEST_LOOPS_PER_PRINT 20 + +#define TEST_MONITORING_SEED 12345 +#define TEST_STATE_SEED 23456 + +static void +test_wait(void) +{ + thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false); +} + +static unsigned int +test_rand(unsigned int x) +{ + /* Basic 32-bit xorshift PRNG */ + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return x; +} + +static bool +test_thread_monitored(const struct test_thread *thread) +{ + return thread->monitored; +} + +static void +test_thread_start_monitoring(struct test_thread *thread) +{ + int error; + + error = perfmon_event_attach(&thread->event, thread->thread); + error_check(error, __func__); + thread->monitored = true; +} + +static void +test_thread_stop_monitoring(struct test_thread *thread) +{ + int error; + + thread->count += perfmon_event_read(&thread->event); + error = perfmon_event_detach(&thread->event); + error_check(error, __func__); + thread->monitored = false; +} + +static void +test_thread_report(const struct test_thread *thread) +{ + log_info("test: thread:%u count:%llu", thread->id, thread->count); +} + +static void +test_run(void *arg) +{ + struct test_thread *thread; + + thread = arg; + + for (;;) { + if 
(atomic_load(&thread->must_stop, ATOMIC_RELAXED)) { + break; + } + } +} + +static bool +test_thread_started(const struct test_thread *thread) +{ + return thread->thread; +} + +static void +test_thread_start(struct test_thread *thread) +{ + char name[THREAD_NAME_SIZE]; + struct thread_attr attr; + int error; + + assert(!thread->monitored); + + if (test_thread_started(thread)) { + return; + } + + thread->must_stop = 0; + + snprintf(name, sizeof(name), + THREAD_KERNEL_PREFIX "test_run:%u", thread->id); + thread_attr_init(&attr, name); + error = thread_create(&thread->thread, &attr, test_run, thread); + error_check(error, "thread_create"); +} + +static void +test_thread_request_stop(struct test_thread *thread) +{ + atomic_store(&thread->must_stop, 1, ATOMIC_RELAXED); +} + +static void +test_thread_join(struct test_thread *thread) +{ + assert(test_thread_started(thread)); + assert(!test_thread_monitored(thread)); + + thread_join(thread->thread); + thread->thread = NULL; +} + +static struct test_thread * +test_thread_create(unsigned int id) +{ + struct test_thread *thread; + + thread = kmem_alloc(sizeof(*thread)); + + if (thread == NULL) { + panic("thread allocation failed"); + } + + thread->id = id; + thread->thread = NULL; + thread->must_stop = 0; + thread->monitored = false; + thread->count = 0; + + perfmon_event_init(&thread->event, PERFMON_EV_CYCLE, PERFMON_EF_KERN); + test_thread_start(thread); + + return thread; +} + +static struct test_thread * +test_controller_get(struct test_controller *controller, unsigned int id) +{ + assert(id < controller->nr_threads); + return controller->threads[id]; +} + +static struct test_thread * +test_controller_get_by_lid(struct test_controller *controller, unsigned int lid) +{ + return test_controller_get(controller, lid % controller->nr_threads); +} + +static void +test_toggle_monitoring(struct test_controller *controller, + struct test_thread *thread) +{ + if (!test_thread_started(thread)) { + return; + } + + if 
(thread->monitored) { + test_thread_stop_monitoring(thread); + controller->nr_current_events--; + } else { + test_thread_start_monitoring(thread); + controller->nr_total_events++; + controller->nr_current_events++; + } +} + +static void +test_toggle_state(struct test_controller *controller, + struct test_thread *thread) +{ + if (test_thread_started(thread)) { + /* + * Make the thread stop asynchronously with monitoring to test + * thread referencing. + */ + test_thread_request_stop(thread); + + if (test_thread_monitored(thread)) { + test_thread_stop_monitoring(thread); + controller->nr_current_events--; + } + + test_thread_join(thread); + controller->nr_current_threads--; + } else { + test_thread_start(thread); + controller->nr_total_threads++; + controller->nr_current_threads++; + } +} + +static void +test_controller_report(struct test_controller *controller) +{ + log_info("test: events:%lu total:%lu threads:%lu total:%lu", + controller->nr_current_events, controller->nr_total_events, + controller->nr_current_threads, controller->nr_total_threads); + + for (unsigned int i = 0; i < controller->nr_threads; i++) { + test_thread_report(test_controller_get(controller, i)); + } +} + +static void +test_control(void *arg) +{ + struct test_controller *controller; + struct test_thread *thread; + + controller = arg; + + log_info("test: %u threads", controller->nr_threads); + + for (unsigned long nr_loops = 1; /* no condition */; nr_loops++) { + controller->monitoring_lid = test_rand(controller->monitoring_lid); + thread = test_controller_get_by_lid(controller, + controller->monitoring_lid); + test_toggle_monitoring(controller, thread); + + controller->state_lid = test_rand(controller->state_lid); + thread = test_controller_get_by_lid(controller, + controller->state_lid); + test_toggle_state(controller, thread); + + test_wait(); + + if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) { + test_controller_report(controller); + } + } +} + +static void +test_controller_create(void) +{ + 
struct test_controller *controller; + struct thread_attr attr; + int error; + + controller = kmem_alloc(sizeof(*controller)); + + if (!controller) { + panic("test: unable to create controller"); + } + + /* + * At least two threads are required by the monitoring/state toggling + * operations, otherwise they always apply to the same thread, severely + * restricting their usefulness. + */ + controller->nr_threads = MAX(cpu_count() - 1, 2); + controller->threads = kmem_alloc(controller->nr_threads + * sizeof(*controller->threads)); + + if (!controller->threads) { + panic("test: unable to allocate thread array"); + } + + for (unsigned int i = 0; i < controller->nr_threads; i++) { + controller->threads[i] = test_thread_create(i); + } + + controller->monitoring_lid = TEST_MONITORING_SEED; + controller->state_lid = TEST_STATE_SEED; + controller->nr_current_events = 0; + controller->nr_total_events = 0; + controller->nr_current_threads = controller->nr_threads; + controller->nr_total_threads = controller->nr_threads; + + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control"); + thread_attr_set_detached(&attr); + error = thread_create(NULL, &attr, test_control, controller); + error_check(error, "thread_create"); +} + +void +test_setup(void) +{ + test_controller_create(); +} |