From a441643cb427b8600bfb5ae3313522765a6f6521 Mon Sep 17 00:00:00 2001 From: Richard Braun Date: Sun, 29 Apr 2018 17:15:03 +0200 Subject: Minor changes --- arch/x86/machine/cpu.c | 13 ++++---- arch/x86/machine/cpu.h | 8 ++--- arch/x86/machine/lapic.c | 4 ++- arch/x86/machine/pmu.h | 7 ++-- arch/x86/machine/pmu_amd.c | 52 ++++++++++++++---------------- arch/x86/machine/pmu_intel.c | 76 ++++++++++++++++++++++++-------------------- kern/perfmon.c | 16 +++++----- kern/perfmon.h | 16 ++++------ kern/perfmon_i.h | 13 ++++---- kern/perfmon_types.h | 26 +++++++++++++++ kern/thread.c | 15 +++++---- kern/thread_i.h | 5 +-- 12 files changed, 136 insertions(+), 115 deletions(-) create mode 100644 kern/perfmon_types.h diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c index 54a2676..b60bea9 100644 --- a/arch/x86/machine/cpu.c +++ b/arch/x86/machine/cpu.c @@ -180,7 +180,7 @@ cpu_delay(unsigned long usecs) static const struct cpu_vendor cpu_vendors[] = { { CPU_VENDOR_INTEL, "GenuineIntel" }, - { CPU_VENDOR_AMD, "AuthenticAMD" }, + { CPU_VENDOR_AMD, "AuthenticAMD" }, }; void * __init @@ -442,11 +442,11 @@ cpu_load_idt(const void *idt, size_t size) static const struct cpu_vendor * cpu_vendor_lookup(const char *str) { - size_t i; - - for (i = 0; i < ARRAY_SIZE(cpu_vendors); i++) - if (strcmp(str, cpu_vendors[i].str) == 0) + for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); i++) { + if (strcmp(str, cpu_vendors[i].str) == 0) { return &cpu_vendors[i]; + } + } return NULL; } @@ -458,8 +458,9 @@ cpu_init_vendor_id(struct cpu *cpu) vendor = cpu_vendor_lookup(cpu->vendor_str); - if (vendor == NULL) + if (vendor == NULL) { return; + } cpu->vendor_id = vendor->id; } diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h index eebacc6..334a51c 100644 --- a/arch/x86/machine/cpu.h +++ b/arch/x86/machine/cpu.h @@ -554,7 +554,7 @@ cpu_get_msr(uint32_t msr, uint32_t *high, uint32_t *low) } /* - * uint64 version of cpu_get_msr. + * Implies a compiler barrier. 
*/ static __always_inline uint64_t cpu_get_msr64(uint32_t msr) @@ -562,7 +562,6 @@ cpu_get_msr64(uint32_t msr) uint32_t high, low; cpu_get_msr(msr, &high, &low); - return (((uint64_t)high << 32) | low); } @@ -574,15 +573,16 @@ cpu_set_msr(uint32_t msr, uint32_t high, uint32_t low) { asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high) : "memory"); } + /* - * uint64 version of cpu_set_msr. + * Implies a full memory barrier. */ static inline void cpu_set_msr64(uint32_t msr, uint64_t value) { uint32_t low, high; - low = value & 0xffffffff; + low = value & 0xffffffff; high = value >> 32; cpu_set_msr(msr, high, low); diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c index 7e8299e..6ba3ddd 100644 --- a/arch/x86/machine/lapic.c +++ b/arch/x86/machine/lapic.c @@ -339,9 +339,11 @@ void lapic_pmc_of_intr(struct trap_frame *frame) { (void)frame; + #ifdef CONFIG_PERFMON - perfmon_handle_of_intr(frame); + perfmon_of_intr(); #endif + lapic_eoi(); } diff --git a/arch/x86/machine/pmu.h b/arch/x86/machine/pmu.h index fdd79e7..009aac5 100644 --- a/arch/x86/machine/pmu.h +++ b/arch/x86/machine/pmu.h @@ -17,8 +17,8 @@ * Pmu driver modules. */ -#ifndef _X86_PMU_H -#define _X86_PMU_H +#ifndef X86_PMU_H +#define X86_PMU_H #include @@ -29,5 +29,4 @@ INIT_OP_DECLARE(pmu_intel_setup); INIT_OP_DECLARE(pmu_amd_setup); -#endif /* _X86_PMU_H */ - +#endif /* X86_PMU_H */ diff --git a/arch/x86/machine/pmu_amd.c b/arch/x86/machine/pmu_amd.c index 4d2a4f8..105ba50 100644 --- a/arch/x86/machine/pmu_amd.c +++ b/arch/x86/machine/pmu_amd.c @@ -14,14 +14,12 @@ * * You should have received a copy of the GNU General Public License * along with this program. If not, see . - * - * AMD PMU driver module. 
*/ +#include +#include #include -#include -#include #include #include #include @@ -40,7 +38,7 @@ #define PMU_AMD_RE_DCACHE_REF 6 #define PMU_AMD_RE_DCACHE_MISS 7 #define PMU_AMD_RE_IFETCH_STALL 8 - +#define PMU_AMD_RE_INVALID ((unsigned int)-1) /* * PMU MSR addresses @@ -57,23 +55,29 @@ #define PMU_AMD_EVTSEL_EN 0x00400000 /* - * AMD PMU properties seem to be identical across all processors despite - * many of them being implementation-specific. + * XXX These properties have the minimum values required by the architecture. + * TODO Per-family/model event availability database. */ #define PMU_AMD_NR_PMCS 4 #define PMU_AMD_PMC_WIDTH 48 +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. + */ struct pmu_amd { unsigned int pmc_bm; }; +static struct pmu_amd pmu_amd; + struct pmu_amd_event_code { unsigned short event_select; unsigned short umask; }; -static struct pmu_amd pmu_amd; - /* * TODO Per-family/model event availability database. 
*/ @@ -89,8 +93,6 @@ static const struct pmu_amd_event_code pmu_amd_event_codes[] = { [PMU_AMD_RE_IFETCH_STALL] = { 0x87, 0x00 }, }; -#define PMU_AMD_RE_INVALID ((unsigned int)-1) - static const unsigned int pmu_amd_generic_events[] = { [PERFMON_EV_CYCLE] = PMU_AMD_RE_CYCLE, [PERFMON_EV_REF_CYCLE] = PMU_AMD_RE_INVALID, @@ -120,7 +122,6 @@ pmu_amd_translate(unsigned int *raw_event_idp, unsigned int event_id) assert(event_id < ARRAY_SIZE(pmu_amd_generic_events)); *raw_event_idp = pmu_amd_generic_events[event_id]; - return 0; } @@ -130,7 +131,7 @@ pmu_amd_alloc(unsigned int *pmc_idp, unsigned int raw_event_id) struct pmu_amd *pmu; unsigned int pmc_id; - /* TODO Check raw event availability */ + /* TODO Per-family/model event availability database */ (void)raw_event_id; pmu = pmu_amd_get(); @@ -156,9 +157,7 @@ pmu_amd_free(unsigned int pmc_id) pmu = pmu_amd_get(); mask = (1U << pmc_id); - assert(!(pmu->pmc_bm & mask)); - pmu->pmc_bm |= mask; } @@ -200,37 +199,35 @@ pmu_amd_read(unsigned int pmc_id) return cpu_get_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id); } -#ifdef CONFIG_PERFMON_TEST - static void pmu_amd_write(unsigned int pmc_id, uint64_t value) { cpu_set_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id, value); } -#endif /* CONFIG_PERFMON_TEST */ - +/* + * TODO Make the perfmon module handle basic overflow handling by polling + * counters. + */ static void -pmu_amd_handle_of_intr_v1(struct trap_frame *frame) +pmu_amd_handle_of_intr_v1(void) { struct pmu_amd *pmu; + uint64_t value, prev; unsigned int mask; - uint64_t value; - uint64_t prev; - - (void)frame; pmu = pmu_amd_get(); for (unsigned int pmc_id = 0; pmc_id != PMU_AMD_NR_PMCS; pmc_id++) { mask = (1U << pmc_id); + if (pmu->pmc_bm & mask) { - /* counter not enabled: can't overflow. 
*/ continue; } value = pmu_amd_read(pmc_id); prev = perfmon_cpu_pmc_get_prev(pmc_id); + if (prev > value) { /* Overflow */ perfmon_cpu_pmc_inc_of(pmc_id); @@ -274,11 +271,9 @@ pmu_amd_setup(void) pmu_driver.start = pmu_amd_start; pmu_driver.stop = pmu_amd_stop; pmu_driver.read = pmu_amd_read; + pmu_driver.write = pmu_amd_write; pmu_driver.get_pmc_width = pmu_amd_get_pmc_width; pmu_driver.handle_of_intr = pmu_amd_handle_of_intr_v1; -#ifdef CONFIG_PERFMON_TEST - pmu_driver.write = pmu_amd_write; -#endif return perfmon_pmu_register(&pmu_driver); } @@ -287,4 +282,3 @@ INIT_OP_DEFINE(pmu_amd_setup, INIT_OP_DEP(perfmon_bootstrap, true), INIT_OP_DEP(cpu_setup, true), INIT_OP_DEP(log_setup, true)); - diff --git a/arch/x86/machine/pmu_intel.c b/arch/x86/machine/pmu_intel.c index 0833a71..f8168bb 100644 --- a/arch/x86/machine/pmu_intel.c +++ b/arch/x86/machine/pmu_intel.c @@ -14,14 +14,12 @@ * * You should have received a copy of the GNU General Public License * along with this program. If not, see . - * - * INTEL PMU driver module. */ +#include +#include #include -#include -#include #include #include #include @@ -76,6 +74,12 @@ #define PMU_INTEL_ID_EVLEN_OFFSET 24 #define PMU_INTEL_ID_EVLEN_MAX 7 +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. 
+ */ struct pmu_intel { unsigned int version; unsigned int nr_pmcs; @@ -84,12 +88,6 @@ struct pmu_intel { unsigned int events; }; -struct pmu_intel_event_code { - unsigned int hw_event_id; - unsigned short event_select; - unsigned short umask; -}; - static struct pmu_intel pmu_intel; /* @@ -103,6 +101,12 @@ static struct pmu_intel pmu_intel; #define PMU_INTEL_EVENT_BRANCH 0x20 #define PMU_INTEL_EVENT_BRANCH_MISS 0x40 +struct pmu_intel_event_code { + unsigned int hw_event_id; + unsigned short event_select; + unsigned short umask; +}; + static const unsigned int pmu_intel_raw_events[] = { [PERFMON_EV_CYCLE] = PMU_INTEL_RE_CYCLE, [PERFMON_EV_REF_CYCLE] = PMU_INTEL_RE_REF_CYCLE, @@ -141,20 +145,24 @@ pmu_intel_ack_status(uint64_t status) return cpu_set_msr64(PMU_INTEL_MSR_GLOBAL_OVF_CTRL, status); } +/* + * TODO use the compiler built-in once libgcc is linked again. + */ static unsigned int pmu_popcount(unsigned int bits) { - unsigned int count = 0; + unsigned int count; + + count = 0; - /* XXX: Dummy version of popcount. We should implement a faster one if it - * gets needed somewhere else. 
- */ while (bits) { if (bits & 1) { count++; } + bits >>= 1; } + return count; } @@ -167,8 +175,8 @@ pmu_intel_info(void) pmu = pmu_intel_get(); nr_events = pmu_popcount(pmu->events); log_info("pmu: driver: intel, architectural v1\n" - "pmu: nr_pmcs: %u, pmc_width: %u, events: %#x, nr_events: %u\n", - pmu->nr_pmcs, pmu->pmc_width, pmu->events, nr_events); + "pmu: nr_pmcs: %u, pmc_width: %u, events: %#x, nr_events: %u\n", + pmu->nr_pmcs, pmu->pmc_width, pmu->events, nr_events); } static int @@ -179,7 +187,6 @@ pmu_intel_translate(unsigned int *raw_event_idp, unsigned event_id) } *raw_event_idp = pmu_intel_raw_events[event_id]; - return 0; } @@ -217,9 +224,7 @@ pmu_intel_free(unsigned int pmc_id) pmu = pmu_intel_get(); mask = (1U << pmc_id); - assert(!(pmu->pmc_bm & mask)); - pmu->pmc_bm |= mask; } @@ -255,26 +260,24 @@ pmu_intel_read(unsigned int pmc_id) return cpu_get_msr64(PMU_INTEL_MSR_PMC0 + pmc_id); } -#ifdef CONFIG_PERFMON_TEST - static void pmu_intel_write(unsigned int pmc_id, uint64_t value) { cpu_set_msr64(PMU_INTEL_MSR_PMC0 + pmc_id, value); } -#endif /* CONFIG_PERFMON_TEST */ - +/* + * TODO Make the perfmon module handle basic overflow handling by polling + * counters. 
+ */ static void -pmu_intel_handle_of_intr_v1(struct trap_frame *frame) +pmu_intel_handle_of_intr_v1(void) { struct pmu_intel *pmu; unsigned int mask; uint64_t value; uint64_t prev; - (void)frame; - pmu = pmu_intel_get(); for (unsigned int pmc_id = 0; pmc_id != pmu->nr_pmcs; pmc_id++) { @@ -301,37 +304,40 @@ pmu_intel_consume_bits(uint64_t *bits) int bit; bit = __builtin_ffsll(*bits) - 1; + if (bit < 0) { return bit; } - *bits &= ~(1U << bit); + *bits &= ~(1U << bit); return bit; } static void -pmu_intel_handle_of_intr_v2(struct trap_frame *frame) +pmu_intel_handle_of_intr_v2(void) { struct pmu_intel *pmu; uint64_t status; int pmc_id; - (void)frame; - status = pmu_intel_get_status(); + if (status == 0) { return; } + pmu_intel_ack_status(status); pmu = pmu_intel_get(); - /* XXX: Mask on all PMCs (we do not check FIXED counters status */ status &= ((1U << pmu->pmc_width) - 1); - for(;;) { + + for (;;) { pmc_id = pmu_intel_consume_bits(&status); + if (pmc_id < 0) { break; } + perfmon_cpu_pmc_inc_of(pmc_id); } } @@ -369,6 +375,7 @@ pmu_intel_setup(void) cpu_cpuid(&eax, &ebx, &ecx, &edx); pmu->version = eax & PMU_INTEL_ID_VERSION_MASK; + /* TODO Check this */ if ((pmu->version == 0) || (pmu->version > 3)) { return ENODEV; } @@ -391,15 +398,14 @@ pmu_intel_setup(void) pmu_driver.start = pmu_intel_start; pmu_driver.stop = pmu_intel_stop; pmu_driver.read = pmu_intel_read; + pmu_driver.write = pmu_intel_write; pmu_driver.get_pmc_width = pmu_intel_get_pmc_width; + if (pmu->version > 2) { pmu_driver.handle_of_intr = pmu_intel_handle_of_intr_v1; } else { pmu_driver.handle_of_intr = pmu_intel_handle_of_intr_v2; } -#ifdef CONFIG_PERFMON_TEST - pmu_driver.write = pmu_intel_write; -#endif return perfmon_pmu_register(&pmu_driver); } diff --git a/kern/perfmon.c b/kern/perfmon.c index e7a1dfd..156ec7a 100644 --- a/kern/perfmon.c +++ b/kern/perfmon.c @@ -15,16 +15,12 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
* - * Permormance Monitoring module. - * Provide an interface to monitor threads and/or cpu performances events. * - * Monitoring is handled through event groups (perfmon group). + * TODO Description. * * Locking order : interrupts -> thread runq -> grouplist -> group * - * TODO Interupt overflow handling. - * TODO Api to differenciate user and kernel events. - * TODO Test on multi-group events and multi-groups threads/cpus. + * TODO API to differentiate user and kernel events. */ #include @@ -129,6 +125,10 @@ struct perfmon_group { unsigned short type; }; +/* + * List of all groups attached to a single monitored object, either a CPU + * or a thread. + */ struct perfmon_grouplist { struct list groups; struct spinlock lock; @@ -480,9 +480,9 @@ perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) } void -perfmon_handle_of_intr(struct trap_frame *frame) +perfmon_of_intr(void) { - pmu_driver.handle_of_intr(frame); + pmu_driver.handle_of_intr(); } int diff --git a/kern/perfmon.h b/kern/perfmon.h index b979e65..b9a3882 100644 --- a/kern/perfmon.h +++ b/kern/perfmon.h @@ -18,8 +18,8 @@ * Performance monitoring based on hardware performance counters. */ -#ifndef _KERN_PERFMON_H -#define _KERN_PERFMON_H +#ifndef KERN_PERFMON_H +#define KERN_PERFMON_H #include @@ -64,11 +64,9 @@ struct perfmon_pmu_ops { void (*start)(unsigned int pmc_id, unsigned int raw_event_id); void (*stop)(unsigned int pmc_id); uint64_t (*read)(unsigned int pmc_id); - uint8_t (*get_pmc_width)(void); - void (*handle_of_intr)(struct trap_frame *frame); -#ifdef CONFIG_PERFMON_TEST void (*write)(unsigned int pmc_id, uint64_t value); -#endif /* CONFIG_PERFMON_TEST */ + uint8_t (*get_pmc_width)(void); + void (*handle_of_intr)(void); }; /* @@ -223,9 +221,7 @@ INIT_OP_DECLARE(perfmon_setup); /* * Handle overflow interrupt. 
*/ -void perfmon_handle_of_intr(struct trap_frame *frame); - -int perfmon_on_overflow(struct perfmon_pmu_ops *driver); +void perfmon_of_intr(void); /* * Register an architecture-specific driver. @@ -247,4 +243,4 @@ void perfmon_cpu_pmc_set_prev(unsigned int pmc_id, uint64_t prev); */ void perfmon_cpu_pmc_inc_of(unsigned int pmc_id); -#endif /* _KERN_PERFMON_H */ +#endif /* KERN_PERFMON_H */ diff --git a/kern/perfmon_i.h b/kern/perfmon_i.h index 28cf16b..3072171 100644 --- a/kern/perfmon_i.h +++ b/kern/perfmon_i.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Remy Noel. + * Copyright (c) 2014-2018 Remy Noel. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,17 +16,17 @@ * * * Performance monitoring based on performance counters internal functions. - * */ -#ifndef _KERN_PERFMON_I_H -#define _KERN_PERFMON_I_H + +#ifndef KERN_PERFMON_I_H +#define KERN_PERFMON_I_H #include #ifdef CONFIG_PERFMON_TEST /* - * Set an running event hardware counter value for overflow tests purposes. + * Set a running event hardware counter value for overflow tests purposes. * * Beware, this will affect all events associated to the same hardware counter. */ @@ -39,5 +39,4 @@ int perfmon_get_pmc_width(void); #endif /* CONFIG_PERFMON_TEST */ -#endif /* _KERN_PERFMON_H */ - +#endif /* KERN_PERFMON_I_H */ diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h new file mode 100644 index 0000000..6f9be0b --- /dev/null +++ b/kern/perfmon_types.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Isolated type definition used to avoid circular inclusion dependencies. + */ + +#ifndef KERN_PERFMON_TYPES_H +#define KERN_PERFMON_TYPES_H + +struct perfmon_grouplist; + +#endif /* KERN_PERFMON_TYPES_H */ diff --git a/kern/thread.c b/kern/thread.c index a5d63b9..77960ec 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -617,9 +617,10 @@ thread_runq_schedule_load(struct thread *thread) static void thread_runq_schedule_unload(struct thread *thread) { - (void)thread; #ifdef CONFIG_PERFMON perfmon_thread_unload(thread); +#else + (void)thread; #endif } @@ -1868,7 +1869,7 @@ thread_init(struct thread *thread, void *stack, if (error) { goto error_perfmon; } -#endif +#endif /* CONFIG_PERFMON */ error = tcb_build(&thread->tcb, stack, fn, arg); @@ -1884,7 +1885,7 @@ error_tcb: #ifdef CONFIG_PERFMON perfmon_thread_destroy(thread); error_perfmon: -#endif +#endif /* CONFIG_PERFMON */ thread_destroy_tsd(thread); turnstile_destroy(thread->priv_turnstile); error_turnstile: @@ -2341,7 +2342,7 @@ thread_setup(void) #ifdef CONFIG_PERFMON #define THREAD_PERFMON_INIT_OP_DEPS \ - INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(perfmon_bootstrap, true), #else /* CONFIG_PERFMON */ #define THREAD_PERFMON_INIT_OP_DEPS #endif /* CONFIG_PERFMON */ @@ -2734,10 +2735,10 @@ thread_report_periodic_event(void) spinlock_unlock(&runq->lock); } -unsigned int thread_cpu(const struct thread *thread) +unsigned int +thread_cpu(const struct thread *thread) { - assert (thread->runq != NULL); - + assert(thread->runq); return thread->runq->cpu; } diff --git a/kern/thread_i.h b/kern/thread_i.h index 066e6b7..9c24d3a 100644 --- 
a/kern/thread_i.h +++ b/kern/thread_i.h @@ -24,6 +24,7 @@ #include #include #include +#include <kern/perfmon_types.h> #include #include #include @@ -74,10 +75,6 @@ struct thread_fs_data { unsigned short work; }; -#ifdef CONFIG_PERFMON -struct perfmon_grouplist; -#endif - /* * Maximum number of thread-specific data keys. */ -- cgit v1.2.3