From 8f7aeebfc21af051ecb914d286564a10492927a0 Mon Sep 17 00:00:00 2001
From: Remy Noel
Date: Mon, 30 Apr 2018 20:55:28 +0200
Subject: perfmon: add polling overflow implementation in perfmon

---
 arch/x86/machine/pmu_amd.c   | 38 ++----------------
 arch/x86/machine/pmu_intel.c | 49 ++++++-------------
 arch/x86/machine/trap.c      |  1 +
 kern/perfmon.c               | 91 +++++++++++++++++++++++++++++++++++++-------
 test/test_perfmon_cpu.c      | 29 +++++++++++---
 5 files changed, 118 insertions(+), 90 deletions(-)

diff --git a/arch/x86/machine/pmu_amd.c b/arch/x86/machine/pmu_amd.c
index 34df620..8e56bfa 100644
--- a/arch/x86/machine/pmu_amd.c
+++ b/arch/x86/machine/pmu_amd.c
@@ -21,6 +21,7 @@
 #include
 #include
+#include
 #include
 #include
 #include

@@ -175,7 +176,6 @@ pmu_amd_start(unsigned int pmc_id, unsigned int raw_event_id)
     /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */
     high = code->event_select >> 8;
     low = PMU_AMD_EVTSEL_EN
-          | PMU_AMD_EVTSEL_INT
           | PMU_AMD_EVTSEL_OS
           | PMU_AMD_EVTSEL_USR
           | (code->umask << 8)
@@ -205,38 +205,6 @@ pmu_amd_write(unsigned int pmc_id, uint64_t value)
     cpu_set_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id, value);
 }

-/*
- * TODO Make the perfmon module handle basic overflow handling by polling
- * counters.
- */
-static void
-pmu_amd_handle_of_intr_v1(void)
-{
-    struct pmu_amd *pmu;
-    uint64_t value, prev;
-    unsigned int mask;
-
-    pmu = pmu_amd_get();
-
-    for (unsigned int pmc_id = 0; pmc_id != PMU_AMD_NR_PMCS; pmc_id++) {
-        mask = (1U << pmc_id);
-
-        if (pmu->pmc_bm & mask) {
-            continue;
-        }
-
-        value = pmu_amd_read(pmc_id);
-        prev = perfmon_cpu_pmc_get_prev(pmc_id);
-
-        if (prev > value) {
-            /* Overflow */
-            perfmon_cpu_pmc_inc_of(pmc_id);
-            /* Prevents us from overflowing twice */
-            perfmon_cpu_pmc_set_prev(pmc_id, value);
-        }
-    }
-}
-
 static int __init
 pmu_amd_setup(void)
 {
@@ -259,7 +227,9 @@ pmu_amd_setup(void)

     pmu->pmc_bm = (1U << PMU_AMD_NR_PMCS) - 1;
     pmu_driver.pmc_width = PMU_AMD_PMC_WIDTH;
-    pmu_driver.of_max_ticks = 1UL << (pmu_driver.pmc_width - 1);
+    /* Poll at least twice per counter wrap, assuming one increment per cycle. */
+    pmu_driver.of_max_ticks =
+        (1ULL << (pmu_driver.pmc_width - 1)) / (cpu_get_freq() / CLOCK_FREQ);

     pmu_driver.ops.info = pmu_amd_info;
     pmu_driver.ops.translate = pmu_amd_translate;
diff --git a/arch/x86/machine/pmu_intel.c b/arch/x86/machine/pmu_intel.c
index ccc2294..e5d9ef7 100644
--- a/arch/x86/machine/pmu_intel.c
+++ b/arch/x86/machine/pmu_intel.c
@@ -21,6 +21,7 @@
 #include
 #include
+#include
 #include
 #include
 #include

@@ -174,7 +175,7 @@ pmu_intel_info(void)
     pmu = pmu_intel_get();
     nr_events = pmu_popcount(pmu->events);

-    log_info("pmu: driver: intel, architectural v%d\n"
+    log_info("pmu: driver: intel, architectural v%d "
              "pmu: nr_pmcs: %u, pmc_width: %u, events: %#x, nr_events: %u\n",
              pmu->version, pmu->nr_pmcs, pmu->pmc_width, pmu->events,
              nr_events);
@@ -233,19 +234,23 @@ static void
 pmu_intel_start(unsigned int pmc_id, unsigned int raw_event_id)
 {
     const struct pmu_intel_event_code *code;
+    struct pmu_intel *pmu;
     uint32_t evtsel;

     assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes));

     code = &pmu_intel_event_codes[raw_event_id];
+    pmu = pmu_intel_get();

     /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */
     evtsel = PMU_INTEL_EVTSEL_EN
            | PMU_INTEL_EVTSEL_OS
            | PMU_INTEL_EVTSEL_USR
-           | PMU_INTEL_EVTSEL_INT
            | (code->umask << 8)
            | code->event_select;

+    if (pmu->version >= 2) {
+        evtsel |= PMU_INTEL_EVTSEL_INT;
+    }
     cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, evtsel);
 }
@@ -267,38 +272,6 @@ pmu_intel_write(unsigned int pmc_id, uint64_t value)
     cpu_set_msr64(PMU_INTEL_MSR_PMC0 + pmc_id, value);
 }

-/*
- * TODO Make the perfmon module handle basic overflow handling by polling
- * counters.
- */
-static void
-pmu_intel_handle_of_intr_v1(void)
-{
-    struct pmu_intel *pmu;
-    unsigned int mask;
-    uint64_t value;
-    uint64_t prev;
-
-    pmu = pmu_intel_get();
-
-    for (unsigned int pmc_id = 0; pmc_id != pmu->nr_pmcs; pmc_id++) {
-        mask = (1U << pmc_id);
-        if (pmu->pmc_bm & mask) {
-            /* counter not enabled: can't overflow. */
-            continue;
-        }
-
-        value = pmu_intel_read(pmc_id);
-        prev = perfmon_cpu_pmc_get_prev(pmc_id);
-        if (prev > value) {
-            /* Overflow */
-            perfmon_cpu_pmc_inc_of(pmc_id);
-            /* Prevents us from overflowing twice */
-            perfmon_cpu_pmc_set_prev(pmc_id, value);
-        }
-    }
-}
-
 static int
 pmu_intel_consume_bits(uint64_t *bits)
 {
@@ -330,7 +303,7 @@ pmu_intel_handle_of_intr_v2(void)
     pmu_intel_ack_status(status);

     pmu = pmu_intel_get();
-    status &= ((1U << pmu->pmc_width) - 1);
+    status &= ((1ULL << pmu->pmc_width) - 1);

     for (;;) {
         pmc_id = pmu_intel_consume_bits(&status);
@@ -366,7 +339,6 @@ pmu_intel_setup(void)
     cpu_cpuid(&eax, &ebx, &ecx, &edx);
     pmu->version = eax & PMU_INTEL_ID_VERSION_MASK;

-    /* TODO Check this */
     if (pmu->version == 0) {
         return ENODEV;
     }
@@ -396,7 +368,10 @@ pmu_intel_setup(void)
         pmu_driver.ops.handle_of_intr = pmu_intel_handle_of_intr_v2;
         pmu_driver.of_max_ticks = 0;
     } else {
-        pmu_driver.of_max_ticks = 1UL << (pmu_driver.pmc_width - 1);
+        /* Poll at least twice per counter wrap, assuming one increment per cycle. */
+        pmu_driver.ops.handle_of_intr = NULL;
+        pmu_driver.of_max_ticks =
+            (1ULL << (pmu_driver.pmc_width - 1)) / (cpu_get_freq() / CLOCK_FREQ);
     }

     return perfmon_pmu_register(&pmu_driver);
diff --git a/arch/x86/machine/trap.c b/arch/x86/machine/trap.c
index 101adf8..a7a5cbd 100644
--- a/arch/x86/machine/trap.c
+++ b/arch/x86/machine/trap.c
@@ -214,6 +214,7 @@ trap_setup(void)
     trap_install(TRAP_LAPIC_TIMER, TRAP_HF_INTR, lapic_timer_intr);
     trap_install(TRAP_LAPIC_ERROR, TRAP_HF_INTR, lapic_error_intr);
     trap_install(TRAP_LAPIC_SPURIOUS, TRAP_HF_INTR, lapic_spurious_intr);
+    trap_install(TRAP_LAPIC_PMC_OF, TRAP_HF_INTR, lapic_pmc_of_intr);

     return 0;
 }
diff --git a/kern/perfmon.c b/kern/perfmon.c
index 17175ca..0211141 100644
--- a/kern/perfmon.c
+++ b/kern/perfmon.c
@@ -48,10 +48,10 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
-#include
 /*
  * Performance monitoring event.
  */
@@ -394,10 +394,23 @@ perfmon_grouplist_destroy(struct perfmon_grouplist *grouplist)
     kmem_cache_free(&perfmon_grouplist_cache, grouplist);
 }

+static void perfmon_check_of(struct timer *timer);
+
 static void __init
-perfmon_cpu_pmu_init(struct perfmon_cpu_pmu *cpu_pmu)
+perfmon_cpu_pmu_init(unsigned int cpuid)
 {
     unsigned int i;
+    struct perfmon_cpu_pmu *cpu_pmu;
+
+    cpu_pmu = percpu_ptr(perfmon_cpu_pmu, cpuid);
+    cpu_pmu->cpu_id = cpuid;
+    if (!pmu_driver.ops.handle_of_intr) {
+        /* XXX: Use high priority instead of INTR because we might xcall from
+         * the callbacks.
+         */
+        timer_init(&cpu_pmu->of_timer, &perfmon_check_of, TIMER_HIGH_PRIO);
+        timer_schedule(&cpu_pmu->of_timer, pmu_driver.of_max_ticks);
+    }

     for (i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) {
         struct perfmon_cpu_pmc *pmc;
@@ -405,7 +418,6 @@ perfmon_cpu_pmu_init(struct perfmon_cpu_pmu *cpu_pmu)

         pmc = &cpu_pmu->pmcs[i];
         pmc->nr_refs = 0;
-        pmc->prev_value = pmu_driver.ops.read(perfmon_pmu.pmcs[i].id);
         pmc->overflow_id = 0;
     }

@@ -461,6 +473,55 @@ perfmon_cpu_pmc_inc_of(unsigned int pmc_id)
     cpu_pmc->overflow_id++;
 }

+static void
+perfmon_check_of_remote(void *arg)
+{
+    perfmon_check_of(arg);
+}
+
+static void
+perfmon_check_pmc_of(struct perfmon_cpu_pmc *cpu_pmc, uint64_t value)
+{
+    uint64_t prev;
+
+    prev = cpu_pmc->prev_value;
+    if (prev > value) {
+        /* Overflow */
+        cpu_pmc->overflow_id++;
+    }
+    cpu_pmc->prev_value = value;
+}
+
+static void
+perfmon_check_of(struct timer *timer)
+{
+    struct perfmon_pmc *pmc;
+    struct perfmon_cpu_pmc *cpu_pmc;
+    struct perfmon_cpu_pmu *cpu_pmu;
+    uint64_t value;
+
+    cpu_pmu = structof(timer, struct perfmon_cpu_pmu, of_timer);
+    if (cpu_pmu->cpu_id != cpu_id())
+    {
+        xcall_call(perfmon_check_of_remote, timer, cpu_pmu->cpu_id);
+        return;
+    }
+
+    for (size_t i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) {
+        pmc = perfmon_pmc_from_index(i);
+        if (pmc->nr_refs == 0) {
+            continue;
+        }
+
+        cpu_pmc = &cpu_pmu->pmcs[i];
+        value = pmu_driver.ops.read(pmc->id);
+
+        perfmon_check_pmc_of(cpu_pmc, value);
+    }
+
+    timer_schedule(timer, pmu_driver.of_max_ticks);
+}
+
 static void
 perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
 {
@@ -471,6 +532,7 @@ perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
     if (cpu_pmc->nr_refs == 0) {
         pmu_driver.ops.start(perfmon_pmu.pmcs[pmc_index].id,
                              perfmon_pmu.pmcs[pmc_index].raw_event_id);
+        cpu_pmc->prev_value = pmu_driver.ops.read(perfmon_pmu.pmcs[pmc_index].id);
     }

     cpu_pmc->nr_refs++;
@@ -493,6 +555,7 @@ perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
 void
 perfmon_of_intr(void)
 {
+    assert(pmu_driver.ops.handle_of_intr);
     pmu_driver.ops.handle_of_intr();
 }

@@ -545,7 +608,7 @@
     }

     for (i = 0; i < cpu_count(); i++) {
-        perfmon_cpu_pmu_init(percpu_ptr(perfmon_cpu_pmu, i));
+        perfmon_cpu_pmu_init(i);
     }

     for (i = 0; i < cpu_count(); i++) {
@@ -563,10 +626,6 @@
         return ENODEV;
     }
     pmu_driver.ops.info();
-    if (pmu_driver.ops.handle_of_intr) {
-        /* FIXME: this should not require an architectural api call. */
-        trap_register(TRAP_LAPIC_PMC_OF, lapic_pmc_of_intr);
-    }

     return 0;
 }
@@ -671,12 +730,17 @@ perfmon_event_sync(struct perfmon_cpu_pmu *cpu_pmu,
     cpu_pmc = &cpu_pmu->pmcs[event->pmc_index];

     count = pmu_driver.ops.read(pmc->id);

+    if (!pmu_driver.ops.handle_of_intr) {
+        /* Force pmc overflow status update */
+        perfmon_check_pmc_of(cpu_pmc, count);
+    }
+
     if (unlikely(event->overflow_id != cpu_pmc->overflow_id)) {
         assert(cpu_pmc->overflow_id > event->overflow_id);
-        diff = cpu_pmc->overflow_id > event->overflow_id;
+        diff = cpu_pmc->overflow_id - event->overflow_id;
         /* diff is very likely 1. */
-        event->count += (1UL << pmu_driver.pmc_width) * diff
+        event->count += (1ULL << pmu_driver.pmc_width) * diff
                         - event->prev + count;
         event->overflow_id = cpu_pmc->overflow_id;
     } else {
@@ -942,9 +1005,8 @@ perfmon_group_load(struct perfmon_group *group)
 {
     struct perfmon_cpu_pmu *cpu_pmu;
     struct perfmon_event *event;
-#ifdef CONFIG_PERFMON_TEST
     struct perfmon_pmc *pmc;
-#endif
+    uint64_t prev;

     assert(!thread_preempt_enabled());
     assert(perfmon_group_enabled(group));
@@ -967,8 +1029,11 @@ perfmon_group_load(struct perfmon_group *group)
 #endif

     list_for_each_entry(&group->events, event, node) {
+        pmc = perfmon_pmc_from_index(event->pmc_index);
+        prev = pmu_driver.ops.read(pmc->id);
+
         perfmon_cpu_pmu_load(cpu_pmu, event->pmc_index);
-        event->prev = pmu_driver.ops.read(perfmon_pmu.pmcs[event->pmc_index].id);
+        event->prev = prev;
         event->overflow_id = cpu_pmu->pmcs[event->pmc_index].overflow_id;
     }

diff --git a/test/test_perfmon_cpu.c b/test/test_perfmon_cpu.c
index 8ecb241..6f1414c 100644
--- a/test/test_perfmon_cpu.c
+++ b/test/test_perfmon_cpu.c
@@ -49,17 +49,30 @@ test_report_event(const struct perfmon_event *event, const char *name)
     printf("test: %s: %llu\n", name, count);
 }

+static uint64_t
+test_get_pre_overflow_value(uint64_t value)
+{
+    uint64_t pmc_max;
+    unsigned int pmc_width;
+
+    pmc_width = perfmon_get_pmc_width();
+    pmc_max = (1ULL << pmc_width) - 1;
+    pmc_max &= 0xffffffff80000000;
+
+    /* XXX: work around most processors not allowing full-width writes */
+    return ((~value + 1) & 0x7fffffff) | pmc_max;
+}
+
 static void
 test_run(void *arg)
 {
     struct perfmon_event *ev_cycle, *ev_instruction;
     struct perfmon_group *group;
     int error;
-    uint64_t pmc_max;
+    uint64_t value;

     (void)arg;

-    pmc_max = (1 << perfmon_get_pmc_width()) - 1;
     error = perfmon_group_create(&group);
     error_check(error, "perfmon_group_create");

@@ -86,10 +99,14 @@
     test_report_event(ev_instruction, "instruction");

     printf("checking with overflow ...\n");
-    /* TODO: choose value depending of architecture */
-    perfmon_event_write(ev_cycle, pmc_max - perfmon_event_read(ev_cycle) / 2);
-    perfmon_event_write(ev_instruction,
-                        pmc_max - perfmon_event_read(ev_instruction) / 3);
+    value = test_get_pre_overflow_value(perfmon_event_read(ev_cycle) / 2);
+    error = perfmon_event_write(ev_cycle, value);
+    error_check(error, "perfmon_event_write");
+
+    value = test_get_pre_overflow_value(perfmon_event_read(ev_instruction) / 3);
+    error = perfmon_event_write(ev_instruction, value);
+    error_check(error, "perfmon_event_write");
+
     perfmon_event_reset(ev_cycle);
     perfmon_event_reset(ev_instruction);

--
cgit v1.2.3
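For readers following the overflow handling, here is a small standalone sketch of the arithmetic this patch relies on: the prev_value/overflow_id bookkeeping done by perfmon_check_pmc_of(), the count reconstruction done in perfmon_event_sync(), and the of_max_ticks polling interval. The counter width (48 bits), CPU frequency (2 GHz) and clock frequency (200 Hz) below are illustrative assumptions, and the helper names are local to this sketch rather than kernel API.

/*
 * Standalone sketch of the polling overflow logic (illustration only).
 * Assumed values: 48-bit counters, 2 GHz CPU, 200 Hz clock tick.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PMC_WIDTH   48
#define CPU_FREQ    2000000000ULL   /* assumed, Hz */
#define CLOCK_FREQ  200ULL          /* assumed, clock ticks per second */

struct cpu_pmc {
    uint64_t prev_value;
    uint64_t overflow_id;
};

/* Mirrors perfmon_check_pmc_of(): a raw value lower than the previous one
 * means the counter wrapped since the last poll. */
static void
check_pmc_of(struct cpu_pmc *pmc, uint64_t value)
{
    if (pmc->prev_value > value) {
        pmc->overflow_id++;
    }
    pmc->prev_value = value;
}

/* Mirrors the perfmon_event_sync() arithmetic: add one full counter period
 * per overflow recorded since the event was last synced. */
static uint64_t
event_delta(uint64_t prev, uint64_t count,
            uint64_t event_of_id, uint64_t pmc_of_id)
{
    uint64_t diff;

    assert(pmc_of_id >= event_of_id);
    diff = pmc_of_id - event_of_id;
    return (1ULL << PMC_WIDTH) * diff - prev + count;
}

int
main(void)
{
    struct cpu_pmc pmc = { .prev_value = (1ULL << PMC_WIDTH) - 1000 };
    uint64_t of_max_ticks;

    /* Same formula as the patch: poll before a counter incrementing once
     * per CPU cycle can cover half its range. */
    of_max_ticks = (1ULL << (PMC_WIDTH - 1)) / (CPU_FREQ / CLOCK_FREQ);
    printf("poll every %" PRIu64 " clock ticks\n", of_max_ticks);

    check_pmc_of(&pmc, 500);    /* raw value went down: one wrap detected */
    printf("overflow_id = %" PRIu64 "\n", pmc.overflow_id);
    printf("delta = %" PRIu64 "\n",
           event_delta((1ULL << PMC_WIDTH) - 1000, 500, 0, pmc.overflow_id));
    return 0;
}

With the assumed 2 GHz / 200 Hz configuration this prints a poll interval of 14073748 clock ticks, one recorded overflow, and a delta of 1500 events across the simulated wrap.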