author    Richard Braun <rbraun@sceen.net>  2018-06-25 21:56:01 +0200
committer Richard Braun <rbraun@sceen.net>  2018-06-25 21:56:01 +0200
commit    30dd97fb786ef5f7ca28049684b17bdc2ee7a718 (patch)
tree      330f0514edcfaaa4e3266edb1191c90a39edcb80
parent    0a7c73d2e06172a1210e2bbdfba5718040f4f007 (diff)
parent    7686bfcb703049db5d3711e59133ca4b2259e1f1 (diff)
Merge branch 'perfmon'
-rw-r--r--  arch/x86/Kconfig              |   20
-rw-r--r--  arch/x86/Makefile             |    3
-rw-r--r--  arch/x86/machine/boot.c       |   30
-rw-r--r--  arch/x86/machine/boot.h       |    6
-rw-r--r--  arch/x86/machine/cpu.c        |   59
-rw-r--r--  arch/x86/machine/cpu.h        |   49
-rw-r--r--  arch/x86/machine/lapic.c      |   19
-rw-r--r--  arch/x86/machine/lapic.h      |    1
-rw-r--r--  arch/x86/machine/pmu_amd.c    |  240
-rw-r--r--  arch/x86/machine/pmu_amd.h    |   33
-rw-r--r--  arch/x86/machine/pmu_intel.c  |  391
-rw-r--r--  arch/x86/machine/pmu_intel.h  |   33
-rw-r--r--  arch/x86/machine/trap.c       |    3
-rw-r--r--  arch/x86/machine/trap.h       |    1
-rw-r--r--  doc/intro.9.txt               |    2
-rw-r--r--  kern/Kconfig                  |   13
-rw-r--r--  kern/Makefile                 |    2
-rw-r--r--  kern/percpu.c                 |   30
-rw-r--r--  kern/percpu.h                 |   31
-rw-r--r--  kern/perfmon.c                | 1443
-rw-r--r--  kern/perfmon.h                |  221
-rw-r--r--  kern/perfmon_types.h          |  102
-rw-r--r--  kern/task.c                   |    2
-rw-r--r--  kern/thread.c                 |   69
-rw-r--r--  kern/thread.h                 |   31
-rw-r--r--  kern/thread_i.h               |   17
-rw-r--r--  test/Kconfig                  |   12
-rw-r--r--  test/Makefile                 |    3
-rw-r--r--  test/test_perfmon_cpu.c       |  225
-rw-r--r--  test/test_perfmon_thread.c    |  383
-rw-r--r--  test/test_perfmon_torture.c   |  346
31 files changed, 3781 insertions, 39 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 251c4a4c..eeb999cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -13,6 +13,26 @@ config X86_PAE
PAE allows addressing physical memory beyond 4 GiB at the cost
of more pagetable lookup and memory overhead.
+config X86_PMU_AMD
+ bool "Enable AMD PMU driver"
+ select PERFMON
+ default n
+ ---help---
+ Enable support for the performance monitoring unit on AMD
+ processors.
+
+ If unsure, disable.
+
+config X86_PMU_INTEL
+ bool "Enable Intel PMU driver"
+ select PERFMON
+ default n
+ ---help---
+ Enable support for the performance monitoring unit on Intel
+ processors.
+
+ If unsure, disable.
+
endmenu
config X86_32
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9866d93a..226f4a90 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -61,3 +61,6 @@ x15_SOURCES-y += \
arch/x86/machine/trap_asm.S \
arch/x86/machine/trap.c \
arch/x86/machine/uart.c
+
+x15_SOURCES-$(CONFIG_X86_PMU_AMD) += arch/x86/machine/pmu_amd.c
+x15_SOURCES-$(CONFIG_X86_PMU_INTEL) += arch/x86/machine/pmu_intel.c
diff --git a/arch/x86/machine/boot.c b/arch/x86/machine/boot.c
index 6934896e..d540d8d8 100644
--- a/arch/x86/machine/boot.c
+++ b/arch/x86/machine/boot.c
@@ -56,6 +56,7 @@
#include <kern/log.h>
#include <kern/macros.h>
#include <kern/panic.h>
+#include <kern/percpu.h>
#include <kern/thread.h>
#include <machine/acpi.h>
#include <machine/atcons.h>
@@ -67,6 +68,8 @@
#include <machine/multiboot.h>
#include <machine/page.h>
#include <machine/pmap.h>
+#include <machine/pmu_amd.h>
+#include <machine/pmu_intel.h>
#include <machine/strace.h>
#include <machine/uart.h>
#include <vm/vm_kmem.h>
@@ -504,6 +507,7 @@ boot_ap_main(void)
cpu_ap_setup();
thread_ap_setup();
pmap_ap_setup();
+ percpu_ap_setup();
kernel_ap_main();
/* Never reached */
@@ -551,6 +555,32 @@ boot_setup_intr(void)
INIT_OP_DEFINE(boot_setup_intr,
INIT_OP_DEP(acpi_setup, true));
+#ifdef CONFIG_PERFMON
+static int __init
+boot_setup_pmu(void)
+{
+ return 0;
+}
+
+#ifdef CONFIG_X86_PMU_AMD
+#define BOOT_PMU_AMD_INIT_OP_DEPS \
+ INIT_OP_DEP(pmu_amd_setup, false),
+#else /* CONFIG_X86_PMU_AMD */
+#define BOOT_PMU_AMD_INIT_OP_DEPS
+#endif /* CONFIG_X86_PMU_AMD */
+
+#ifdef CONFIG_X86_PMU_INTEL
+#define BOOT_PMU_INTEL_INIT_OP_DEPS \
+ INIT_OP_DEP(pmu_intel_setup, false),
+#else /* CONFIG_X86_PMU_INTEL */
+#define BOOT_PMU_INTEL_INIT_OP_DEPS
+#endif /* CONFIG_X86_PMU_INTEL */
+
+INIT_OP_DEFINE(boot_setup_pmu,
+ BOOT_PMU_AMD_INIT_OP_DEPS
+ BOOT_PMU_INTEL_INIT_OP_DEPS);
+#endif /* CONFIG_PERFMON */
+
static int __init
boot_setup_shutdown(void)
{
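
The boot_setup_pmu() block above relies on a small preprocessor pattern: a disabled driver contributes an empty dependency macro, while an enabled one contributes a comma-terminated INIT_OP_DEP() entry, so the lists concatenate into a single well-formed INIT_OP_DEFINE() argument list. A minimal standalone sketch of the same pattern, using made-up CONFIG_FOO/CONFIG_BAR names instead of the kernel's init system:

/*
 * Sketch only: conditional, comma-terminated list entries that collapse to
 * nothing when an option is disabled (CONFIG_FOO/CONFIG_BAR are made up).
 */
#include <stdio.h>

#define CONFIG_FOO 1                /* pretend the FOO driver is enabled */
/* CONFIG_BAR deliberately left undefined */

#ifdef CONFIG_FOO
#define FOO_DEPS "foo",             /* comma-terminated, like INIT_OP_DEP(x, y), */
#else
#define FOO_DEPS
#endif

#ifdef CONFIG_BAR
#define BAR_DEPS "bar",
#else
#define BAR_DEPS                    /* expands to nothing */
#endif

static const char *deps[] = { FOO_DEPS BAR_DEPS };

int main(void)
{
    for (size_t i = 0; i < sizeof(deps) / sizeof(deps[0]); i++) {
        printf("dep %zu: %s\n", i, deps[i]);
    }

    return 0;
}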
diff --git a/arch/x86/machine/boot.h b/arch/x86/machine/boot.h
index d30b3beb..087f0c2a 100644
--- a/arch/x86/machine/boot.h
+++ b/arch/x86/machine/boot.h
@@ -160,6 +160,12 @@ INIT_OP_DECLARE(boot_setup_intr);
/*
* This init operation provides :
+ * - all PMU drivers have probed hardware
+ */
+INIT_OP_DECLARE(boot_setup_pmu);
+
+/*
+ * This init operation provides :
* - all shutdown operations have been registered
*/
INIT_OP_DECLARE(boot_setup_shutdown);
diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c
index 98d3680e..6cbe168a 100644
--- a/arch/x86/machine/cpu.c
+++ b/arch/x86/machine/cpu.c
@@ -69,6 +69,11 @@
#define CPU_INVALID_APIC_ID ((unsigned int)-1)
+struct cpu_vendor {
+ unsigned int id;
+ const char *str;
+};
+
/*
* MP related CMOS ports, registers and values.
*/
@@ -155,6 +160,12 @@ static alignas(8) struct cpu_gate_desc cpu_idt[CPU_IDT_SIZE] __read_mostly;
static unsigned long cpu_double_fault_handler;
static alignas(CPU_DATA_ALIGN) char cpu_double_fault_stack[TRAP_STACK_SIZE];
+uint64_t
+cpu_get_freq(void)
+{
+ return cpu_freq;
+}
+
void
cpu_delay(unsigned long usecs)
{
@@ -173,6 +184,11 @@ cpu_delay(unsigned long usecs)
} while (total > 0);
}
+static const struct cpu_vendor cpu_vendors[] = {
+ { CPU_VENDOR_INTEL, "GenuineIntel" },
+ { CPU_VENDOR_AMD, "AuthenticAMD" },
+};
+
void * __init
cpu_get_boot_stack(void)
{
@@ -182,10 +198,9 @@ cpu_get_boot_stack(void)
static void __init
cpu_preinit(struct cpu *cpu, unsigned int id, unsigned int apic_id)
{
+ memset(cpu, 0, sizeof(*cpu));
cpu->id = id;
cpu->apic_id = apic_id;
- cpu->state = CPU_STATE_OFF;
- cpu->boot_stack = NULL;
}
static void
@@ -430,6 +445,32 @@ cpu_load_idt(const void *idt, size_t size)
asm volatile("lidt %0" : : "m" (idtr));
}
+static const struct cpu_vendor *
+cpu_vendor_lookup(const char *str)
+{
+ for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); i++) {
+ if (strcmp(str, cpu_vendors[i].str) == 0) {
+ return &cpu_vendors[i];
+ }
+ }
+
+ return NULL;
+}
+
+static void __init
+cpu_init_vendor_id(struct cpu *cpu)
+{
+ const struct cpu_vendor *vendor;
+
+ vendor = cpu_vendor_lookup(cpu->vendor_str);
+
+ if (vendor == NULL) {
+ return;
+ }
+
+ cpu->vendor_id = vendor->id;
+}
+
/*
* Initialize the given cpu structure for the current processor.
*/
@@ -456,10 +497,12 @@ cpu_init(struct cpu *cpu)
eax = 0;
cpu_cpuid(&eax, &ebx, &ecx, &edx);
max_basic = eax;
- memcpy(cpu->vendor_id, &ebx, sizeof(ebx));
- memcpy(cpu->vendor_id + 4, &edx, sizeof(edx));
- memcpy(cpu->vendor_id + 8, &ecx, sizeof(ecx));
- cpu->vendor_id[sizeof(cpu->vendor_id) - 1] = '\0';
+ cpu->cpuid_max_basic = max_basic;
+ memcpy(cpu->vendor_str, &ebx, sizeof(ebx));
+ memcpy(cpu->vendor_str + 4, &edx, sizeof(edx));
+ memcpy(cpu->vendor_str + 8, &ecx, sizeof(ecx));
+ cpu->vendor_str[sizeof(cpu->vendor_str) - 1] = '\0';
+ cpu_init_vendor_id(cpu);
/* Some fields are only initialized if supported by the processor */
cpu->model_name[0] = '\0';
@@ -498,6 +541,8 @@ cpu_init(struct cpu *cpu)
max_extended = eax;
}
+ cpu->cpuid_max_extended = max_extended;
+
if (max_extended < 0x80000001) {
cpu->features3 = 0;
cpu->features4 = 0;
@@ -617,7 +662,7 @@ void
cpu_log_info(const struct cpu *cpu)
{
log_info("cpu%u: %s, type %u, family %u, model %u, stepping %u",
- cpu->id, cpu->vendor_id, cpu->type, cpu->family, cpu->model,
+ cpu->id, cpu->vendor_str, cpu->type, cpu->family, cpu->model,
cpu->stepping);
if (strlen(cpu->model_name) > 0) {
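
For reference, CPUID leaf 0 returns the vendor string split across EBX, EDX and ECX, which is why cpu_init() above copies the registers in that order. A small sketch of the reassembly, assuming a little-endian host as on x86; the register values are the well-known Intel ones:

/*
 * Sketch: reassembling the 12-character vendor string from CPUID leaf 0
 * registers, in the same EBX, EDX, ECX order as cpu_init().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint32_t ebx = 0x756e6547;  /* "Genu" */
    uint32_t edx = 0x49656e69;  /* "ineI" */
    uint32_t ecx = 0x6c65746e;  /* "ntel" */
    char vendor_str[13];

    memcpy(vendor_str, &ebx, sizeof(ebx));
    memcpy(vendor_str + 4, &edx, sizeof(edx));
    memcpy(vendor_str + 8, &ecx, sizeof(ecx));
    vendor_str[sizeof(vendor_str) - 1] = '\0';

    printf("%s\n", vendor_str);  /* prints GenuineIntel */
    return 0;
}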
diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h
index 8f9b23c4..28308042 100644
--- a/arch/x86/machine/cpu.h
+++ b/arch/x86/machine/cpu.h
@@ -218,9 +218,13 @@ struct cpu_tss {
uint16_t iobp_base;
} __packed;
-#define CPU_VENDOR_ID_SIZE 13
+#define CPU_VENDOR_STR_SIZE 13
#define CPU_MODEL_NAME_SIZE 49
+#define CPU_VENDOR_UNKNOWN 0
+#define CPU_VENDOR_INTEL 1
+#define CPU_VENDOR_AMD 2
+
/*
* CPU states.
*/
@@ -230,8 +234,11 @@ struct cpu_tss {
struct cpu {
unsigned int id;
unsigned int apic_id;
- char vendor_id[CPU_VENDOR_ID_SIZE];
+ char vendor_str[CPU_VENDOR_STR_SIZE];
char model_name[CPU_MODEL_NAME_SIZE];
+ unsigned int cpuid_max_basic;
+ unsigned int cpuid_max_extended;
+ unsigned int vendor_id;
unsigned int type;
unsigned int family;
unsigned int model;
@@ -537,16 +544,41 @@ cpu_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
: : "memory");
}
-static __always_inline void
+static inline void
cpu_get_msr(uint32_t msr, uint32_t *high, uint32_t *low)
{
- asm volatile("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr));
+ asm("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr));
}
-static __always_inline void
+static inline uint64_t
+cpu_get_msr64(uint32_t msr)
+{
+ uint32_t high, low;
+
+ cpu_get_msr(msr, &high, &low);
+ return (((uint64_t)high << 32) | low);
+}
+
+/*
+ * Implies a full memory barrier.
+ */
+static inline void
cpu_set_msr(uint32_t msr, uint32_t high, uint32_t low)
{
- asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high));
+ asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high) : "memory");
+}
+
+/*
+ * Implies a full memory barrier.
+ */
+static inline void
+cpu_set_msr64(uint32_t msr, uint64_t value)
+{
+ uint32_t low, high;
+
+ low = value & 0xffffffff;
+ high = value >> 32;
+ cpu_set_msr(msr, high, low);
}
static __always_inline uint64_t
@@ -607,6 +639,11 @@ cpu_tlb_flush_va(unsigned long va)
}
/*
+ * Get CPU frequency in Hz.
+ */
+uint64_t cpu_get_freq(void);
+
+/*
* Busy-wait for a given amount of time, in microseconds.
*/
void cpu_delay(unsigned long usecs);
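
The new cpu_get_msr64()/cpu_set_msr64() wrappers above only add 32-bit splitting and joining around rdmsr/wrmsr. The arithmetic is an exact round trip, as this standalone check illustrates (no actual MSR access, which would require ring 0):

/*
 * Arithmetic-only sketch: the split used by cpu_set_msr64() and the join
 * used by cpu_get_msr64() are inverses of each other.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t value = 0x123456789abcdef0;   /* arbitrary sample value */
    uint32_t low, high;
    uint64_t joined;

    low = value & 0xffffffff;              /* as in cpu_set_msr64() */
    high = value >> 32;
    joined = ((uint64_t)high << 32) | low; /* as in cpu_get_msr64() */

    assert(joined == value);
    printf("high:%08x low:%08x value:%016llx\n",
           high, low, (unsigned long long)joined);
    return 0;
}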
diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c
index 3f6d0c22..a15bd5f1 100644
--- a/arch/x86/machine/lapic.c
+++ b/arch/x86/machine/lapic.c
@@ -25,6 +25,7 @@
#include <kern/log.h>
#include <kern/macros.h>
#include <kern/panic.h>
+#include <kern/perfmon.h>
#include <machine/cpu.h>
#include <machine/lapic.h>
#include <machine/pmap.h>
@@ -159,7 +160,7 @@ struct lapic_map {
struct lapic_register icr_high;
struct lapic_register lvt_timer;
const struct lapic_register reserved14; /* Thermal sensor register */
- const struct lapic_register reserved15; /* Performance counters register */
+ struct lapic_register lvt_pmc; /* Performance counters register */
struct lapic_register lvt_lint0;
struct lapic_register lvt_lint1;
struct lapic_register lvt_error;
@@ -239,6 +240,7 @@ lapic_setup_registers(void)
lapic_write(&lapic_map->lvt_error, TRAP_LAPIC_ERROR);
lapic_write(&lapic_map->timer_dcr, LAPIC_TIMER_DCR_DIV1);
lapic_write(&lapic_map->timer_icr, lapic_bus_freq / CLOCK_FREQ);
+ lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF);
}
void __init
@@ -333,6 +335,21 @@ lapic_ipi_broadcast(uint32_t vector)
| (vector & LAPIC_ICR_VECTOR_MASK));
}
+#ifdef CONFIG_PERFMON
+void
+lapic_pmc_overflow_intr(struct trap_frame *frame)
+{
+ (void)frame;
+
+ lapic_eoi();
+
+ /* Reset the LVT entry as it is automatically cleared when triggered */
+ lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF);
+
+ perfmon_overflow_intr();
+}
+#endif /* CONFIG_PERFMON */
+
void
lapic_timer_intr(struct trap_frame *frame)
{
diff --git a/arch/x86/machine/lapic.h b/arch/x86/machine/lapic.h
index 6355da48..eac225d7 100644
--- a/arch/x86/machine/lapic.h
+++ b/arch/x86/machine/lapic.h
@@ -54,6 +54,7 @@ void lapic_ipi_broadcast(uint32_t vector);
/*
* Interrupt handlers.
*/
+void lapic_pmc_overflow_intr(struct trap_frame *frame);
void lapic_timer_intr(struct trap_frame *frame);
void lapic_error_intr(struct trap_frame *frame);
void lapic_spurious_intr(struct trap_frame *frame);
diff --git a/arch/x86/machine/pmu_amd.c b/arch/x86/machine/pmu_amd.c
new file mode 100644
index 00000000..c3e56429
--- /dev/null
+++ b/arch/x86/machine/pmu_amd.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+
+#include <kern/clock.h>
+#include <kern/init.h>
+#include <kern/log.h>
+#include <kern/macros.h>
+#include <kern/perfmon.h>
+#include <machine/cpu.h>
+#include <machine/pmu_amd.h>
+
+/*
+ * AMD raw event IDs.
+ */
+#define PMU_AMD_RE_CYCLE 0
+#define PMU_AMD_RE_INSTRUCTION 1
+#define PMU_AMD_RE_CACHE_REF 2
+#define PMU_AMD_RE_CACHE_MISS 3
+#define PMU_AMD_RE_BRANCH 4
+#define PMU_AMD_RE_BRANCH_MISS 5
+#define PMU_AMD_RE_DCACHE_REF 6
+#define PMU_AMD_RE_DCACHE_MISS 7
+#define PMU_AMD_RE_IFETCH_STALL 8
+#define PMU_AMD_RE_INVALID ((unsigned int)-1)
+
+/*
+ * PMU MSR addresses
+ */
+#define PMU_AMD_MSR_PERFEVTSEL0 0xc0010000
+#define PMU_AMD_MSR_PERCTR0 0xc0010004
+
+/*
+ * Event Select Register addresses
+ */
+#define PMU_AMD_EVTSEL_USR 0x00010000
+#define PMU_AMD_EVTSEL_OS 0x00020000
+#define PMU_AMD_EVTSEL_INT 0x00100000
+#define PMU_AMD_EVTSEL_EN 0x00400000
+
+/*
+ * XXX These properties have the minimum values required by the architecture.
+ * TODO Per-family/model event availability database.
+ */
+#define PMU_AMD_NR_PMCS 4
+#define PMU_AMD_PMC_WIDTH 48
+
+/*
+ * Global PMU properties.
+ *
+ * The bitmap is used to implement counter allocation, where each bit denotes
+ * whether a counter is available or not.
+ */
+struct pmu_amd {
+ unsigned int pmc_bm;
+};
+
+static struct pmu_amd pmu_amd;
+
+struct pmu_amd_event_code {
+ unsigned short event_select;
+ unsigned short umask;
+};
+
+/*
+ * TODO Per-family/model event availability database.
+ */
+static const struct pmu_amd_event_code pmu_amd_event_codes[] = {
+ [PMU_AMD_RE_CYCLE] = { 0x76, 0x00 },
+ [PMU_AMD_RE_INSTRUCTION] = { 0xc0, 0x00 },
+ [PMU_AMD_RE_CACHE_REF] = { 0x80, 0x00 },
+ [PMU_AMD_RE_CACHE_MISS] = { 0x81, 0x00 },
+ [PMU_AMD_RE_BRANCH] = { 0xc2, 0x00 },
+ [PMU_AMD_RE_BRANCH_MISS] = { 0xc3, 0x00 },
+ [PMU_AMD_RE_DCACHE_REF] = { 0x40, 0x00 },
+ [PMU_AMD_RE_DCACHE_MISS] = { 0x41, 0x00 },
+ [PMU_AMD_RE_IFETCH_STALL] = { 0x87, 0x00 },
+};
+
+static const unsigned int pmu_amd_generic_events[] = {
+ [PERFMON_EV_CYCLE] = PMU_AMD_RE_CYCLE,
+ [PERFMON_EV_REF_CYCLE] = PMU_AMD_RE_INVALID,
+ [PERFMON_EV_INSTRUCTION] = PMU_AMD_RE_INSTRUCTION,
+ [PERFMON_EV_CACHE_REF] = PMU_AMD_RE_CACHE_REF,
+ [PERFMON_EV_CACHE_MISS] = PMU_AMD_RE_CACHE_MISS,
+ [PERFMON_EV_BRANCH] = PMU_AMD_RE_BRANCH,
+ [PERFMON_EV_BRANCH_MISS] = PMU_AMD_RE_BRANCH_MISS,
+};
+
+static struct pmu_amd *
+pmu_amd_get(void)
+{
+ return &pmu_amd;
+}
+
+static int
+pmu_amd_translate(unsigned int *raw_event_idp, unsigned int event_id)
+{
+ assert(event_id < ARRAY_SIZE(pmu_amd_generic_events));
+
+ *raw_event_idp = pmu_amd_generic_events[event_id];
+ return 0;
+}
+
+static int
+pmu_amd_alloc(unsigned int *pmc_idp, unsigned int pmc_index,
+ unsigned int raw_event_id)
+{
+ struct pmu_amd *pmu;
+ unsigned int pmc_id;
+
+ /* TODO Per-family/model event availability database */
+
+ (void)pmc_index;
+ (void)raw_event_id;
+
+ pmu = pmu_amd_get();
+
+ if (pmu->pmc_bm == 0) {
+ return EAGAIN;
+ }
+
+ pmc_id = __builtin_ffs(pmu->pmc_bm) - 1;
+ pmu->pmc_bm &= ~(1U << pmc_id);
+ *pmc_idp = pmc_id;
+
+ return 0;
+}
+
+static void
+pmu_amd_free(unsigned int pmc_id)
+{
+ struct pmu_amd *pmu;
+ unsigned int mask;
+
+ assert(pmc_id < PMU_AMD_NR_PMCS);
+
+ pmu = pmu_amd_get();
+ mask = (1U << pmc_id);
+ assert(!(pmu->pmc_bm & mask));
+ pmu->pmc_bm |= mask;
+}
+
+static void
+pmu_amd_start(unsigned int pmc_id, unsigned int raw_event_id)
+{
+ const struct pmu_amd_event_code *code;
+ uint32_t high, low;
+
+ assert(pmc_id < PMU_AMD_NR_PMCS);
+ assert(raw_event_id < ARRAY_SIZE(pmu_amd_event_codes));
+
+ code = &pmu_amd_event_codes[raw_event_id];
+
+ /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */
+ high = code->event_select >> 8;
+ low = PMU_AMD_EVTSEL_EN
+ | PMU_AMD_EVTSEL_OS
+ | PMU_AMD_EVTSEL_USR
+ | (code->umask << 8)
+ | (code->event_select & 0xff);
+ cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, high, low);
+}
+
+static void
+pmu_amd_stop(unsigned int pmc_id)
+{
+ assert(pmc_id < PMU_AMD_NR_PMCS);
+
+ cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, 0, 0);
+}
+
+static uint64_t
+pmu_amd_read(unsigned int pmc_id)
+{
+ assert(pmc_id < PMU_AMD_NR_PMCS);
+
+ return cpu_get_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id);
+}
+
+static const struct perfmon_dev_ops pmu_amd_ops = {
+ .translate = pmu_amd_translate,
+ .alloc = pmu_amd_alloc,
+ .free = pmu_amd_free,
+ .start = pmu_amd_start,
+ .stop = pmu_amd_stop,
+ .read = pmu_amd_read,
+};
+
+static struct perfmon_dev pmu_amd_dev __read_mostly;
+
+static int __init
+pmu_amd_setup(void)
+{
+ const struct cpu *cpu;
+ struct pmu_amd *pmu;
+
+ cpu = cpu_current();
+
+ if (cpu->vendor_id != CPU_VENDOR_AMD) {
+ return ENODEV;
+ }
+
+ if (cpu->family < 0x10) {
+ return ENODEV;
+ }
+
+ pmu = pmu_amd_get();
+ pmu->pmc_bm = (1U << PMU_AMD_NR_PMCS) - 1;
+
+ pmu_amd_dev.ops = &pmu_amd_ops;
+ pmu_amd_dev.pmc_width = PMU_AMD_PMC_WIDTH;
+ perfmon_register(&pmu_amd_dev);
+ log_info("pmu: amd, nr_pmcs:%u pmc_width:%u",
+ PMU_AMD_NR_PMCS, PMU_AMD_PMC_WIDTH);
+ return 0;
+}
+
+INIT_OP_DEFINE(pmu_amd_setup,
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(log_setup, true),
+ INIT_OP_DEP(perfmon_bootstrap, true));
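
The pmc_bm bitmap above is the whole counter allocator: a set bit marks a free counter, __builtin_ffs() (a GCC/Clang builtin the driver already depends on) picks the lowest free one, and freeing a counter sets its bit back. A standalone sketch of the same logic with made-up names:

/*
 * Sketch of the ffs-based bitmap allocator used by pmu_amd_alloc() and
 * pmu_amd_free(); NR_PMCS and the pmc_* names are illustrative only.
 */
#include <assert.h>
#include <errno.h>
#include <stdio.h>

#define NR_PMCS 4

static unsigned int pmc_bm = (1U << NR_PMCS) - 1;   /* all counters free */

static int
pmc_alloc(unsigned int *pmc_idp)
{
    unsigned int pmc_id;

    if (pmc_bm == 0) {
        return EAGAIN;                      /* every counter is in use */
    }

    pmc_id = __builtin_ffs(pmc_bm) - 1;     /* lowest free counter */
    pmc_bm &= ~(1U << pmc_id);
    *pmc_idp = pmc_id;
    return 0;
}

static void
pmc_free(unsigned int pmc_id)
{
    assert(pmc_id < NR_PMCS);
    assert(!(pmc_bm & (1U << pmc_id)));     /* must currently be allocated */
    pmc_bm |= 1U << pmc_id;
}

int main(void)
{
    unsigned int id;

    while (pmc_alloc(&id) == 0) {
        printf("allocated pmc %u\n", id);   /* prints 0, 1, 2, 3 */
    }

    pmc_free(2);
    pmc_alloc(&id);
    printf("reallocated pmc %u\n", id);     /* prints 2 */
    return 0;
}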
diff --git a/arch/x86/machine/pmu_amd.h b/arch/x86/machine/pmu_amd.h
new file mode 100644
index 00000000..db74355c
--- /dev/null
+++ b/arch/x86/machine/pmu_amd.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Remy Noel.
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * PMU driver for AMD processors.
+ */
+
+#ifndef X86_PMU_AMD_H
+#define X86_PMU_AMD_H
+
+#include <kern/init.h>
+
+/*
+ * This init operation provides :
+ * - module fully initialized
+ */
+INIT_OP_DECLARE(pmu_amd_setup);
+
+#endif /* X86_PMU_AMD_H */
diff --git a/arch/x86/machine/pmu_intel.c b/arch/x86/machine/pmu_intel.c
new file mode 100644
index 00000000..f2a26499
--- /dev/null
+++ b/arch/x86/machine/pmu_intel.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+
+#include <kern/clock.h>
+#include <kern/init.h>
+#include <kern/log.h>
+#include <kern/perfmon.h>
+#include <kern/percpu.h>
+#include <machine/cpu.h>
+#include <machine/pmu_intel.h>
+
+/*
+ * Intel raw event IDs.
+ */
+#define PMU_INTEL_RE_CYCLE 0
+#define PMU_INTEL_RE_REF_CYCLE 1
+#define PMU_INTEL_RE_INSTRUCTION 2
+#define PMU_INTEL_RE_CACHE_REF 3
+#define PMU_INTEL_RE_CACHE_MISS 4
+#define PMU_INTEL_RE_BRANCH 5
+#define PMU_INTEL_RE_BRANCH_MISS 6
+
+/*
+ * PMU MSR addresses
+ */
+#define PMU_INTEL_MSR_PMC0 0x0c1
+#define PMU_INTEL_MSR_EVTSEL0 0x186
+
+/*
+ * V2 MSR addresses
+ */
+#define PMU_INTEL_MSR_GLOBAL_STATUS 0x038e
+#define PMU_INTEL_MSR_GLOBAL_CTRL 0x038f
+#define PMU_INTEL_MSR_GLOBAL_OVF_CTRL 0x0390
+
+/*
+ * Event Select Register addresses
+ */
+#define PMU_INTEL_EVTSEL_USR 0x00010000
+#define PMU_INTEL_EVTSEL_OS 0x00020000
+#define PMU_INTEL_EVTSEL_INT 0x00100000
+#define PMU_INTEL_EVTSEL_EN 0x00400000
+
+#define PMU_INTEL_ID_VERSION_MASK 0x000000ff
+#define PMU_INTEL_ID_NR_PMCS_MASK 0x0000ff00
+#define PMU_INTEL_ID_NR_PMCS_OFFSET 8
+#define PMU_INTEL_ID_PMC_WIDTH_MASK 0x00ff0000
+#define PMU_INTEL_ID_PMC_WIDTH_OFFSET 16
+#define PMU_INTEL_ID_EVLEN_MASK 0xff000000
+#define PMU_INTEL_ID_EVLEN_OFFSET 24
+#define PMU_INTEL_ID_EVLEN_MAX 7
+
+#define PMU_INTEL_MAX_NR_PMCS 8
+
+/*
+ * Global PMU properties.
+ *
+ * The bitmap is used to implement counter allocation, where each bit denotes
+ * whether a counter is available or not.
+ */
+struct pmu_intel {
+ unsigned int version;
+ unsigned int nr_pmcs;
+ unsigned int pmc_bm;
+ unsigned int pmc_indexes[PMU_INTEL_MAX_NR_PMCS];
+ unsigned int pmc_width;
+ unsigned int events;
+};
+
+static struct pmu_intel pmu_intel;
+
+/*
+ * Intel hardware events.
+ */
+#define PMU_INTEL_EVENT_CYCLE 0x01
+#define PMU_INTEL_EVENT_INSTRUCTION 0x02
+#define PMU_INTEL_EVENT_REF_CYCLE 0x04
+#define PMU_INTEL_EVENT_CACHE_REF 0x08
+#define PMU_INTEL_EVENT_CACHE_MISS 0x10
+#define PMU_INTEL_EVENT_BRANCH 0x20
+#define PMU_INTEL_EVENT_BRANCH_MISS 0x40
+
+struct pmu_intel_event_code {
+ unsigned int hw_event_id;
+ unsigned short event_select;
+ unsigned short umask;
+};
+
+static const unsigned int pmu_intel_raw_events[] = {
+ [PERFMON_EV_CYCLE] = PMU_INTEL_RE_CYCLE,
+ [PERFMON_EV_REF_CYCLE] = PMU_INTEL_RE_REF_CYCLE,
+ [PERFMON_EV_INSTRUCTION] = PMU_INTEL_RE_INSTRUCTION,
+ [PERFMON_EV_CACHE_REF] = PMU_INTEL_RE_CACHE_REF,
+ [PERFMON_EV_CACHE_MISS] = PMU_INTEL_RE_CACHE_MISS,
+ [PERFMON_EV_BRANCH] = PMU_INTEL_RE_BRANCH,
+ [PERFMON_EV_BRANCH_MISS] = PMU_INTEL_RE_BRANCH_MISS,
+};
+
+static const struct pmu_intel_event_code pmu_intel_event_codes[] = {
+ [PMU_INTEL_RE_CYCLE] = { PMU_INTEL_EVENT_CYCLE, 0x3c, 0x00 },
+ [PMU_INTEL_RE_REF_CYCLE] = { PMU_INTEL_EVENT_REF_CYCLE, 0x3c, 0x01 },
+ [PMU_INTEL_RE_INSTRUCTION] = { PMU_INTEL_EVENT_INSTRUCTION, 0xc0, 0x00 },
+ [PMU_INTEL_RE_CACHE_REF] = { PMU_INTEL_EVENT_CACHE_REF, 0x2e, 0x4f },
+ [PMU_INTEL_RE_CACHE_MISS] = { PMU_INTEL_EVENT_CACHE_MISS, 0x2e, 0x41 },
+ [PMU_INTEL_RE_BRANCH] = { PMU_INTEL_EVENT_BRANCH, 0xc4, 0x00 },
+ [PMU_INTEL_RE_BRANCH_MISS] = { PMU_INTEL_EVENT_BRANCH_MISS, 0xc5, 0x00 },
+};
+
+static struct pmu_intel *
+pmu_intel_get(void)
+{
+ return &pmu_intel;
+}
+
+static uint64_t
+pmu_intel_get_status(void)
+{
+ return cpu_get_msr64(PMU_INTEL_MSR_GLOBAL_STATUS);
+}
+
+static void
+pmu_intel_ack_status(uint64_t status)
+{
+ cpu_set_msr64(PMU_INTEL_MSR_GLOBAL_OVF_CTRL, status);
+}
+
+/*
+ * TODO Use the compiler built-in once libgcc is linked again.
+ */
+static unsigned int
+pmu_popcount(unsigned int bits)
+{
+ unsigned int count;
+
+ count = 0;
+
+ while (bits) {
+ if (bits & 1) {
+ count++;
+ }
+
+ bits >>= 1;
+ }
+
+ return count;
+}
+
+static int
+pmu_intel_translate(unsigned int *raw_event_idp, unsigned int event_id)
+{
+ if (event_id >= ARRAY_SIZE(pmu_intel_raw_events)) {
+ return EINVAL;
+ }
+
+ *raw_event_idp = pmu_intel_raw_events[event_id];
+ return 0;
+}
+
+static int
+pmu_intel_alloc(unsigned int *pmc_idp, unsigned int pmc_index,
+ unsigned int raw_event_id)
+{
+ struct pmu_intel *pmu;
+ unsigned int pmc_id;
+ unsigned int hw_event_id;
+
+ assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes));
+
+ pmu = pmu_intel_get();
+ hw_event_id = pmu_intel_event_codes[raw_event_id].hw_event_id;
+
+ if (!(pmu->events & hw_event_id)) {
+ return EINVAL;
+ }
+
+ if (pmu->pmc_bm == 0) {
+ return EAGAIN;
+ }
+
+ pmc_id = __builtin_ffs(pmu->pmc_bm) - 1;
+ assert(pmc_id < ARRAY_SIZE(pmu->pmc_indexes));
+ pmu->pmc_indexes[pmc_id] = pmc_index;
+ pmu->pmc_bm &= ~(1U << pmc_id);
+ *pmc_idp = pmc_id;
+ return 0;
+}
+
+static void
+pmu_intel_free(unsigned int pmc_id)
+{
+ struct pmu_intel *pmu;
+ unsigned int mask;
+
+ pmu = pmu_intel_get();
+ mask = (1U << pmc_id);
+ assert(!(pmu->pmc_bm & mask));
+ pmu->pmc_bm |= mask;
+}
+
+static void
+pmu_intel_start(unsigned int pmc_id, unsigned int raw_event_id)
+{
+ const struct pmu_intel_event_code *code;
+ struct pmu_intel *pmu;
+ uint32_t evtsel;
+
+ assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes));
+
+ code = &pmu_intel_event_codes[raw_event_id];
+ pmu = pmu_intel_get();
+
+ /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */
+ evtsel = PMU_INTEL_EVTSEL_EN
+ | PMU_INTEL_EVTSEL_OS
+ | PMU_INTEL_EVTSEL_USR
+ | (code->umask << 8)
+ | code->event_select;
+
+ if (pmu->version >= 2) {
+ evtsel |= PMU_INTEL_EVTSEL_INT;
+ }
+
+ cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, evtsel);
+}
+
+static void
+pmu_intel_stop(unsigned int pmc_id)
+{
+ cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, 0);
+}
+
+static uint64_t
+pmu_intel_read(unsigned int pmc_id)
+{
+ return cpu_get_msr64(PMU_INTEL_MSR_PMC0 + pmc_id);
+}
+
+static int
+pmu_intel_consume_bits(uint64_t *bits)
+{
+ int bit;
+
+ bit = __builtin_ffsll(*bits) - 1;
+
+ if (bit < 0) {
+ return bit;
+ }
+
+ *bits &= ~((uint64_t)1 << bit);
+ return bit;
+}
+
+static void
+pmu_intel_handle_overflow_intr(void)
+{
+ struct pmu_intel *pmu;
+ unsigned int pmc_index;
+ uint64_t status;
+ int pmc_id;
+
+ status = pmu_intel_get_status();
+
+ if (status == 0) {
+ return;
+ }
+
+ pmu_intel_ack_status(status);
+ pmu = pmu_intel_get();
+
+ status &= ((1ULL << pmu->pmc_width) - 1);
+
+ for (;;) {
+ pmc_id = pmu_intel_consume_bits(&status);
+
+ if (pmc_id < 0) {
+ break;
+ }
+
+ pmc_index = pmu->pmc_indexes[pmc_id];
+ perfmon_report_overflow(pmc_index);
+ }
+}
+
+static struct perfmon_dev_ops pmu_intel_ops __read_mostly = {
+ .translate = pmu_intel_translate,
+ .alloc = pmu_intel_alloc,
+ .free = pmu_intel_free,
+ .start = pmu_intel_start,
+ .stop = pmu_intel_stop,
+ .read = pmu_intel_read,
+};
+
+static struct perfmon_dev pmu_intel_dev __read_mostly;
+
+static void
+pmu_intel_percpu_init(void)
+{
+ const struct pmu_intel *pmu;
+ uint64_t pmc_mask;
+
+ pmu = pmu_intel_get();
+
+ pmc_mask = (1U << pmu->nr_pmcs) - 1;
+ cpu_set_msr64(PMU_INTEL_MSR_GLOBAL_CTRL, 0x700000000 | pmc_mask);
+}
+
+static struct percpu_op pmu_intel_percpu_op = \
+ PERCPU_OP_INITIALIZER(pmu_intel_percpu_init);
+
+static int __init
+pmu_intel_setup(void)
+{
+ unsigned int eax, ebx, ecx, edx, ev_len;
+ const struct cpu *cpu;
+ struct pmu_intel *pmu;
+
+ cpu = cpu_current();
+ eax = 0xa;
+
+ if (cpu->vendor_id != CPU_VENDOR_INTEL) {
+ return 0;
+ }
+
+ if (cpu->cpuid_max_basic < eax) {
+ return ENODEV;
+ }
+
+ pmu = pmu_intel_get();
+ cpu_cpuid(&eax, &ebx, &ecx, &edx);
+ pmu->version = eax & PMU_INTEL_ID_VERSION_MASK;
+
+ if (pmu->version == 0) {
+ return ENODEV;
+ }
+
+ pmu->nr_pmcs = (eax & PMU_INTEL_ID_NR_PMCS_MASK)
+ >> PMU_INTEL_ID_NR_PMCS_OFFSET;
+
+ if (pmu->nr_pmcs > ARRAY_SIZE(pmu->pmc_indexes)) {
+ log_err("pmu: invalid number of PMCs (%u)", pmu->nr_pmcs);
+ return ENODEV;
+ }
+
+ pmu->pmc_bm = (1U << pmu->nr_pmcs) - 1;
+ pmu->pmc_width = (eax & PMU_INTEL_ID_PMC_WIDTH_MASK)
+ >> PMU_INTEL_ID_PMC_WIDTH_OFFSET;
+ ev_len = (eax & PMU_INTEL_ID_EVLEN_MASK) >> PMU_INTEL_ID_EVLEN_OFFSET;
+
+ assert(ev_len <= PMU_INTEL_ID_EVLEN_MAX);
+
+ pmu->events = ~ebx & ((1U << ev_len) - 1);
+
+ pmu_intel_dev.ops = &pmu_intel_ops;
+ pmu_intel_dev.pmc_width = pmu->pmc_width;
+
+ if (pmu->version >= 2) {
+ percpu_register_op(&pmu_intel_percpu_op);
+ pmu_intel_ops.handle_overflow_intr = pmu_intel_handle_overflow_intr;
+ }
+
+ perfmon_register(&pmu_intel_dev);
+ log_info("pmu: intel v%d, nr_pmcs:%u pmc_width:%u events:%#x nr_events:%u",
+ pmu->version, pmu->nr_pmcs, pmu->pmc_width, pmu->events,
+ pmu_popcount(pmu->events));
+ return 0;
+}
+
+INIT_OP_DEFINE(pmu_intel_setup,
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(log_setup, true),
+ INIT_OP_DEP(percpu_setup, true),
+ INIT_OP_DEP(perfmon_bootstrap, true));
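
pmu_intel_setup() above derives everything from CPUID leaf 0xa: EAX packs the version, counter count, counter width and event-vector length into successive bytes, and a set bit in EBX means the corresponding architectural event is unavailable, hence the inversion. A worked decode with a hypothetical register pair:

/*
 * Worked example with hypothetical CPUID leaf 0xa values, decoded with the
 * same masks and shifts as pmu_intel_setup().
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t eax = 0x07300404;  /* hypothetical: v4, 4 PMCs, 48-bit, 7 events */
    uint32_t ebx = 0x00000000;  /* no architectural event marked unavailable */
    unsigned int version, nr_pmcs, pmc_width, ev_len, events;

    version = eax & 0x000000ff;             /* -> 4 */
    nr_pmcs = (eax & 0x0000ff00) >> 8;      /* -> 4 */
    pmc_width = (eax & 0x00ff0000) >> 16;   /* -> 48 */
    ev_len = (eax & 0xff000000) >> 24;      /* -> 7 */
    events = ~ebx & ((1U << ev_len) - 1);   /* -> 0x7f, all events usable */

    printf("v%u nr_pmcs:%u pmc_width:%u events:%#x\n",
           version, nr_pmcs, pmc_width, events);
    return 0;
}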
diff --git a/arch/x86/machine/pmu_intel.h b/arch/x86/machine/pmu_intel.h
new file mode 100644
index 00000000..400017c6
--- /dev/null
+++ b/arch/x86/machine/pmu_intel.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2018 Remy Noel.
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * PMU driver for Intel processors.
+ */
+
+#ifndef X86_PMU_INTEL_H
+#define X86_PMU_INTEL_H
+
+#include <kern/init.h>
+
+/*
+ * This init operation provides :
+ * - module fully initialized
+ */
+INIT_OP_DECLARE(pmu_intel_setup);
+
+#endif /* X86_PMU_INTEL_H */
diff --git a/arch/x86/machine/trap.c b/arch/x86/machine/trap.c
index 534b3f6f..90c8bf66 100644
--- a/arch/x86/machine/trap.c
+++ b/arch/x86/machine/trap.c
@@ -210,6 +210,9 @@ trap_setup(void)
trap_install(TRAP_XCALL, TRAP_HF_INTR, cpu_xcall_intr);
trap_install(TRAP_THREAD_SCHEDULE, TRAP_HF_INTR, cpu_thread_schedule_intr);
trap_install(TRAP_CPU_HALT, TRAP_HF_INTR, cpu_halt_intr);
+#ifdef CONFIG_PERFMON
+ trap_install(TRAP_LAPIC_PMC_OF, TRAP_HF_INTR, lapic_pmc_overflow_intr);
+#endif
trap_install(TRAP_LAPIC_TIMER, TRAP_HF_INTR, lapic_timer_intr);
trap_install(TRAP_LAPIC_ERROR, TRAP_HF_INTR, lapic_error_intr);
trap_install(TRAP_LAPIC_SPURIOUS, TRAP_HF_INTR, lapic_spurious_intr);
diff --git a/arch/x86/machine/trap.h b/arch/x86/machine/trap.h
index af6fd6b5..c5bdc1f2 100644
--- a/arch/x86/machine/trap.h
+++ b/arch/x86/machine/trap.h
@@ -62,6 +62,7 @@
#define TRAP_XCALL 238
#define TRAP_THREAD_SCHEDULE 239
#define TRAP_CPU_HALT 240
+#define TRAP_LAPIC_PMC_OF 252
#define TRAP_LAPIC_TIMER 253
#define TRAP_LAPIC_ERROR 254
#define TRAP_LAPIC_SPURIOUS 255
diff --git a/doc/intro.9.txt b/doc/intro.9.txt
index 281db50e..6fcd9618 100644
--- a/doc/intro.9.txt
+++ b/doc/intro.9.txt
@@ -153,6 +153,8 @@ module:kern/list::
Doubly-linked list.
module:kern/macros::
Useful generic macros.
+module:kern/perfmon::
+ Performance monitoring.
module:kern/rbtree::
Red-black tree.
module:kern/rdxtree::
diff --git a/kern/Kconfig b/kern/Kconfig
index 7dd04a6a..ea61937f 100644
--- a/kern/Kconfig
+++ b/kern/Kconfig
@@ -94,6 +94,19 @@ config THREAD_STACK_GUARD
If unsure, disable.
+config PERFMON
+ def_bool n
+
+config PERFMON_MAX_PMCS
+ int "Number of performance monitoring counters"
+ default 8
+ depends on PERFMON
+ ---help---
+ Number of performance monitoring counters.
+
+ This value affects the minimum duration of some critical sections
+ that run with interrupts disabled.
+
endmenu
menu "Debugging"
diff --git a/kern/Makefile b/kern/Makefile
index ab7d6b59..5b04fcb3 100644
--- a/kern/Makefile
+++ b/kern/Makefile
@@ -41,3 +41,5 @@ x15_SOURCES-$(CONFIG_SHELL) += kern/shell.c
x15_SOURCES-$(CONFIG_MUTEX_ADAPTIVE) += kern/mutex/mutex_adaptive.c
x15_SOURCES-$(CONFIG_MUTEX_PLAIN) += kern/mutex/mutex_plain.c
+
+x15_SOURCES-$(CONFIG_PERFMON) += kern/perfmon.c
diff --git a/kern/percpu.c b/kern/percpu.c
index 53861a30..f344bd70 100644
--- a/kern/percpu.c
+++ b/kern/percpu.c
@@ -26,6 +26,7 @@
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/percpu.h>
+#include <kern/slist.h>
#include <machine/cpu.h>
#include <vm/vm_kmem.h>
#include <vm/vm_page.h>
@@ -36,6 +37,14 @@ static void *percpu_area_content __initdata;
static size_t percpu_area_size __initdata;
static int percpu_skip_warning __initdata;
+static struct slist percpu_ops __initdata;
+
+static void __init
+percpu_op_run(const struct percpu_op *op)
+{
+ op->fn();
+}
+
static int __init
percpu_bootstrap(void)
{
@@ -51,6 +60,8 @@ percpu_setup(void)
struct vm_page *page;
unsigned int order;
+ slist_init(&percpu_ops);
+
percpu_area_size = &_percpu_end - &_percpu;
log_info("percpu: max_cpus: %u, section size: %zuk", CONFIG_MAX_CPUS,
percpu_area_size >> 10);
@@ -76,6 +87,15 @@ INIT_OP_DEFINE(percpu_setup,
INIT_OP_DEP(percpu_bootstrap, true),
INIT_OP_DEP(vm_page_setup, true));
+void __init
+percpu_register_op(struct percpu_op *op)
+{
+ slist_insert_tail(&percpu_ops, &op->node);
+
+ /* Run on BSP */
+ percpu_op_run(op);
+}
+
int __init
percpu_add(unsigned int cpu)
{
@@ -116,6 +136,16 @@ out:
return 0;
}
+void __init
+percpu_ap_setup(void)
+{
+ struct percpu_op *op;
+
+ slist_for_each_entry(&percpu_ops, op, node) {
+ percpu_op_run(op);
+ }
+}
+
static int __init
percpu_cleanup(void)
{
diff --git a/kern/percpu.h b/kern/percpu.h
index 96f706ea..f77e7fd8 100644
--- a/kern/percpu.h
+++ b/kern/percpu.h
@@ -59,10 +59,26 @@
#include <kern/init.h>
#include <kern/macros.h>
+#include <kern/slist_types.h>
#define PERCPU_SECTION .percpu
#define __percpu __section(QUOTE(PERCPU_SECTION))
+typedef void (*percpu_op_fn_t)(void);
+
+/*
+ * Per-CPU operation.
+ *
+ * These operations allow initialization code to register functions to be run
+ * on APs when they're started.
+ */
+struct percpu_op {
+ struct slist_node node;
+ percpu_op_fn_t fn;
+};
+
+#define PERCPU_OP_INITIALIZER(op_fn) { .fn = op_fn }
+
/*
* Boundaries of the percpu section.
*
@@ -96,6 +112,15 @@ percpu_area(unsigned int cpu)
}
/*
+ * Register a percpu operation to be run on all processors when
+ * they're started.
+ *
+ * The operation is run on the BSP when it's registered. It's run as late as
+ * possible on APs, normally right before scheduling is enabled.
+ */
+void percpu_register_op(struct percpu_op *op);
+
+/*
* Register a processor.
*
* This function creates a percpu area from kernel virtual memory for the
@@ -105,6 +130,11 @@ percpu_area(unsigned int cpu)
int percpu_add(unsigned int cpu);
/*
+ * Run registered percpu operations on an AP.
+ */
+void percpu_ap_setup(void);
+
+/*
* This init operation provides :
* - access to percpu variables on processor 0
*/
@@ -112,6 +142,7 @@ INIT_OP_DECLARE(percpu_bootstrap);
/*
* This init operation provides :
+ * - percpu operations can be registered
* - new percpu areas can be created
*
* The dependency that provides access to percpu variables on all processors
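
The registration interface declared above is used by the Intel PMU driver in this merge; the general shape is a statically initialized percpu_op registered from an init operation that depends on percpu_setup, as in this sketch (the foo_* names are placeholders):

/*
 * Sketch of percpu operation registration, mirroring the pmu_intel.c usage
 * above; foo_* is a placeholder module name.
 */
#include <kern/init.h>
#include <kern/percpu.h>

static void
foo_percpu_init(void)
{
    /* Per-processor initialization goes here. */
}

static struct percpu_op foo_percpu_op =
    PERCPU_OP_INITIALIZER(foo_percpu_init);

static int __init
foo_setup(void)
{
    /* Runs on the BSP immediately, and on each AP from percpu_ap_setup(). */
    percpu_register_op(&foo_percpu_op);
    return 0;
}

INIT_OP_DEFINE(foo_setup,
               INIT_OP_DEP(percpu_setup, true));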
diff --git a/kern/perfmon.c b/kern/perfmon.c
new file mode 100644
index 00000000..6fd319e8
--- /dev/null
+++ b/kern/perfmon.c
@@ -0,0 +1,1443 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Locking order :
+ *
+ * thread_runq -+
+ * |
+ * event -+-> interrupts -+-> td
+ * |
+ * +-> pmu
+ *
+ * TODO Kernel/user mode segregation.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <kern/clock.h>
+#include <kern/init.h>
+#include <kern/list.h>
+#include <kern/log.h>
+#include <kern/macros.h>
+#include <kern/percpu.h>
+#include <kern/perfmon.h>
+#include <kern/perfmon_types.h>
+#include <kern/spinlock.h>
+#include <kern/syscnt.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <kern/xcall.h>
+#include <machine/boot.h>
+#include <machine/cpu.h>
+
+/*
+ * Minimum hardware counter poll interval, in milliseconds.
+ *
+ * The main purpose of polling hardware counters is to detect overflows
+ * when the driver is unable to reliably use overflow interrupts.
+ */
+#define PERFMON_MIN_POLL_INTERVAL 50
+
+/*
+ * Internal event flags.
+ */
+#define PERFMON_EF_TYPE_CPU 0x100
+#define PERFMON_EF_ATTACHED 0x200
+#define PERFMON_EF_PUBLIC_MASK (PERFMON_EF_KERN \
+ | PERFMON_EF_USER \
+ | PERFMON_EF_RAW)
+
+/*
+ * Per-CPU performance monitoring counter.
+ *
+ * When an event is attached to a processor, the matching per-CPU PMC gets
+ * referenced. When a per-CPU PMC is referenced, its underlying hardware
+ * counter is active.
+ *
+ * Interrupts and preemption must be disabled on access.
+ */
+struct perfmon_cpu_pmc {
+ unsigned int nr_refs;
+ unsigned int pmc_id;
+ unsigned int raw_event_id;
+ uint64_t raw_value;
+ uint64_t value;
+};
+
+/*
+ * Per-CPU performance monitoring unit.
+ *
+ * Per-CPU PMCs are indexed the same way as global PMCs.
+ *
+ * Interrupts and preemption must be disabled on access.
+ */
+struct perfmon_cpu_pmu {
+ struct perfmon_dev *dev;
+ unsigned int cpu;
+ struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS];
+ struct timer poll_timer;
+ struct syscnt sc_nr_overflows;
+};
+
+/*
+ * Performance monitoring counter.
+ *
+ * When a PMC is used, it maps a raw event to a hardware counter.
+ * A PMC is used if and only if its reference counter isn't zero.
+ */
+struct perfmon_pmc {
+ unsigned int nr_refs;
+ unsigned int pmc_id;
+ unsigned int raw_event_id;
+};
+
+/*
+ * Performance monitoring unit.
+ *
+ * There is a single system-wide logical PMU, used to globally allocate
+ * PMCs. Reserving a counter across the entire system ensures thread
+ * migration isn't hindered by performance monitoring.
+ *
+ * Locking the global PMU is only required when allocating or releasing
+ * a PMC. Once allocated, the PMC may safely be accessed without holding
+ * the lock.
+ */
+struct perfmon_pmu {
+ struct perfmon_dev *dev;
+ struct spinlock lock;
+ struct perfmon_pmc pmcs[PERFMON_MAX_PMCS];
+};
+
+static struct perfmon_pmu perfmon_pmu;
+static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu;
+
+static struct perfmon_pmu *
+perfmon_get_pmu(void)
+{
+ return &perfmon_pmu;
+}
+
+static struct perfmon_cpu_pmu *
+perfmon_get_local_cpu_pmu(void)
+{
+ assert(!thread_preempt_enabled());
+ return cpu_local_ptr(perfmon_cpu_pmu);
+}
+
+static struct perfmon_cpu_pmu *
+perfmon_get_cpu_pmu(unsigned int cpu)
+{
+ return percpu_ptr(perfmon_cpu_pmu, cpu);
+}
+
+static void __init
+perfmon_pmc_init(struct perfmon_pmc *pmc)
+{
+ pmc->nr_refs = 0;
+}
+
+static bool
+perfmon_pmc_used(const struct perfmon_pmc *pmc)
+{
+ return pmc->nr_refs != 0;
+}
+
+static unsigned int
+perfmon_pmc_id(const struct perfmon_pmc *pmc)
+{
+ return pmc->pmc_id;
+}
+
+static unsigned int
+perfmon_pmc_raw_event_id(const struct perfmon_pmc *pmc)
+{
+ return pmc->raw_event_id;
+}
+
+static void
+perfmon_pmc_use(struct perfmon_pmc *pmc, unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ assert(!perfmon_pmc_used(pmc));
+
+ pmc->nr_refs = 1;
+ pmc->pmc_id = pmc_id;
+ pmc->raw_event_id = raw_event_id;
+}
+
+static void
+perfmon_pmc_ref(struct perfmon_pmc *pmc)
+{
+ assert(perfmon_pmc_used(pmc));
+ pmc->nr_refs++;
+}
+
+static void
+perfmon_pmc_unref(struct perfmon_pmc *pmc)
+{
+ assert(perfmon_pmc_used(pmc));
+ pmc->nr_refs--;
+}
+
+static unsigned int
+perfmon_pmu_get_pmc_index(const struct perfmon_pmu *pmu,
+ const struct perfmon_pmc *pmc)
+{
+ size_t pmc_index;
+
+ pmc_index = pmc - pmu->pmcs;
+ assert(pmc_index < ARRAY_SIZE(pmu->pmcs));
+ return pmc_index;
+}
+
+static struct perfmon_pmc *
+perfmon_pmu_get_pmc(struct perfmon_pmu *pmu, unsigned int index)
+{
+ assert(index < ARRAY_SIZE(pmu->pmcs));
+ return &pmu->pmcs[index];
+}
+
+static void __init
+perfmon_pmu_init(struct perfmon_pmu *pmu)
+{
+ pmu->dev = NULL;
+ spinlock_init(&pmu->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) {
+ perfmon_pmc_init(perfmon_pmu_get_pmc(pmu, i));
+ }
+}
+
+static void __init
+perfmon_pmu_set_dev(struct perfmon_pmu *pmu, struct perfmon_dev *dev)
+{
+ assert(dev);
+ assert(!pmu->dev);
+ pmu->dev = dev;
+}
+
+static struct perfmon_dev *
+perfmon_pmu_get_dev(const struct perfmon_pmu *pmu)
+{
+ return pmu->dev;
+}
+
+static void
+perfmon_pmu_handle_overflow_intr(const struct perfmon_pmu *pmu)
+{
+ pmu->dev->ops->handle_overflow_intr();
+}
+
+static int
+perfmon_pmu_translate(const struct perfmon_pmu *pmu,
+ unsigned int *raw_event_id,
+ unsigned int event_id)
+{
+ if (!pmu->dev) {
+ return ENODEV;
+ }
+
+ return pmu->dev->ops->translate(raw_event_id, event_id);
+}
+
+static int
+perfmon_pmu_alloc_pmc_id(const struct perfmon_pmu *pmu,
+ unsigned int *pmc_idp,
+ unsigned int pmc_index,
+ unsigned int raw_event_id)
+{
+ unsigned int pmc_id;
+ int error;
+
+ if (!pmu->dev) {
+ return ENODEV;
+ }
+
+ error = pmu->dev->ops->alloc(&pmc_id, pmc_index, raw_event_id);
+
+ if (error) {
+ return error;
+ }
+
+ *pmc_idp = pmc_id;
+ return 0;
+}
+
+static void
+perfmon_pmu_free_pmc_id(const struct perfmon_pmu *pmu, unsigned int pmc_id)
+{
+ assert(pmu->dev);
+ pmu->dev->ops->free(pmc_id);
+}
+
+static struct perfmon_pmc *
+perfmon_pmu_find_unused_pmc(struct perfmon_pmu *pmu)
+{
+ struct perfmon_pmc *pmc;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) {
+ pmc = perfmon_pmu_get_pmc(pmu, i);
+
+ if (!perfmon_pmc_used(pmc)) {
+ return pmc;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+perfmon_pmu_alloc_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp,
+ unsigned int raw_event_id)
+{
+ unsigned int pmc_id = 0, pmc_index;
+ struct perfmon_pmc *pmc;
+ int error;
+
+ pmc = perfmon_pmu_find_unused_pmc(pmu);
+
+ if (!pmc) {
+ return EAGAIN;
+ }
+
+ pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc);
+ error = perfmon_pmu_alloc_pmc_id(pmu, &pmc_id, pmc_index, raw_event_id);
+
+ if (error) {
+ return error;
+ }
+
+ perfmon_pmc_use(pmc, pmc_id, raw_event_id);
+ *pmcp = pmc;
+ return 0;
+}
+
+static void
+perfmon_pmu_free_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc)
+{
+ unsigned int pmc_id;
+
+ assert(!perfmon_pmc_used(pmc));
+ pmc_id = perfmon_pmc_id(pmc);
+ perfmon_pmu_free_pmc_id(pmu, pmc_id);
+}
+
+static struct perfmon_pmc *
+perfmon_pmu_get_pmc_by_raw_event_id(struct perfmon_pmu *pmu,
+ unsigned int raw_event_id)
+{
+ struct perfmon_pmc *pmc;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(pmu->pmcs); i++) {
+ pmc = perfmon_pmu_get_pmc(pmu, i);
+
+ if (!perfmon_pmc_used(pmc)) {
+ continue;
+ }
+
+ if (perfmon_pmc_raw_event_id(pmc) == raw_event_id) {
+ return pmc;
+ }
+ }
+
+ return NULL;
+}
+
+static int
+perfmon_pmu_take_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc **pmcp,
+ unsigned int raw_event_id)
+{
+ struct perfmon_pmc *pmc;
+ int error;
+
+ spinlock_lock(&pmu->lock);
+
+ pmc = perfmon_pmu_get_pmc_by_raw_event_id(pmu, raw_event_id);
+
+ if (pmc) {
+ perfmon_pmc_ref(pmc);
+ error = 0;
+ } else {
+ error = perfmon_pmu_alloc_pmc(pmu, &pmc, raw_event_id);
+
+ if (error) {
+ pmc = NULL;
+ }
+ }
+
+ spinlock_unlock(&pmu->lock);
+
+ if (error) {
+ return error;
+ }
+
+ *pmcp = pmc;
+ return 0;
+}
+
+static void
+perfmon_pmu_put_pmc(struct perfmon_pmu *pmu, struct perfmon_pmc *pmc)
+{
+ spinlock_lock(&pmu->lock);
+
+ perfmon_pmc_unref(pmc);
+
+ if (!perfmon_pmc_used(pmc)) {
+ perfmon_pmu_free_pmc(pmu, pmc);
+ }
+
+ spinlock_unlock(&pmu->lock);
+}
+
+static int
+perfmon_check_event_args(unsigned int id, unsigned int flags)
+{
+ if (!((flags & PERFMON_EF_PUBLIC_MASK) == flags)
+ || !((flags & PERFMON_EF_RAW) || (id < PERFMON_NR_GENERIC_EVENTS))
+ || !((flags & (PERFMON_EF_KERN | PERFMON_EF_USER)))) {
+ return EINVAL;
+ }
+
+ return 0;
+}
+
+int
+perfmon_event_init(struct perfmon_event *event, unsigned int id,
+ unsigned int flags)
+{
+ int error;
+
+ error = perfmon_check_event_args(id, flags);
+
+ if (error) {
+ return error;
+ }
+
+ spinlock_init(&event->lock);
+ event->flags = flags;
+ event->id = id;
+ event->value = 0;
+ return 0;
+}
+
+static bool
+perfmon_event_type_cpu(const struct perfmon_event *event)
+{
+ return event->flags & PERFMON_EF_TYPE_CPU;
+}
+
+static void
+perfmon_event_set_type_cpu(struct perfmon_event *event)
+{
+ event->flags |= PERFMON_EF_TYPE_CPU;
+}
+
+static void
+perfmon_event_clear_type_cpu(struct perfmon_event *event)
+{
+ event->flags &= ~PERFMON_EF_TYPE_CPU;
+}
+
+static bool
+perfmon_event_attached(const struct perfmon_event *event)
+{
+ return event->flags & PERFMON_EF_ATTACHED;
+}
+
+static unsigned int
+perfmon_event_pmc_index(const struct perfmon_event *event)
+{
+ assert(perfmon_event_attached(event));
+ return event->pmc_index;
+}
+
+static void __init
+perfmon_cpu_pmc_init(struct perfmon_cpu_pmc *cpu_pmc)
+{
+ cpu_pmc->nr_refs = 0;
+}
+
+static bool
+perfmon_cpu_pmc_used(const struct perfmon_cpu_pmc *cpu_pmc)
+{
+ return cpu_pmc->nr_refs != 0;
+}
+
+static void
+perfmon_cpu_pmc_use(struct perfmon_cpu_pmc *cpu_pmc, unsigned int pmc_id,
+ unsigned int raw_event_id, uint64_t raw_value)
+{
+ assert(!perfmon_cpu_pmc_used(cpu_pmc));
+
+ cpu_pmc->nr_refs = 1;
+ cpu_pmc->pmc_id = pmc_id;
+ cpu_pmc->raw_event_id = raw_event_id;
+ cpu_pmc->raw_value = raw_value;
+ cpu_pmc->value = 0;
+}
+
+static void
+perfmon_cpu_pmc_ref(struct perfmon_cpu_pmc *cpu_pmc)
+{
+ assert(perfmon_cpu_pmc_used(cpu_pmc));
+ cpu_pmc->nr_refs++;
+}
+
+static void
+perfmon_cpu_pmc_unref(struct perfmon_cpu_pmc *cpu_pmc)
+{
+ assert(perfmon_cpu_pmc_used(cpu_pmc));
+ cpu_pmc->nr_refs--;
+}
+
+static unsigned int
+perfmon_cpu_pmc_id(const struct perfmon_cpu_pmc *cpu_pmc)
+{
+ return cpu_pmc->pmc_id;
+}
+
+static bool
+perfmon_cpu_pmc_update(struct perfmon_cpu_pmc *cpu_pmc, uint64_t raw_value,
+ unsigned int pmc_width)
+{
+ bool overflowed;
+ uint64_t delta;
+
+ delta = raw_value - cpu_pmc->raw_value;
+
+ if (pmc_width == 64) {
+ overflowed = false;
+ } else {
+ if (raw_value >= cpu_pmc->raw_value) {
+ overflowed = false;
+ } else {
+ overflowed = true;
+ delta += (uint64_t)1 << pmc_width;
+ }
+ }
+
+ cpu_pmc->value += delta;
+ cpu_pmc->raw_value = raw_value;
+ return overflowed;
+}
+
+static uint64_t
+perfmon_cpu_pmc_get_value(const struct perfmon_cpu_pmc *cpu_pmc)
+{
+ return cpu_pmc->value;
+}
+
+static struct perfmon_cpu_pmc *
+perfmon_cpu_pmu_get_pmc(struct perfmon_cpu_pmu *cpu_pmu, unsigned int index)
+{
+ assert(index < ARRAY_SIZE(cpu_pmu->pmcs));
+ return &cpu_pmu->pmcs[index];
+}
+
+static void
+perfmon_cpu_pmu_start(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ cpu_pmu->dev->ops->start(pmc_id, raw_event_id);
+}
+
+static void
+perfmon_cpu_pmu_stop(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id)
+{
+ cpu_pmu->dev->ops->stop(pmc_id);
+}
+
+static uint64_t
+perfmon_cpu_pmu_read(const struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_id)
+{
+ return cpu_pmu->dev->ops->read(pmc_id);
+}
+
+static void
+perfmon_cpu_pmu_use_pmc(struct perfmon_cpu_pmu *cpu_pmu,
+ struct perfmon_cpu_pmc *cpu_pmc,
+ unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ uint64_t raw_value;
+
+ perfmon_cpu_pmu_start(cpu_pmu, pmc_id, raw_event_id);
+ raw_value = perfmon_cpu_pmu_read(cpu_pmu, pmc_id);
+ perfmon_cpu_pmc_use(cpu_pmc, pmc_id, raw_event_id, raw_value);
+}
+
+static void
+perfmon_cpu_pmu_update_pmc(struct perfmon_cpu_pmu *cpu_pmu,
+ struct perfmon_cpu_pmc *cpu_pmc)
+{
+ uint64_t raw_value;
+ bool overflowed;
+
+ raw_value = perfmon_cpu_pmu_read(cpu_pmu, perfmon_cpu_pmc_id(cpu_pmc));
+ overflowed = perfmon_cpu_pmc_update(cpu_pmc, raw_value,
+ cpu_pmu->dev->pmc_width);
+
+ if (overflowed) {
+ syscnt_inc(&cpu_pmu->sc_nr_overflows);
+ }
+}
+
+static void
+perfmon_cpu_pmu_check_overflow(void *arg)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmu = arg;
+ assert(cpu_pmu->cpu == cpu_id());
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) {
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, i);
+
+ if (!perfmon_cpu_pmc_used(cpu_pmc)) {
+ continue;
+ }
+
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ }
+}
+
+static void
+perfmon_cpu_pmu_poll(struct timer *timer)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+
+ cpu_pmu = structof(timer, struct perfmon_cpu_pmu, poll_timer);
+ xcall_call(perfmon_cpu_pmu_check_overflow, cpu_pmu, cpu_pmu->cpu);
+ timer_schedule(timer, timer_get_time(timer) + cpu_pmu->dev->poll_interval);
+}
+
+static void __init
+perfmon_cpu_pmu_init(struct perfmon_cpu_pmu *cpu_pmu, unsigned int cpu,
+ struct perfmon_dev *dev)
+{
+ char name[SYSCNT_NAME_SIZE];
+
+ cpu_pmu->dev = dev;
+ cpu_pmu->cpu = cpu;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) {
+ perfmon_cpu_pmc_init(perfmon_cpu_pmu_get_pmc(cpu_pmu, i));
+ }
+
+ if (dev->ops->handle_overflow_intr == NULL) {
+ assert(dev->poll_interval != 0);
+
+ /*
+ * XXX Ideally, this would be an interrupt timer instead of a high
+ * priority one, but it can't be because the handler performs
+ * cross-calls to remote processors, which requires that interrupts
+ * be enabled. This is one potential user of CPU-bound timers.
+ */
+ timer_init(&cpu_pmu->poll_timer, perfmon_cpu_pmu_poll, TIMER_HIGH_PRIO);
+ timer_schedule(&cpu_pmu->poll_timer, dev->poll_interval);
+ }
+
+ snprintf(name, sizeof(name), "perfmon_nr_overflows/%u", cpu);
+ syscnt_register(&cpu_pmu->sc_nr_overflows, name);
+}
+
+static uint64_t
+perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index,
+ unsigned int pmc_id, unsigned int raw_event_id)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+
+ if (perfmon_cpu_pmc_used(cpu_pmc)) {
+ perfmon_cpu_pmc_ref(cpu_pmc);
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ } else {
+ perfmon_cpu_pmu_use_pmc(cpu_pmu, cpu_pmc, pmc_id, raw_event_id);
+ }
+
+ return perfmon_cpu_pmc_get_value(cpu_pmc);
+}
+
+static uint64_t
+perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+ unsigned int pmc_id;
+ uint64_t value;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+ pmc_id = perfmon_cpu_pmc_id(cpu_pmc);
+
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ value = perfmon_cpu_pmc_get_value(cpu_pmc);
+
+ perfmon_cpu_pmc_unref(cpu_pmc);
+
+ if (!perfmon_cpu_pmc_used(cpu_pmc)) {
+ perfmon_cpu_pmu_stop(cpu_pmu, pmc_id);
+ }
+
+ return value;
+}
+
+static uint64_t
+perfmon_cpu_pmu_sync(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+ return perfmon_cpu_pmc_get_value(cpu_pmc);
+}
+
+static void
+perfmon_td_pmc_init(struct perfmon_td_pmc *td_pmc)
+{
+ td_pmc->nr_refs = 0;
+ td_pmc->loaded = false;
+ td_pmc->value = 0;
+}
+
+static bool
+perfmon_td_pmc_used(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->nr_refs != 0;
+}
+
+static void
+perfmon_td_pmc_use(struct perfmon_td_pmc *td_pmc, unsigned int pmc_id,
+ unsigned int raw_event_id)
+{
+ assert(!perfmon_td_pmc_used(td_pmc));
+
+ td_pmc->nr_refs = 1;
+ td_pmc->loaded = false;
+ td_pmc->pmc_id = pmc_id;
+ td_pmc->raw_event_id = raw_event_id;
+ td_pmc->value = 0;
+}
+
+static unsigned int
+perfmon_td_pmc_id(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->pmc_id;
+}
+
+static unsigned int
+perfmon_td_pmc_raw_event_id(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->raw_event_id;
+}
+
+static void
+perfmon_td_pmc_ref(struct perfmon_td_pmc *td_pmc)
+{
+ assert(perfmon_td_pmc_used(td_pmc));
+ td_pmc->nr_refs++;
+}
+
+static void
+perfmon_td_pmc_unref(struct perfmon_td_pmc *td_pmc)
+{
+ assert(perfmon_td_pmc_used(td_pmc));
+ td_pmc->nr_refs--;
+}
+
+static bool
+perfmon_td_pmc_loaded(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->loaded;
+}
+
+static void
+perfmon_td_pmc_load(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value)
+{
+ assert(!perfmon_td_pmc_loaded(td_pmc));
+
+ td_pmc->cpu_pmc_value = cpu_pmc_value;
+ td_pmc->loaded = true;
+}
+
+static void
+perfmon_td_pmc_update(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value)
+{
+ uint64_t delta;
+
+ assert(perfmon_td_pmc_loaded(td_pmc));
+
+ delta = cpu_pmc_value - td_pmc->cpu_pmc_value;
+ td_pmc->cpu_pmc_value = cpu_pmc_value;
+ td_pmc->value += delta;
+}
+
+static void
+perfmon_td_pmc_unload(struct perfmon_td_pmc *td_pmc, uint64_t cpu_pmc_value)
+{
+ perfmon_td_pmc_update(td_pmc, cpu_pmc_value);
+ td_pmc->loaded = false;
+}
+
+static uint64_t
+perfmon_td_pmc_read(const struct perfmon_td_pmc *td_pmc)
+{
+ return td_pmc->value;
+}
+
+static unsigned int
+perfmon_td_get_pmc_index(const struct perfmon_td *td,
+ const struct perfmon_td_pmc *td_pmc)
+{
+ size_t pmc_index;
+
+ pmc_index = td_pmc - td->pmcs;
+ assert(pmc_index < ARRAY_SIZE(td->pmcs));
+ return pmc_index;
+}
+
+static struct perfmon_td_pmc *
+perfmon_td_get_pmc(struct perfmon_td *td, unsigned int index)
+{
+ assert(index < ARRAY_SIZE(td->pmcs));
+ return &td->pmcs[index];
+}
+
+void
+perfmon_td_init(struct perfmon_td *td)
+{
+ spinlock_init(&td->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) {
+ perfmon_td_pmc_init(perfmon_td_get_pmc(td, i));
+ }
+}
+
+static void
+perfmon_td_load_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc)
+{
+ unsigned int pmc_index, pmc_id, raw_event_id;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ uint64_t cpu_pmc_value;
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ pmc_id = perfmon_td_pmc_id(td_pmc);
+ raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index,
+ pmc_id, raw_event_id);
+ perfmon_td_pmc_load(td_pmc, cpu_pmc_value);
+}
+
+static void
+perfmon_td_unload_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index);
+ perfmon_td_pmc_unload(td_pmc, cpu_pmc_value);
+}
+
+static void
+perfmon_td_update_pmc(struct perfmon_td *td, struct perfmon_td_pmc *td_pmc)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index);
+ perfmon_td_pmc_update(td_pmc, cpu_pmc_value);
+}
+
+void
+perfmon_td_load(struct perfmon_td *td)
+{
+ unsigned int pmc_index, pmc_id, raw_event_id;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_td_pmc *td_pmc;
+ uint64_t cpu_pmc_value;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+
+ spinlock_lock(&td->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) {
+ td_pmc = perfmon_td_get_pmc(td, i);
+
+ if (!perfmon_td_pmc_used(td_pmc) || perfmon_td_pmc_loaded(td_pmc)) {
+ continue;
+ }
+
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ pmc_id = perfmon_td_pmc_id(td_pmc);
+ raw_event_id = perfmon_td_pmc_raw_event_id(td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index,
+ pmc_id, raw_event_id);
+ perfmon_td_pmc_load(td_pmc, cpu_pmc_value);
+ }
+
+ spinlock_unlock(&td->lock);
+}
+
+void
+perfmon_td_unload(struct perfmon_td *td)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_td_pmc *td_pmc;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+
+ spinlock_lock(&td->lock);
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(td->pmcs); i++) {
+ td_pmc = perfmon_td_get_pmc(td, i);
+
+ if (!perfmon_td_pmc_loaded(td_pmc)) {
+ continue;
+ }
+
+ pmc_index = perfmon_td_get_pmc_index(td, td_pmc);
+ cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index);
+ perfmon_td_pmc_unload(td_pmc, cpu_pmc_value);
+ }
+
+ spinlock_unlock(&td->lock);
+}
+
+static void
+perfmon_event_load(struct perfmon_event *event, uint64_t pmc_value)
+{
+ event->pmc_value = pmc_value;
+}
+
+static void
+perfmon_event_update(struct perfmon_event *event, uint64_t pmc_value)
+{
+ uint64_t delta;
+
+ delta = pmc_value - event->pmc_value;
+ event->value += delta;
+ event->pmc_value = pmc_value;
+}
+
+static void
+perfmon_event_load_cpu_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ const struct perfmon_pmc *pmc;
+ struct perfmon_pmu *pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ event = arg;
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmu = perfmon_get_pmu();
+ pmc_index = perfmon_event_pmc_index(event);
+ pmc = perfmon_pmu_get_pmc(pmu, pmc_index);
+ cpu_pmc_value = perfmon_cpu_pmu_load(cpu_pmu, pmc_index,
+ perfmon_pmc_id(pmc),
+ perfmon_pmc_raw_event_id(pmc));
+ perfmon_event_load(event, cpu_pmc_value);
+}
+
+static void
+perfmon_event_load_cpu(struct perfmon_event *event, unsigned int cpu)
+{
+ perfmon_event_set_type_cpu(event);
+ event->cpu = cpu;
+ xcall_call(perfmon_event_load_cpu_remote, event, cpu);
+}
+
+static void
+perfmon_event_load_thread_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ uint64_t td_pmc_value;
+
+ event = arg;
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock(&td->lock);
+
+ if (thread_self() == event->thread) {
+
+ if (perfmon_td_pmc_loaded(td_pmc)) {
+ perfmon_td_update_pmc(td, td_pmc);
+ } else {
+ perfmon_td_load_pmc(td, td_pmc);
+ }
+ }
+
+ td_pmc_value = perfmon_td_pmc_read(td_pmc);
+
+ spinlock_unlock(&td->lock);
+
+ perfmon_event_load(event, td_pmc_value);
+}
+
+static void
+perfmon_event_load_thread(struct perfmon_event *event, struct thread *thread)
+{
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ struct perfmon_pmu *pmu;
+ const struct perfmon_pmc *pmc;
+ unsigned int pmc_index;
+ unsigned long flags;
+
+ pmu = perfmon_get_pmu();
+
+ thread_ref(thread);
+ event->thread = thread;
+
+ pmc_index = perfmon_event_pmc_index(event);
+ pmc = perfmon_pmu_get_pmc(pmu, pmc_index);
+ td = thread_get_perfmon_td(thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock_intr_save(&td->lock, &flags);
+
+ if (perfmon_td_pmc_used(td_pmc)) {
+ perfmon_td_pmc_ref(td_pmc);
+ } else {
+ perfmon_td_pmc_use(td_pmc, perfmon_pmc_id(pmc),
+ perfmon_pmc_raw_event_id(pmc));
+ }
+
+ spinlock_unlock_intr_restore(&td->lock, flags);
+
+ xcall_call(perfmon_event_load_thread_remote, event, thread_cpu(thread));
+}
+
+static void
+perfmon_event_unload_cpu_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ event = arg;
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_event_pmc_index(event);
+ cpu_pmc_value = perfmon_cpu_pmu_unload(cpu_pmu, pmc_index);
+ perfmon_event_update(event, cpu_pmc_value);
+}
+
+static void
+perfmon_event_unload_cpu(struct perfmon_event *event)
+{
+ xcall_call(perfmon_event_unload_cpu_remote, event, event->cpu);
+ perfmon_event_clear_type_cpu(event);
+}
+
+static void
+perfmon_event_unload_thread_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ uint64_t td_pmc_value;
+
+ event = arg;
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock(&td->lock);
+
+ if ((thread_self() == event->thread) && perfmon_td_pmc_loaded(td_pmc)) {
+ if (perfmon_td_pmc_used(td_pmc)) {
+ perfmon_td_update_pmc(td, td_pmc);
+ } else {
+ perfmon_td_unload_pmc(td, td_pmc);
+ }
+ }
+
+ td_pmc_value = perfmon_td_pmc_read(td_pmc);
+
+ spinlock_unlock(&td->lock);
+
+ perfmon_event_update(event, td_pmc_value);
+}
+
+static void
+perfmon_event_unload_thread(struct perfmon_event *event)
+{
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ unsigned long flags;
+
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock_intr_save(&td->lock, &flags);
+ perfmon_td_pmc_unref(td_pmc);
+ spinlock_unlock_intr_restore(&td->lock, flags);
+
+ xcall_call(perfmon_event_unload_thread_remote, event,
+ thread_cpu(event->thread));
+
+ thread_unref(event->thread);
+ event->thread = NULL;
+}
+
+static void
+perfmon_event_sync_cpu_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_cpu_pmu *cpu_pmu;
+ unsigned int pmc_index;
+ uint64_t cpu_pmc_value;
+
+ event = arg;
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ pmc_index = perfmon_event_pmc_index(event);
+ cpu_pmc_value = perfmon_cpu_pmu_sync(cpu_pmu, pmc_index);
+ perfmon_event_update(event, cpu_pmc_value);
+}
+
+static void
+perfmon_event_sync_cpu(struct perfmon_event *event)
+{
+ xcall_call(perfmon_event_sync_cpu_remote, event, event->cpu);
+}
+
+static void
+perfmon_event_sync_thread_remote(void *arg)
+{
+ struct perfmon_event *event;
+ struct perfmon_td_pmc *td_pmc;
+ struct perfmon_td *td;
+ unsigned int pmc_index;
+ uint64_t td_pmc_value;
+
+ event = arg;
+ pmc_index = perfmon_event_pmc_index(event);
+ td = thread_get_perfmon_td(event->thread);
+ td_pmc = perfmon_td_get_pmc(td, pmc_index);
+
+ spinlock_lock(&td->lock);
+
+ if (thread_self() == event->thread) {
+ perfmon_td_update_pmc(td, td_pmc);
+ }
+
+ td_pmc_value = perfmon_td_pmc_read(td_pmc);
+
+ spinlock_unlock(&td->lock);
+
+ perfmon_event_update(event, td_pmc_value);
+}
+
+static void
+perfmon_event_sync_thread(struct perfmon_event *event)
+{
+ xcall_call(perfmon_event_sync_thread_remote, event,
+ thread_cpu(event->thread));
+}
+
+static int
+perfmon_event_attach_pmu(struct perfmon_event *event)
+{
+ unsigned int raw_event_id = 0;
+ struct perfmon_pmu *pmu;
+ struct perfmon_pmc *pmc;
+ int error;
+
+ pmu = perfmon_get_pmu();
+
+ if (!(event->flags & PERFMON_EF_RAW)) {
+ error = perfmon_pmu_translate(pmu, &raw_event_id, event->id);
+
+ if (error) {
+ return error;
+ }
+ }
+
+ error = perfmon_pmu_take_pmc(pmu, &pmc, raw_event_id);
+
+ if (error) {
+ return error;
+ }
+
+ event->pmc_index = perfmon_pmu_get_pmc_index(pmu, pmc);
+ event->flags |= PERFMON_EF_ATTACHED;
+ event->value = 0;
+ return 0;
+}
+
+static void
+perfmon_event_detach_pmu(struct perfmon_event *event)
+{
+ struct perfmon_pmu *pmu;
+ struct perfmon_pmc *pmc;
+
+ pmu = perfmon_get_pmu();
+ pmc = perfmon_pmu_get_pmc(pmu, perfmon_event_pmc_index(event));
+ perfmon_pmu_put_pmc(pmu, pmc);
+ event->flags &= ~PERFMON_EF_ATTACHED;
+}
+
+int
+perfmon_event_attach(struct perfmon_event *event, struct thread *thread)
+{
+ int error;
+
+ spinlock_lock(&event->lock);
+
+ if (perfmon_event_attached(event)) {
+ error = EINVAL;
+ goto error;
+ }
+
+ error = perfmon_event_attach_pmu(event);
+
+ if (error) {
+ goto error;
+ }
+
+ perfmon_event_load_thread(event, thread);
+
+ spinlock_unlock(&event->lock);
+
+ return 0;
+
+error:
+ spinlock_unlock(&event->lock);
+
+ return error;
+}
+
+int
+perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu)
+{
+ int error;
+
+ if (cpu >= cpu_count()) {
+ return EINVAL;
+ }
+
+ spinlock_lock(&event->lock);
+
+ if (perfmon_event_attached(event)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = perfmon_event_attach_pmu(event);
+
+ if (error) {
+ goto out;
+ }
+
+ perfmon_event_load_cpu(event, cpu);
+ error = 0;
+
+out:
+ spinlock_unlock(&event->lock);
+
+ return error;
+}
+
+int
+perfmon_event_detach(struct perfmon_event *event)
+{
+ int error;
+
+ spinlock_lock(&event->lock);
+
+ if (!perfmon_event_attached(event)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ if (perfmon_event_type_cpu(event)) {
+ perfmon_event_unload_cpu(event);
+ } else {
+ perfmon_event_unload_thread(event);
+ }
+
+ perfmon_event_detach_pmu(event);
+ error = 0;
+
+out:
+ spinlock_unlock(&event->lock);
+
+ return error;
+}
+
+uint64_t
+perfmon_event_read(struct perfmon_event *event)
+{
+ uint64_t value;
+
+ spinlock_lock(&event->lock);
+
+ if (perfmon_event_attached(event)) {
+ if (perfmon_event_type_cpu(event)) {
+ perfmon_event_sync_cpu(event);
+ } else {
+ perfmon_event_sync_thread(event);
+ }
+ }
+
+ value = event->value;
+
+ spinlock_unlock(&event->lock);
+
+ return value;
+}
+
+static uint64_t __init
+perfmon_compute_poll_interval(uint64_t pmc_width)
+{
+ uint64_t cycles, time;
+
+ if (pmc_width == 64) {
+ cycles = (uint64_t)-1;
+ } else {
+ cycles = (uint64_t)1 << pmc_width;
+ }
+
+ /*
+ * Assume an unrealistically high upper bound on the number of
+     * events per cycle to obtain a comfortable margin of safety.
+ */
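+    /*
+     * Illustration (assumed figures, not a requirement): with a 48-bit
+     * counter on a 2 GHz processor, the computation below gives
+     * cycles = 2^48 / 100 ~= 2.8e12 and time ~= 2.8e12 / 2e6 ~= 1.4e6 ms,
+     * i.e. the counters are polled roughly every 23 minutes, well within
+     * the overflow period.
+     */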
+ cycles /= 100;
+ time = cycles / (cpu_get_freq() / 1000);
+
+ if (time < PERFMON_MIN_POLL_INTERVAL) {
+ log_warning("perfmon: invalid poll interval %llu, forced to %llu",
+ (unsigned long long)time,
+ (unsigned long long)PERFMON_MIN_POLL_INTERVAL);
+ time = PERFMON_MIN_POLL_INTERVAL;
+ }
+
+ return clock_ticks_from_ms(time);
+}
+
+void __init
+perfmon_register(struct perfmon_dev *dev)
+{
+ const struct perfmon_dev_ops *ops;
+
+ ops = dev->ops;
+ assert(ops->translate && ops->alloc && ops->free
+ && ops->start && ops->stop && ops->read);
+ assert(dev->pmc_width <= 64);
+
+ if ((dev->ops->handle_overflow_intr == NULL) && (dev->poll_interval == 0)) {
+ dev->poll_interval = perfmon_compute_poll_interval(dev->pmc_width);
+ }
+
+ perfmon_pmu_set_dev(perfmon_get_pmu(), dev);
+}
+
+void
+perfmon_overflow_intr(void)
+{
+ perfmon_pmu_handle_overflow_intr(perfmon_get_pmu());
+}
+
+void
+perfmon_report_overflow(unsigned int pmc_index)
+{
+ struct perfmon_cpu_pmu *cpu_pmu;
+ struct perfmon_cpu_pmc *cpu_pmc;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_pmu = perfmon_get_local_cpu_pmu();
+ cpu_pmc = perfmon_cpu_pmu_get_pmc(cpu_pmu, pmc_index);
+ perfmon_cpu_pmu_update_pmc(cpu_pmu, cpu_pmc);
+}
+
+static int __init
+perfmon_bootstrap(void)
+{
+ perfmon_pmu_init(perfmon_get_pmu());
+ return 0;
+}
+
+INIT_OP_DEFINE(perfmon_bootstrap,
+ INIT_OP_DEP(log_setup, true),
+ INIT_OP_DEP(spinlock_setup, true));
+
+static int __init
+perfmon_setup(void)
+{
+ struct perfmon_dev *dev;
+
+ dev = perfmon_pmu_get_dev(perfmon_get_pmu());
+
+ if (!dev) {
+ return ENODEV;
+ }
+
+ for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) {
+ perfmon_cpu_pmu_init(perfmon_get_cpu_pmu(cpu), cpu, dev);
+ }
+
+ return 0;
+}
+
+INIT_OP_DEFINE(perfmon_setup,
+ INIT_OP_DEP(boot_setup_pmu, true),
+ INIT_OP_DEP(cpu_mp_probe, true),
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(percpu_setup, true),
+ INIT_OP_DEP(perfmon_bootstrap, true),
+ INIT_OP_DEP(spinlock_setup, true),
+ INIT_OP_DEP(syscnt_setup, true));
diff --git a/kern/perfmon.h b/kern/perfmon.h
new file mode 100644
index 00000000..0c17752c
--- /dev/null
+++ b/kern/perfmon.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Performance monitoring based on hardware performance counters.
+ *
+ * The hardware layer is represented by a performance monitoring unit (PMU),
+ * which provides performance monitoring counters (PMCs).
+ */
+
+#ifndef KERN_PERFMON_H
+#define KERN_PERFMON_H
+
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/perfmon_types.h>
+#include <kern/thread.h>
+
+/*
+ * IDs of generic performance monitoring events.
+ */
+#define PERFMON_EV_CYCLE 0
+#define PERFMON_EV_REF_CYCLE 1
+#define PERFMON_EV_INSTRUCTION 2
+#define PERFMON_EV_CACHE_REF 3
+#define PERFMON_EV_CACHE_MISS 4
+#define PERFMON_EV_BRANCH 5
+#define PERFMON_EV_BRANCH_MISS 6
+#define PERFMON_NR_GENERIC_EVENTS 7
+
+/*
+ * Event flags.
+ */
+#define PERFMON_EF_KERN 0x1 /* Monitor events in kernel mode */
+#define PERFMON_EF_USER 0x2 /* Monitor events in user mode */
+#define PERFMON_EF_RAW 0x4 /* Raw event ID, generic if unset */
+
+/*
+ * Performance monitoring operations.
+ *
+ * This is a public structure.
+ *
+ * All operations are either global but serialized by the caller, or
+ * processor-local and called with interrupts and preemption disabled.
+ *
+ * If the hardware doesn't efficiently support overflow interrupts, the
+ * handler must be set to NULL, making the perfmon module periodically
+ * check the raw value of the hardware counters.
+ */
+struct perfmon_dev_ops {
+ /*
+ * Convert a generic event ID into a raw event ID.
+ *
+ * Global operation.
+ */
+ int (*translate)(unsigned int *raw_event_idp, unsigned int event_id);
+
+ /*
+ * Allocate a performance monitoring counter globally for the given
+ * raw event ID, and return the counter ID through the given pointer.
+ * The range of IDs must start from 0 and increase contiguously.
+ *
+ * The PMC index is used by the driver when reporting overflows, if a
+ * custom overflow interrupt handler is provided.
+ *
+ * Global operation.
+ */
+ int (*alloc)(unsigned int *pmc_idp, unsigned int pmc_index,
+ unsigned int raw_event_id);
+
+ /*
+ * Free an allocated performance monitoring counter.
+ *
+ * Global operation.
+ */
+ void (*free)(unsigned int pmc_id);
+
+ /*
+ * Start a performance monitoring counter for the given raw event ID.
+ *
+ * Processor-local operation.
+ */
+ void (*start)(unsigned int pmc_id, unsigned int raw_event_id);
+
+ /*
+ * Stop a performance monitoring counter.
+ *
+ * Processor-local operation.
+ */
+ void (*stop)(unsigned int pmc_id);
+
+ /*
+ * Read the value of a performance monitoring counter.
+ *
+ * Processor-local operation.
+ */
+ uint64_t (*read)(unsigned int pmc_id);
+
+ /*
+ * Custom overflow interrupt handler.
+ *
+ * Processor-local operation.
+ */
+ void (*handle_overflow_intr)(void);
+};
+
+/*
+ * Performance monitoring device.
+ *
+ * This is a public structure.
+ *
+ * The PMC width is expressed in bits.
+ *
+ * If the driver doesn't provide an overflow interrupt handler, it may set
+ * the poll interval, in ticks, to a duration that safely allows the detection
+ * of a single overflow. A value of 0 lets the perfmon module compute a poll
+ * interval itself.
+ */
+struct perfmon_dev {
+ const struct perfmon_dev_ops *ops;
+ unsigned int pmc_width;
+ uint64_t poll_interval;
+};
+
+/*
+ * Performance monitoring thread data.
+ */
+struct perfmon_td;
+
+/*
+ * Performance monitoring event.
+ *
+ * An event describes a single, well-defined hardware condition and tracks
+ * its occurrences over a period of time.
+ */
+struct perfmon_event;
+
+/*
+ * Initialize thread-specific data.
+ */
+void perfmon_td_init(struct perfmon_td *td);
+
+/*
+ * Load/unload events attached to a thread on the current processor.
+ *
+ * These functions should only be used by the scheduler on a context switch.
+ * Interrupts and preemption must be disabled when calling these functions.
+ */
+void perfmon_td_load(struct perfmon_td *td);
+void perfmon_td_unload(struct perfmon_td *td);
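+
+/*
+ * In this tree, the expected call sites are the context switch helpers
+ * thread_runq_schedule_load() and thread_runq_schedule_unload() in
+ * kern/thread.c.
+ */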
+
+/*
+ * Initialize an event.
+ */
+int perfmon_event_init(struct perfmon_event *event, unsigned int id,
+ unsigned int flags);
+
+/*
+ * Attach/detach an event to/from a thread or a processor.
+ *
+ * Attaching an event allocates hardware resources and enables monitoring.
+ * The number of occurrences for the given event is reset.
+ *
+ * An event can only be attached to one thread or processor at a time.
+ */
+int perfmon_event_attach(struct perfmon_event *event, struct thread *thread);
+int perfmon_event_attach_cpu(struct perfmon_event *event, unsigned int cpu);
+int perfmon_event_detach(struct perfmon_event *event);
+
+/*
+ * Obtain the number of occurrences of an event.
+ */
+uint64_t perfmon_event_read(struct perfmon_event *event);
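+
+/*
+ * Usage sketch (illustrative only): counting the cycles consumed in kernel
+ * mode by a given thread over roughly one second. The thread pointer, the
+ * fixed delay (<kern/clock.h> assumed for clock_ticks_from_ms()) and the
+ * minimal error handling are assumptions of the example, not requirements
+ * of the interface.
+ *
+ *  static uint64_t
+ *  count_thread_cycles(struct thread *thread)
+ *  {
+ *      struct perfmon_event event;
+ *      uint64_t count;
+ *      int error;
+ *
+ *      error = perfmon_event_init(&event, PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+ *
+ *      if (!error) {
+ *          error = perfmon_event_attach(&event, thread);
+ *      }
+ *
+ *      if (error) {
+ *          return 0;
+ *      }
+ *
+ *      thread_delay(clock_ticks_from_ms(1000), false);
+ *      count = perfmon_event_read(&event);
+ *      perfmon_event_detach(&event);
+ *      return count;
+ *  }
+ */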
+
+/*
+ * Register a PMU device.
+ *
+ * Currently, there can only be a single system-wide PMU device, which
+ * assumes the driver is the same for all processors.
+ */
+void perfmon_register(struct perfmon_dev *dev);
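+
+/*
+ * Registration sketch (hypothetical driver, illustrative only): a driver
+ * without an efficient overflow interrupt fills in the mandatory operations,
+ * leaves handle_overflow_intr unset, and lets the perfmon module compute a
+ * poll interval from the counter width. All mydrv_* names are placeholders,
+ * and registration is typically done from the driver's init operation.
+ *
+ *  static const struct perfmon_dev_ops mydrv_ops = {
+ *      .translate = mydrv_translate,
+ *      .alloc = mydrv_alloc,
+ *      .free = mydrv_free,
+ *      .start = mydrv_start,
+ *      .stop = mydrv_stop,
+ *      .read = mydrv_read,
+ *  };
+ *
+ *  static struct perfmon_dev mydrv_dev = {
+ *      .ops = &mydrv_ops,
+ *      .pmc_width = 48,
+ *      .poll_interval = 0,
+ *  };
+ *
+ *  perfmon_register(&mydrv_dev);
+ */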
+
+/*
+ * Handle an overflow interrupt.
+ *
+ * This function must be called in interrupt context.
+ */
+void perfmon_overflow_intr(void);
+
+/*
+ * Report a PMC overflow.
+ *
+ * This function is intended to be used by PMU drivers using a custom
+ * overflow interrupt handler.
+ *
+ * This function must be called in interrupt context.
+ */
+void perfmon_report_overflow(unsigned int pmc_index);
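+
+/*
+ * Reporting sketch (hypothetical driver, illustrative only): a driver that
+ * provides handle_overflow_intr identifies the hardware counters that
+ * overflowed and reports each of them using the PMC index previously passed
+ * to its alloc operation. The mydrv_* helpers are placeholders.
+ *
+ *  static void
+ *  mydrv_handle_overflow_intr(void)
+ *  {
+ *      for (unsigned int i = 0; i < MYDRV_NR_PMCS; i++) {
+ *          if (mydrv_pmc_overflowed(i)) {
+ *              perfmon_report_overflow(mydrv_pmc_index(i));
+ *          }
+ *      }
+ *  }
+ */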
+
+/*
+ * This init operation provides:
+ * - PMU device registration
+ */
+INIT_OP_DECLARE(perfmon_bootstrap);
+
+#endif /* KERN_PERFMON_H */
diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h
new file mode 100644
index 00000000..c316312a
--- /dev/null
+++ b/kern/perfmon_types.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definitions used to avoid circular inclusion dependencies.
+ */
+
+#ifndef KERN_PERFMON_TYPES_H
+#define KERN_PERFMON_TYPES_H
+
+#ifdef CONFIG_PERFMON
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <kern/spinlock_types.h>
+
+/*
+ * Maximum number of supported hardware counters.
+ */
+#define PERFMON_MAX_PMCS CONFIG_PERFMON_MAX_PMCS
+
+/*
+ * Performance monitoring event.
+ *
+ * An event may be unattached, attached to a thread, or attached to a CPU.
+ * When it is loaded, the current value of the underlying PMC is saved.
+ * When it is updated, the delta between the current and saved PMC values
+ * is added to the event value.
+ */
+struct perfmon_event {
+ struct spinlock lock;
+ unsigned int flags;
+ unsigned int id;
+ uint64_t pmc_value;
+ uint64_t value;
+
+ union {
+ struct thread *thread;
+ unsigned int cpu;
+ };
+
+ unsigned int pmc_index;
+};
+
+/*
+ * Per-thread performance monitoring counter.
+ *
+ * Per-thread PMCs are indexed the same way as global PMCs.
+ *
+ * A per-thread PMC is referenced when an event is attached to a thread.
+ * The PMC may only be loaded if the thread is running on a processor,
+ * as a result of an event being attached to the thread, or the thread
+ * being dispatched by the scheduler. Note that this allows a transient
+ * state to be seen where a per-thread PMC is both unused and loaded.
+ * This happens after detaching an event from a thread, which causes
+ * the underlying per-thread PMC to become unused; if the thread is
+ * running concurrently, the counter is still loaded. The implementation
+ * resolves the situation by unloading the counter, which is either
+ * done by an explicit unload cross-call, or when the scheduler preempts
+ * the thread and unloads its thread data.
+ *
+ * When a per-thread PMC is loaded, the current value of the underlying
+ * PMC is saved, and when it's updated, the delta between the current
+ * and saved PMC values is added to the per-thread PMC value.
+ */
+struct perfmon_td_pmc {
+ unsigned int nr_refs;
+ bool loaded;
+ unsigned int pmc_id;
+ unsigned int raw_event_id;
+ uint64_t cpu_pmc_value;
+ uint64_t value;
+};
+
+/*
+ * Per-thread performance monitoring data.
+ *
+ * Interrupts must be disabled when locking thread data.
+ */
+struct perfmon_td {
+ struct spinlock lock;
+ struct perfmon_td_pmc pmcs[PERFMON_MAX_PMCS];
+};
+
+#endif /* CONFIG_PERFMON */
+
+#endif /* KERN_PERFMON_TYPES_H */
diff --git a/kern/task.c b/kern/task.c
index 5df72251..3ad863bd 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -257,7 +257,7 @@ task_info(struct task *task)
printf(TASK_INFO_ADDR_FMT " %c %8s:" TASK_INFO_ADDR_FMT
" %.2s:%02hu %02u %s\n",
(unsigned long)thread,
- thread_state_to_chr(thread),
+ thread_state_to_chr(thread_state(thread)),
thread_wchan_desc(thread),
(unsigned long)thread_wchan_addr(thread),
thread_sched_class_to_str(thread_user_sched_class(thread)),
diff --git a/kern/thread.c b/kern/thread.c
index e79ef311..a8f58b39 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -100,6 +100,7 @@
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/percpu.h>
+#include <kern/perfmon.h>
#include <kern/rcu.h>
#include <kern/shell.h>
#include <kern/sleepq.h>
@@ -600,14 +601,28 @@ thread_runq_wakeup_balancer(struct thread_runq *runq)
}
thread_clear_wchan(runq->balancer);
- runq->balancer->state = THREAD_RUNNING;
+ atomic_store(&runq->balancer->state, THREAD_RUNNING, ATOMIC_RELAXED);
thread_runq_wakeup(runq, runq->balancer);
}
static void
-thread_runq_schedule_prepare(struct thread *thread)
+thread_runq_schedule_load(struct thread *thread)
{
pmap_load(thread->task->map->pmap);
+
+#ifdef CONFIG_PERFMON
+ perfmon_td_load(thread_get_perfmon_td(thread));
+#endif
+}
+
+static void
+thread_runq_schedule_unload(struct thread *thread)
+{
+#ifdef CONFIG_PERFMON
+ perfmon_td_unload(thread_get_perfmon_td(thread));
+#else
+ (void)thread;
+#endif
}
static struct thread_runq *
@@ -639,6 +654,8 @@ thread_runq_schedule(struct thread_runq *runq)
assert(next->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL);
if (likely(prev != next)) {
+ thread_runq_schedule_unload(prev);
+
rcu_report_context_switch(thread_rcu_reader(prev));
spinlock_transfer_owner(&runq->lock, next);
@@ -660,10 +677,10 @@ thread_runq_schedule(struct thread_runq *runq)
* - The current thread may have been migrated to another processor.
*/
barrier();
+ thread_runq_schedule_load(prev);
+
next = NULL;
runq = thread_runq_local();
-
- thread_runq_schedule_prepare(prev);
} else {
next = NULL;
}
@@ -1750,7 +1767,7 @@ thread_main(void (*fn)(void *), void *arg)
assert(!thread_preempt_enabled());
thread = thread_self();
- thread_runq_schedule_prepare(thread);
+ thread_runq_schedule_load(thread);
spinlock_unlock(&thread_runq_local()->lock);
cpu_intr_enable();
@@ -1843,6 +1860,10 @@ thread_init(struct thread *thread, void *stack,
thread->stack = stack;
strlcpy(thread->name, attr->name, sizeof(thread->name));
+#ifdef CONFIG_PERFMON
+ perfmon_td_init(thread_get_perfmon_td(thread));
+#endif
+
if (attr->flags & THREAD_ATTR_DETACHED) {
thread->flags |= THREAD_DETACHED;
}
@@ -1989,8 +2010,9 @@ static void
thread_join_common(struct thread *thread)
{
struct thread_runq *runq;
- unsigned long flags, state;
struct thread *self;
+ unsigned long flags;
+ unsigned int state;
self = thread_self();
assert(thread != self);
@@ -2060,7 +2082,7 @@ thread_balance(void *arg)
for (;;) {
runq->idle_balance_ticks = THREAD_IDLE_BALANCE_TICKS;
thread_set_wchan(self, runq, "runq");
- self->state = THREAD_SLEEPING;
+ atomic_store(&self->state, THREAD_SLEEPING, ATOMIC_RELAXED);
runq = thread_runq_schedule(runq);
assert(runq == arg);
@@ -2309,6 +2331,13 @@ thread_setup(void)
#define THREAD_STACK_GUARD_INIT_OP_DEPS
#endif /* CONFIG_THREAD_STACK_GUARD */
+#ifdef CONFIG_PERFMON
+#define THREAD_PERFMON_INIT_OP_DEPS \
+ INIT_OP_DEP(perfmon_bootstrap, true),
+#else /* CONFIG_PERFMON */
+#define THREAD_PERFMON_INIT_OP_DEPS
+#endif /* CONFIG_PERFMON */
+
INIT_OP_DEFINE(thread_setup,
INIT_OP_DEP(cpumap_setup, true),
INIT_OP_DEP(kmem_setup, true),
@@ -2318,6 +2347,7 @@ INIT_OP_DEFINE(thread_setup,
INIT_OP_DEP(thread_bootstrap, true),
INIT_OP_DEP(turnstile_setup, true),
THREAD_STACK_GUARD_INIT_OP_DEPS
+ THREAD_PERFMON_INIT_OP_DEPS
);
void __init
@@ -2421,7 +2451,7 @@ thread_exit(void)
runq = thread_runq_local();
spinlock_lock_intr_save(&runq->lock, &flags);
- thread->state = THREAD_DEAD;
+ atomic_store(&thread->state, THREAD_DEAD, ATOMIC_RELAXED);
thread_runq_schedule(runq);
panic("thread: dead thread walking");
@@ -2461,7 +2491,7 @@ thread_wakeup_common(struct thread *thread, int error)
}
thread_clear_wchan(thread);
- thread->state = THREAD_RUNNING;
+ atomic_store(&thread->state, THREAD_RUNNING, ATOMIC_RELAXED);
thread_unlock_runq(runq, flags);
}
@@ -2532,7 +2562,7 @@ thread_sleep_common(struct spinlock *interlock, const void *wchan_addr,
}
thread_set_wchan(thread, wchan_addr, wchan_desc);
- thread->state = THREAD_SLEEPING;
+ atomic_store(&thread->state, THREAD_SLEEPING, ATOMIC_RELAXED);
runq = thread_runq_schedule(runq);
assert(thread->state == THREAD_RUNNING);
@@ -2699,9 +2729,9 @@ thread_report_periodic_event(void)
}
char
-thread_state_to_chr(const struct thread *thread)
+thread_state_to_chr(unsigned int state)
{
- switch (thread->state) {
+ switch (state) {
case THREAD_RUNNING:
return 'R';
case THREAD_SLEEPING:
@@ -2906,6 +2936,21 @@ thread_key_create(unsigned int *keyp, thread_dtor_fn_t dtor)
*keyp = key;
}
+unsigned int
+thread_cpu(const struct thread *thread)
+{
+ const struct thread_runq *runq;
+
+ runq = atomic_load(&thread->runq, ATOMIC_RELAXED);
+ return runq->cpu;
+}
+
+unsigned int
+thread_state(const struct thread *thread)
+{
+ return atomic_load(&thread->state, ATOMIC_RELAXED);
+}
+
bool
thread_is_running(const struct thread *thread)
{
diff --git a/kern/thread.h b/kern/thread.h
index 6e696fc7..5b5729ce 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -75,6 +75,13 @@ struct thread_sched_data {
#define THREAD_KERNEL_PREFIX KERNEL_NAME "_"
/*
+ * Thread states.
+ */
+#define THREAD_RUNNING 0
+#define THREAD_SLEEPING 1
+#define THREAD_DEAD 2
+
+/*
* Scheduling policies.
*
* The idle policy is reserved for the per-CPU idle threads.
@@ -323,7 +330,7 @@ thread_wchan_desc(const struct thread *thread)
/*
* Return a character representation of the state of a thread.
*/
-char thread_state_to_chr(const struct thread *thread);
+char thread_state_to_chr(unsigned int state);
static inline const struct thread_sched_data *
thread_get_user_sched_data(const struct thread *thread)
@@ -705,6 +712,28 @@ thread_get_specific(unsigned int key)
return thread_tsd_get(thread_self(), key);
}
+#ifdef CONFIG_PERFMON
+static inline struct perfmon_td *
+thread_get_perfmon_td(struct thread *thread)
+{
+ return &thread->perfmon_td;
+}
+#endif /* CONFIG_PERFMON */
+
+/*
+ * Return the last CPU on which the thread has been scheduled.
+ *
+ * This call isn't synchronized, and the caller may obtain an outdated value.
+ */
+unsigned int thread_cpu(const struct thread *thread);
+
+/*
+ * Return the current state of the given thread.
+ *
+ * This call isn't synchronized, and the caller may obtain an outdated value.
+ */
+unsigned int thread_state(const struct thread *thread);
+
/*
* Return true if the given thread is running.
*
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 0be1e773..9c9a705b 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -24,6 +24,7 @@
#include <kern/atomic.h>
#include <kern/cpumap.h>
#include <kern/list_types.h>
+#include <kern/perfmon_types.h>
#include <kern/rcu_types.h>
#include <kern/spinlock_types.h>
#include <kern/turnstile_types.h>
@@ -45,16 +46,6 @@ struct thread_fs_runq;
#define THREAD_DETACHED 0x2UL /* Resources automatically released on exit */
/*
- * Thread states.
- *
- * Threads in the running state may not be on a run queue if they're being
- * awaken.
- */
-#define THREAD_RUNNING 0
-#define THREAD_SLEEPING 1
-#define THREAD_DEAD 2
-
-/*
* Scheduling data for a real-time thread.
*/
struct thread_rt_data {
@@ -113,7 +104,7 @@ struct thread {
const void *wchan_addr; /* (r) */
const char *wchan_desc; /* (r) */
int wakeup_error; /* (r) */
- unsigned short state; /* (r) */
+ unsigned int state; /* (a,r) */
/* Sleep queue available for lending */
struct sleepq *priv_sleepq; /* (-) */
@@ -185,6 +176,10 @@ struct thread {
struct list task_node; /* (T) */
void *stack; /* (-) */
char name[THREAD_NAME_SIZE]; /* ( ) */
+
+#ifdef CONFIG_PERFMON
+ struct perfmon_td perfmon_td; /* ( ) */
+#endif
};
#define THREAD_ATTR_DETACHED 0x1
diff --git a/test/Kconfig b/test/Kconfig
index 3f1c3b69..9f0faf44 100644
--- a/test/Kconfig
+++ b/test/Kconfig
@@ -34,6 +34,18 @@ config TEST_MODULE_MUTEX
config TEST_MODULE_MUTEX_PI
bool "mutex_pi"
+config TEST_MODULE_PERFMON_CPU
+ bool "perfmon_cpu"
+ depends on PERFMON
+
+config TEST_MODULE_PERFMON_THREAD
+ bool "perfmon_thread"
+ depends on PERFMON
+
+config TEST_MODULE_PERFMON_TORTURE
+ bool "perfmon_torture"
+ depends on PERFMON
+
config TEST_MODULE_PMAP_UPDATE_MP
bool "pmap_update_mp"
diff --git a/test/Makefile b/test/Makefile
index cdce6130..76edbf0e 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -2,6 +2,9 @@ x15_SOURCES-$(CONFIG_TEST_MODULE_ATOMIC) += test/test_atomic.c
x15_SOURCES-$(CONFIG_TEST_MODULE_BULLETIN) += test/test_bulletin.c
x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX) += test/test_mutex.c
x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX_PI) += test/test_mutex_pi.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
x15_SOURCES-$(CONFIG_TEST_MODULE_PMAP_UPDATE_MP) += test/test_pmap_update_mp.c
x15_SOURCES-$(CONFIG_TEST_MODULE_RCU_DEFER) += test/test_rcu_defer.c
x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES) += test/test_sref_dirty_zeroes.c
diff --git a/test/test_perfmon_cpu.c b/test/test_perfmon_cpu.c
new file mode 100644
index 00000000..75f69d3f
--- /dev/null
+++ b/test/test_perfmon_cpu.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test checks the behavior of performance monitoring on a CPU.
+ * It creates a group with two events, cycle and instruction, and attaches
+ * that group to CPU1, where a thread is bound and runs a tight loop to
+ * make sure the target CPU is never idle. After some time, the measurement
+ * stops and values are reported.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/list.h>
+#include <kern/log.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <machine/cpu.h>
+#include <test/test.h>
+
+#define TEST_WAIT_DELAY_MS 1000
+
+/*
+ * Using a CPU other than the BSP as the monitored CPU checks that PMUs are
+ * correctly initialized on APs.
+ */
+#define TEST_CONTROL_CPU 0
+#define TEST_MONITORED_CPU (TEST_CONTROL_CPU + 1)
+#define TEST_MIN_CPUS (TEST_MONITORED_CPU + 1)
+
+#define TEST_EVENT_NAME_MAX_SIZE 32
+
+struct test_event {
+ struct list node;
+ struct perfmon_event pm_event;
+ char name[TEST_EVENT_NAME_MAX_SIZE];
+};
+
+struct test_group {
+ struct list events;
+};
+
+static unsigned int test_run_stop;
+
+static void
+test_wait(void)
+{
+ thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false);
+}
+
+static void
+test_event_init(struct test_event *event, unsigned int id, const char *name)
+{
+ int error;
+
+ error = perfmon_event_init(&event->pm_event, id, PERFMON_EF_KERN);
+ error_check(error, "perfmon_event_init");
+ strlcpy(event->name, name, sizeof(event->name));
+}
+
+static void
+test_event_report(struct test_event *event)
+{
+ uint64_t count;
+ int error;
+
+ count = perfmon_event_read(&event->pm_event);
+ error = (count == 0) ? EINVAL : 0;
+ error_check(error, __func__);
+ log_info("test: %s: %llu", event->name, (unsigned long long)count);
+}
+
+static void
+test_event_attach_cpu(struct test_event *event, unsigned int cpu)
+{
+ int error;
+
+ error = perfmon_event_attach_cpu(&event->pm_event, cpu);
+ error_check(error, "perfmon_event_attach_cpu");
+}
+
+static void
+test_event_detach(struct test_event *event)
+{
+ int error;
+
+ error = perfmon_event_detach(&event->pm_event);
+ error_check(error, "perfmon_event_detach");
+}
+
+static void
+test_group_init(struct test_group *group)
+{
+ list_init(&group->events);
+}
+
+static void
+test_group_add(struct test_group *group, struct test_event *event)
+{
+ list_insert_tail(&group->events, &event->node);
+}
+
+static void
+test_group_attach_cpu(struct test_group *group, unsigned int cpu)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_attach_cpu(event, cpu);
+ }
+}
+
+static void
+test_group_detach(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_detach(event);
+ }
+}
+
+static void
+test_group_report(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_report(event);
+ }
+}
+
+static void
+test_run(void *arg)
+{
+ unsigned int stop;
+
+ (void)arg;
+
+ do {
+ stop = atomic_load(&test_run_stop, ATOMIC_RELAXED);
+ } while (!stop);
+}
+
+static void
+test_control(void *arg)
+{
+ struct test_event cycle, instruction;
+ struct test_group group;
+ struct thread *thread;
+
+ thread = arg;
+
+ test_event_init(&cycle, PERFMON_EV_CYCLE, "cycle");
+ test_event_init(&instruction, PERFMON_EV_INSTRUCTION, "instruction");
+ test_group_init(&group);
+ test_group_add(&group, &cycle);
+ test_group_add(&group, &instruction);
+ test_group_attach_cpu(&group, TEST_MONITORED_CPU);
+ test_wait();
+ test_group_report(&group);
+ test_wait();
+ test_group_detach(&group);
+ test_group_report(&group);
+
+ atomic_store(&test_run_stop, 1, ATOMIC_RELAXED);
+ thread_join(thread);
+ log_info("test: done");
+}
+
+void
+test_setup(void)
+{
+ struct thread *thread;
+ struct thread_attr attr;
+ struct cpumap *cpumap;
+ int error;
+
+ if (cpu_count() < TEST_MIN_CPUS) {
+ panic("test: %u processors required", TEST_MIN_CPUS);
+ }
+
+ error = cpumap_create(&cpumap);
+ error_check(error, "cpumap_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_run");
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, TEST_MONITORED_CPU);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&thread, &attr, test_run, NULL);
+ error_check(error, "thread_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control");
+ thread_attr_set_detached(&attr);
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, TEST_CONTROL_CPU);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(NULL, &attr, test_control, thread);
+ error_check(error, "thread_create");
+
+ cpumap_destroy(cpumap);
+}
diff --git a/test/test_perfmon_thread.c b/test/test_perfmon_thread.c
new file mode 100644
index 00000000..0213777b
--- /dev/null
+++ b/test/test_perfmon_thread.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test checks the behavior of performance monitoring on a thread.
+ * It creates a group with a single event, cycle, and attaches that group to
+ * a runner thread. Two checks are then performed:
+ * - the first makes sure the number of cycles changes when the runner
+ * thread is running
+ * - the second makes sure the number of cycles doesn't change when the
+ * runner thread is sleeping
+ *
+ * Another group with a cycle event is created and attached to CPU0 to make
+ * sure that a shared event is correctly handled, and the runner thread is
+ * bound to CPU0 to force sharing. A third thread is created to fill CPU0
+ * time with cycles so that the cycle counter of the CPU-attached group
+ * changes while the runner thread is sleeping.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/condition.h>
+#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/kmem.h>
+#include <kern/list.h>
+#include <kern/log.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <test/test.h>
+
+#define TEST_WAIT_DELAY_MS 1000
+
+#define TEST_EVENT_NAME_MAX_SIZE 32
+
+struct test_event {
+ struct list node;
+ struct perfmon_event pm_event;
+ uint64_t last_value;
+ char name[TEST_EVENT_NAME_MAX_SIZE];
+};
+
+struct test_group {
+ struct list events;
+};
+
+enum test_state {
+ TEST_STATE_RUNNING,
+ TEST_STATE_SUSPENDED,
+ TEST_STATE_TERMINATED,
+};
+
+static struct condition test_condition;
+static struct mutex test_mutex;
+static enum test_state test_state;
+
+static void
+test_wait(void)
+{
+ log_info("test: controller waiting");
+ thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false);
+ log_info("test: controller resuming");
+}
+
+static void
+test_event_init(struct test_event *event, unsigned int id, const char *name)
+{
+ int error;
+
+ error = perfmon_event_init(&event->pm_event, id, PERFMON_EF_KERN);
+ error_check(error, "perfmon_event_init");
+ strlcpy(event->name, name, sizeof(event->name));
+}
+
+static void
+test_event_attach(struct test_event *event, struct thread *thread)
+{
+ int error;
+
+ error = perfmon_event_attach(&event->pm_event, thread);
+ error_check(error, "perfmon_event_attach");
+}
+
+static void
+test_event_attach_cpu(struct test_event *event, unsigned int cpu)
+{
+ int error;
+
+ error = perfmon_event_attach_cpu(&event->pm_event, cpu);
+ error_check(error, "perfmon_event_attach_cpu");
+}
+
+static void
+test_event_detach(struct test_event *event)
+{
+ int error;
+
+ error = perfmon_event_detach(&event->pm_event);
+ error_check(error, "perfmon_event_detach");
+}
+
+static uint64_t
+test_event_read(struct test_event *event)
+{
+ uint64_t value;
+
+ value = perfmon_event_read(&event->pm_event);
+ log_info("test: %s: %llu", event->name, (unsigned long long)value);
+ return value;
+}
+
+static void
+test_event_save(struct test_event *event)
+{
+ event->last_value = test_event_read(event);
+}
+
+static void
+test_event_check(struct test_event *event, bool change_expected)
+{
+ uint64_t value;
+ bool changed;
+
+ value = test_event_read(event);
+ changed = (value != event->last_value);
+
+ if (changed != change_expected) {
+ panic("test: invalid value");
+ }
+
+ event->last_value = value;
+}
+
+static void
+test_group_init(struct test_group *group)
+{
+ list_init(&group->events);
+}
+
+static void
+test_group_add(struct test_group *group, struct test_event *event)
+{
+ list_insert_tail(&group->events, &event->node);
+}
+
+static void
+test_group_attach(struct test_group *group, struct thread *thread)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_attach(event, thread);
+ }
+}
+
+static void
+test_group_attach_cpu(struct test_group *group, unsigned int cpu)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_attach_cpu(event, cpu);
+ }
+}
+
+static void
+test_group_detach(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_detach(event);
+ }
+}
+
+static void
+test_group_save(struct test_group *group)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_save(event);
+ }
+}
+
+static void
+test_group_check(struct test_group *group, bool change_expected)
+{
+ struct test_event *event;
+
+ list_for_each_entry(&group->events, event, node) {
+ test_event_check(event, change_expected);
+ }
+}
+
+static void
+test_run(void *arg)
+{
+ bool report;
+
+ (void)arg;
+
+ report = true;
+
+ mutex_lock(&test_mutex);
+
+ while (test_state != TEST_STATE_TERMINATED) {
+ if (test_state == TEST_STATE_SUSPENDED) {
+ log_info("test: runner suspended");
+ report = true;
+ condition_wait(&test_condition, &test_mutex);
+ } else {
+ mutex_unlock(&test_mutex);
+
+ if (report) {
+ log_info("test: runner running");
+ report = false;
+ }
+
+ mutex_lock(&test_mutex);
+ }
+ }
+
+ mutex_unlock(&test_mutex);
+}
+
+static void
+test_fill(void *arg)
+{
+ enum test_state state;
+
+ (void)arg;
+
+ do {
+ state = atomic_load(&test_state, ATOMIC_RELAXED);
+ } while (state != TEST_STATE_TERMINATED);
+}
+
+static void
+test_wait_state(const struct thread *thread, unsigned int state)
+{
+ for (;;) {
+ if (thread_state(thread) == state) {
+ break;
+ }
+
+ thread_delay(1, false);
+ }
+}
+
+static void
+test_resume(struct thread *thread)
+{
+ test_wait_state(thread, THREAD_SLEEPING);
+
+ mutex_lock(&test_mutex);
+ assert(test_state == TEST_STATE_SUSPENDED);
+ atomic_store(&test_state, TEST_STATE_RUNNING, ATOMIC_RELAXED);
+ condition_signal(&test_condition);
+ mutex_unlock(&test_mutex);
+
+ test_wait_state(thread, THREAD_RUNNING);
+}
+
+static void
+test_suspend(struct thread *thread)
+{
+ test_wait_state(thread, THREAD_RUNNING);
+
+ mutex_lock(&test_mutex);
+ assert(test_state == TEST_STATE_RUNNING);
+ atomic_store(&test_state, TEST_STATE_SUSPENDED, ATOMIC_RELAXED);
+ mutex_unlock(&test_mutex);
+
+ test_wait_state(thread, THREAD_SLEEPING);
+}
+
+static void
+test_terminate(void)
+{
+ mutex_lock(&test_mutex);
+ test_state = TEST_STATE_TERMINATED;
+ condition_signal(&test_condition);
+ mutex_unlock(&test_mutex);
+}
+
+static void
+test_control(void *arg)
+{
+ struct test_event thread_cycle, cpu_cycle;
+ struct test_group thread_group, cpu_group;
+ struct thread *runner;
+
+ runner = arg;
+
+ test_event_init(&thread_cycle, PERFMON_EV_CYCLE, "thread_cycle");
+ test_group_init(&thread_group);
+ test_group_add(&thread_group, &thread_cycle);
+
+ test_event_init(&cpu_cycle, PERFMON_EV_CYCLE, "cpu_cycle");
+ test_group_init(&cpu_group);
+ test_group_add(&cpu_group, &cpu_cycle);
+
+ test_group_attach(&thread_group, runner);
+ test_group_attach_cpu(&cpu_group, 0);
+
+ test_group_save(&thread_group);
+ test_group_save(&cpu_group);
+ test_resume(runner);
+ test_wait();
+ test_suspend(runner);
+ test_group_check(&thread_group, true);
+ test_group_check(&cpu_group, true);
+ test_wait();
+ test_group_check(&thread_group, false);
+ test_group_check(&cpu_group, true);
+ test_terminate();
+
+ test_group_detach(&cpu_group);
+ test_group_detach(&thread_group);
+
+ thread_join(runner);
+ log_info("test: done");
+}
+
+void
+test_setup(void)
+{
+ struct thread_attr attr;
+ struct thread *runner;
+ struct cpumap *cpumap;
+ int error;
+
+ condition_init(&test_condition);
+ mutex_init(&test_mutex);
+ test_state = TEST_STATE_SUSPENDED;
+
+ error = cpumap_create(&cpumap);
+ error_check(error, "cpumap_create");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 0);
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_run");
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&runner, &attr, test_run, NULL);
+ error_check(error, "thread_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_fill");
+ thread_attr_set_detached(&attr);
+ thread_attr_set_cpumap(&attr, cpumap);
+ thread_attr_set_priority(&attr, THREAD_SCHED_FS_PRIO_MIN);
+ error = thread_create(NULL, &attr, test_fill, NULL);
+ error_check(error, "thread_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control");
+ thread_attr_set_detached(&attr);
+ error = thread_create(NULL, &attr, test_control, runner);
+ error_check(error, "thread_create");
+
+ cpumap_destroy(cpumap);
+}
diff --git a/test/test_perfmon_torture.c b/test/test_perfmon_torture.c
new file mode 100644
index 00000000..171cb99c
--- /dev/null
+++ b/test/test_perfmon_torture.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014-2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This module is a stress test of the performance monitoring module,
+ * expected to never terminate. It creates a control thread which
+ * maintains a couple of test threads running while toggling performance
+ * monitoring on them, attempting to produce many regular and corner
+ * cases. In particular, the thread pool is randomly resized by destroying
+ * and creating the underlying kernel threads.
+ *
+ * The control thread regularly prints some stats about the thread pool
+ * and the associated performance monitoring events to report that it's
+ * making progress.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/error.h>
+#include <kern/kmem.h>
+#include <kern/log.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <test/test.h>
+
+struct test_thread {
+ unsigned int id;
+ struct thread *thread;
+ struct perfmon_event event;
+ unsigned int must_stop;
+ bool monitored;
+ unsigned long long count;
+};
+
+struct test_controller {
+ struct test_thread **threads;
+ unsigned int nr_threads;
+ unsigned int monitoring_lid;
+ unsigned int state_lid;
+ unsigned long nr_current_events;
+ unsigned long nr_total_events;
+ unsigned long nr_current_threads;
+ unsigned long nr_total_threads;
+};
+
+#define TEST_WAIT_DELAY_MS 100
+#define TEST_LOOPS_PER_PRINT 20
+
+#define TEST_MONITORING_SEED 12345
+#define TEST_STATE_SEED 23456
+
+static void
+test_wait(void)
+{
+ thread_delay(clock_ticks_from_ms(TEST_WAIT_DELAY_MS), false);
+}
+
+static unsigned int
+test_rand(unsigned int x)
+{
+ /* Basic 32-bit xorshift PRNG */
+ x ^= x << 13;
+ x ^= x >> 17;
+ x ^= x << 5;
+ return x;
+}
+
+static bool
+test_thread_monitored(const struct test_thread *thread)
+{
+ return thread->monitored;
+}
+
+static void
+test_thread_start_monitoring(struct test_thread *thread)
+{
+ int error;
+
+ error = perfmon_event_attach(&thread->event, thread->thread);
+ error_check(error, __func__);
+ thread->monitored = true;
+}
+
+static void
+test_thread_stop_monitoring(struct test_thread *thread)
+{
+ int error;
+
+ thread->count += perfmon_event_read(&thread->event);
+ error = perfmon_event_detach(&thread->event);
+ error_check(error, __func__);
+ thread->monitored = false;
+}
+
+static void
+test_thread_report(const struct test_thread *thread)
+{
+ log_info("test: thread:%u count:%llu", thread->id, thread->count);
+}
+
+static void
+test_run(void *arg)
+{
+ struct test_thread *thread;
+
+ thread = arg;
+
+ for (;;) {
+ if (atomic_load(&thread->must_stop, ATOMIC_RELAXED)) {
+ break;
+ }
+ }
+}
+
+static bool
+test_thread_started(const struct test_thread *thread)
+{
+ return thread->thread;
+}
+
+static void
+test_thread_start(struct test_thread *thread)
+{
+ char name[THREAD_NAME_SIZE];
+ struct thread_attr attr;
+ int error;
+
+ assert(!thread->monitored);
+
+ if (test_thread_started(thread)) {
+ return;
+ }
+
+ thread->must_stop = 0;
+
+ snprintf(name, sizeof(name),
+ THREAD_KERNEL_PREFIX "test_run:%u", thread->id);
+ thread_attr_init(&attr, name);
+ error = thread_create(&thread->thread, &attr, test_run, thread);
+ error_check(error, "thread_create");
+}
+
+static void
+test_thread_request_stop(struct test_thread *thread)
+{
+ atomic_store(&thread->must_stop, 1, ATOMIC_RELAXED);
+}
+
+static void
+test_thread_join(struct test_thread *thread)
+{
+ assert(test_thread_started(thread));
+ assert(!test_thread_monitored(thread));
+
+ thread_join(thread->thread);
+ thread->thread = NULL;
+}
+
+static struct test_thread *
+test_thread_create(unsigned int id)
+{
+ struct test_thread *thread;
+
+ thread = kmem_alloc(sizeof(*thread));
+
+ if (thread == NULL) {
+ panic("thread allocation failed");
+ }
+
+ thread->id = id;
+ thread->thread = NULL;
+ thread->must_stop = 0;
+ thread->monitored = false;
+ thread->count = 0;
+
+ perfmon_event_init(&thread->event, PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+ test_thread_start(thread);
+
+ return thread;
+}
+
+static struct test_thread *
+test_controller_get(struct test_controller *controller, unsigned int id)
+{
+ assert(id < controller->nr_threads);
+ return controller->threads[id];
+}
+
+static struct test_thread *
+test_controller_get_by_lid(struct test_controller *controller, unsigned int lid)
+{
+ return test_controller_get(controller, lid % controller->nr_threads);
+}
+
+static void
+test_toggle_monitoring(struct test_controller *controller,
+ struct test_thread *thread)
+{
+ if (!test_thread_started(thread)) {
+ return;
+ }
+
+ if (thread->monitored) {
+ test_thread_stop_monitoring(thread);
+ controller->nr_current_events--;
+ } else {
+ test_thread_start_monitoring(thread);
+ controller->nr_total_events++;
+ controller->nr_current_events++;
+ }
+}
+
+static void
+test_toggle_state(struct test_controller *controller,
+ struct test_thread *thread)
+{
+ if (test_thread_started(thread)) {
+ /*
+ * Make the thread stop asynchronously with monitoring to test
+ * thread referencing.
+ */
+ test_thread_request_stop(thread);
+
+ if (test_thread_monitored(thread)) {
+ test_thread_stop_monitoring(thread);
+ controller->nr_current_events--;
+ }
+
+ test_thread_join(thread);
+ controller->nr_current_threads--;
+ } else {
+ test_thread_start(thread);
+ controller->nr_total_threads++;
+ controller->nr_current_threads++;
+ }
+}
+
+static void
+test_controller_report(struct test_controller *controller)
+{
+ log_info("test: events:%lu total:%lu threads:%lu total:%lu",
+ controller->nr_current_events, controller->nr_total_events,
+ controller->nr_current_threads, controller->nr_total_threads);
+
+ for (unsigned int i = 0; i < controller->nr_threads; i++) {
+ test_thread_report(test_controller_get(controller, i));
+ }
+}
+
+static void
+test_control(void *arg)
+{
+ struct test_controller *controller;
+ struct test_thread *thread;
+
+ controller = arg;
+
+ log_info("test: %u threads", controller->nr_threads);
+
+ for (unsigned long nr_loops = 1; /* no condition */; nr_loops++) {
+ controller->monitoring_lid = test_rand(controller->monitoring_lid);
+ thread = test_controller_get_by_lid(controller,
+ controller->monitoring_lid);
+ test_toggle_monitoring(controller, thread);
+
+ controller->state_lid = test_rand(controller->state_lid);
+ thread = test_controller_get_by_lid(controller,
+ controller->state_lid);
+ test_toggle_state(controller, thread);
+
+ test_wait();
+
+ if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
+ test_controller_report(controller);
+ }
+ }
+}
+
+static void
+test_controller_create(void)
+{
+ struct test_controller *controller;
+ struct thread_attr attr;
+ int error;
+
+ controller = kmem_alloc(sizeof(*controller));
+
+ if (!controller) {
+ panic("test: unable to create controller");
+ }
+
+ /*
+ * At least two threads are required by the monitoring/state toggling
+     * operations; otherwise they always apply to the same thread, severely
+ * restricting their usefulness.
+ */
+ controller->nr_threads = MAX(cpu_count() - 1, 2);
+ controller->threads = kmem_alloc(controller->nr_threads
+ * sizeof(*controller->threads));
+
+ if (!controller->threads) {
+ panic("test: unable to allocate thread array");
+ }
+
+ for (unsigned int i = 0; i < controller->nr_threads; i++) {
+ controller->threads[i] = test_thread_create(i);
+ }
+
+ controller->monitoring_lid = TEST_MONITORING_SEED;
+ controller->state_lid = TEST_STATE_SEED;
+ controller->nr_current_events = 0;
+ controller->nr_total_events = 0;
+ controller->nr_current_threads = controller->nr_threads;
+ controller->nr_total_threads = controller->nr_threads;
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_control");
+ thread_attr_set_detached(&attr);
+ error = thread_create(NULL, &attr, test_control, controller);
+ error_check(error, "thread_create");
+}
+
+void
+test_setup(void)
+{
+ test_controller_create();
+}