-rw-r--r-- | arch/x86/Makefile | 3
-rw-r--r-- | arch/x86/machine/cpu.c | 59
-rw-r--r-- | arch/x86/machine/cpu.h | 54
-rw-r--r-- | arch/x86/machine/lapic.c | 19
-rw-r--r-- | arch/x86/machine/lapic.h | 1
-rw-r--r-- | arch/x86/machine/pmu.h | 32
-rw-r--r-- | arch/x86/machine/pmu_amd.c | 249
-rw-r--r-- | arch/x86/machine/pmu_intel.c | 383
-rw-r--r-- | arch/x86/machine/trap.c | 2
-rw-r--r-- | arch/x86/machine/trap.h | 1
-rw-r--r-- | kern/Kconfig | 6
-rw-r--r-- | kern/Makefile | 2
-rw-r--r-- | kern/perfmon.c | 1388
-rw-r--r-- | kern/perfmon.h | 251
-rw-r--r-- | kern/perfmon_i.h | 42
-rw-r--r-- | kern/perfmon_types.h | 26
-rw-r--r-- | kern/thread.c | 54
-rw-r--r-- | kern/thread.h | 8
-rw-r--r-- | kern/thread_i.h | 5
-rw-r--r-- | test/Kconfig | 16
-rw-r--r-- | test/Makefile | 3
-rw-r--r-- | test/test_perfmon_cpu.c | 158
-rw-r--r-- | test/test_perfmon_thread.c | 257
-rw-r--r-- | test/test_perfmon_torture.c | 281
-rwxr-xr-x | tools/build_configs.py | 1 |
25 files changed, 3284 insertions, 17 deletions
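As a rough usage sketch of the interface this commit introduces (hypothetical caller code based on the kern/perfmon.h declarations below; error checking omitted, and the monitored CPU picked arbitrarily):

#include <kern/log.h>
#include <kern/perfmon.h>

/* Count cycles and instructions on CPU 0, then report them. */
static void
perfmon_usage_sketch(void)
{
    struct perfmon_group *group;
    struct perfmon_event *cycles, *instructions;

    perfmon_group_create(&group);
    perfmon_event_create(&cycles, PERFMON_ET_GENERIC, PERFMON_EV_CYCLE,
                         PERFMON_EF_KERN);
    perfmon_event_create(&instructions, PERFMON_ET_GENERIC,
                         PERFMON_EV_INSTRUCTION, PERFMON_EF_KERN);
    perfmon_group_add(group, cycles);
    perfmon_group_add(group, instructions);

    perfmon_group_attach_cpu(group, 0); /* reserves compatible PMCs globally */
    perfmon_group_start(group);

    /* ... run the workload to monitor ... */

    perfmon_group_update(group);        /* refresh counts while still running */
    log_info("cycles: %llu instructions: %llu",
             (unsigned long long)perfmon_event_read(cycles),
             (unsigned long long)perfmon_event_read(instructions));

    perfmon_group_stop(group);
    perfmon_group_detach(group);
    perfmon_group_destroy(group);       /* also destroys the added events */
}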
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9866d93..002ed44 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -61,3 +61,6 @@ x15_SOURCES-y += \ arch/x86/machine/trap_asm.S \ arch/x86/machine/trap.c \ arch/x86/machine/uart.c + +x15_SOURCES-$(CONFIG_PERFMON) += arch/x86/machine/pmu_amd.c +x15_SOURCES-$(CONFIG_PERFMON) += arch/x86/machine/pmu_intel.c diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c index 98d3680..6cbe168 100644 --- a/arch/x86/machine/cpu.c +++ b/arch/x86/machine/cpu.c @@ -69,6 +69,11 @@ #define CPU_INVALID_APIC_ID ((unsigned int)-1) +struct cpu_vendor { + unsigned int id; + const char *str; +}; + /* * MP related CMOS ports, registers and values. */ @@ -155,6 +160,12 @@ static alignas(8) struct cpu_gate_desc cpu_idt[CPU_IDT_SIZE] __read_mostly; static unsigned long cpu_double_fault_handler; static alignas(CPU_DATA_ALIGN) char cpu_double_fault_stack[TRAP_STACK_SIZE]; +uint64_t +cpu_get_freq(void) +{ + return cpu_freq; +} + void cpu_delay(unsigned long usecs) { @@ -173,6 +184,11 @@ cpu_delay(unsigned long usecs) } while (total > 0); } +static const struct cpu_vendor cpu_vendors[] = { + { CPU_VENDOR_INTEL, "GenuineIntel" }, + { CPU_VENDOR_AMD, "AuthenticAMD" }, +}; + void * __init cpu_get_boot_stack(void) { @@ -182,10 +198,9 @@ cpu_get_boot_stack(void) static void __init cpu_preinit(struct cpu *cpu, unsigned int id, unsigned int apic_id) { + memset(cpu, 0, sizeof(*cpu)); cpu->id = id; cpu->apic_id = apic_id; - cpu->state = CPU_STATE_OFF; - cpu->boot_stack = NULL; } static void @@ -430,6 +445,32 @@ cpu_load_idt(const void *idt, size_t size) asm volatile("lidt %0" : : "m" (idtr)); } +static const struct cpu_vendor * +cpu_vendor_lookup(const char *str) +{ + for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); i++) { + if (strcmp(str, cpu_vendors[i].str) == 0) { + return &cpu_vendors[i]; + } + } + + return NULL; +} + +static void __init +cpu_init_vendor_id(struct cpu *cpu) +{ + const struct cpu_vendor *vendor; + + vendor = cpu_vendor_lookup(cpu->vendor_str); + + if (vendor == NULL) { + return; + } + + cpu->vendor_id = vendor->id; +} + /* * Initialize the given cpu structure for the current processor. 
*/ @@ -456,10 +497,12 @@ cpu_init(struct cpu *cpu) eax = 0; cpu_cpuid(&eax, &ebx, &ecx, &edx); max_basic = eax; - memcpy(cpu->vendor_id, &ebx, sizeof(ebx)); - memcpy(cpu->vendor_id + 4, &edx, sizeof(edx)); - memcpy(cpu->vendor_id + 8, &ecx, sizeof(ecx)); - cpu->vendor_id[sizeof(cpu->vendor_id) - 1] = '\0'; + cpu->cpuid_max_basic = max_basic; + memcpy(cpu->vendor_str, &ebx, sizeof(ebx)); + memcpy(cpu->vendor_str + 4, &edx, sizeof(edx)); + memcpy(cpu->vendor_str + 8, &ecx, sizeof(ecx)); + cpu->vendor_str[sizeof(cpu->vendor_str) - 1] = '\0'; + cpu_init_vendor_id(cpu); /* Some fields are only initialized if supported by the processor */ cpu->model_name[0] = '\0'; @@ -498,6 +541,8 @@ cpu_init(struct cpu *cpu) max_extended = eax; } + cpu->cpuid_max_extended = max_extended; + if (max_extended < 0x80000001) { cpu->features3 = 0; cpu->features4 = 0; @@ -617,7 +662,7 @@ void cpu_log_info(const struct cpu *cpu) { log_info("cpu%u: %s, type %u, family %u, model %u, stepping %u", - cpu->id, cpu->vendor_id, cpu->type, cpu->family, cpu->model, + cpu->id, cpu->vendor_str, cpu->type, cpu->family, cpu->model, cpu->stepping); if (strlen(cpu->model_name) > 0) { diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h index 8f9b23c..2464117 100644 --- a/arch/x86/machine/cpu.h +++ b/arch/x86/machine/cpu.h @@ -218,9 +218,13 @@ struct cpu_tss { uint16_t iobp_base; } __packed; -#define CPU_VENDOR_ID_SIZE 13 +#define CPU_VENDOR_STR_SIZE 13 #define CPU_MODEL_NAME_SIZE 49 +#define CPU_VENDOR_UNKNOWN 0 +#define CPU_VENDOR_INTEL 1 +#define CPU_VENDOR_AMD 2 + /* * CPU states. */ @@ -230,8 +234,11 @@ struct cpu_tss { struct cpu { unsigned int id; unsigned int apic_id; - char vendor_id[CPU_VENDOR_ID_SIZE]; + char vendor_str[CPU_VENDOR_STR_SIZE]; char model_name[CPU_MODEL_NAME_SIZE]; + unsigned int cpuid_max_basic; + unsigned int cpuid_max_extended; + unsigned int vendor_id; unsigned int type; unsigned int family; unsigned int model; @@ -537,16 +544,48 @@ cpu_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, : : "memory"); } +/* + * Implies a compiler barrier. + */ static __always_inline void cpu_get_msr(uint32_t msr, uint32_t *high, uint32_t *low) { - asm volatile("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr)); + asm volatile("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr) : "memory"); } -static __always_inline void +/* + * Implies a compiler barrier. + */ +static __always_inline uint64_t +cpu_get_msr64(uint32_t msr) +{ + uint32_t high, low; + + cpu_get_msr(msr, &high, &low); + return (((uint64_t)high << 32) | low); +} + +/* + * Implies a full memory barrier. + */ +static inline void cpu_set_msr(uint32_t msr, uint32_t high, uint32_t low) { - asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high)); + asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high) : "memory"); +} + +/* + * Implies a full memory barrier. + */ +static inline void +cpu_set_msr64(uint32_t msr, uint64_t value) +{ + uint32_t low, high; + + low = value & 0xffffffff; + high = value >> 32; + + cpu_set_msr(msr, high, low); } static __always_inline uint64_t @@ -607,6 +646,11 @@ cpu_tlb_flush_va(unsigned long va) } /* + * Get cpu frequency in Hz. + */ +uint64_t cpu_get_freq(void); + +/* * Busy-wait for a given amount of time, in microseconds. 
*/ void cpu_delay(unsigned long usecs); diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c index 3f6d0c2..d74e676 100644 --- a/arch/x86/machine/lapic.c +++ b/arch/x86/machine/lapic.c @@ -25,6 +25,7 @@ #include <kern/log.h> #include <kern/macros.h> #include <kern/panic.h> +#include <kern/perfmon.h> #include <machine/cpu.h> #include <machine/lapic.h> #include <machine/pmap.h> @@ -159,7 +160,7 @@ struct lapic_map { struct lapic_register icr_high; struct lapic_register lvt_timer; const struct lapic_register reserved14; /* Thermal sensor register */ - const struct lapic_register reserved15; /* Performance counters register */ + struct lapic_register lvt_pmc; /* Performance counters register */ struct lapic_register lvt_lint0; struct lapic_register lvt_lint1; struct lapic_register lvt_error; @@ -239,6 +240,7 @@ lapic_setup_registers(void) lapic_write(&lapic_map->lvt_error, TRAP_LAPIC_ERROR); lapic_write(&lapic_map->timer_dcr, LAPIC_TIMER_DCR_DIV1); lapic_write(&lapic_map->timer_icr, lapic_bus_freq / CLOCK_FREQ); + lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF); } void __init @@ -334,6 +336,21 @@ lapic_ipi_broadcast(uint32_t vector) } void +lapic_pmc_of_intr(struct trap_frame *frame) +{ + (void)frame; + +#ifdef CONFIG_PERFMON + perfmon_of_intr(); + + /* Reset the interrupt, as it is automatically cleared when it fires */ + lapic_write(&lapic_map->lvt_pmc, TRAP_LAPIC_PMC_OF); +#endif + + lapic_eoi(); +} + +void lapic_timer_intr(struct trap_frame *frame) { (void)frame; diff --git a/arch/x86/machine/lapic.h b/arch/x86/machine/lapic.h index 6355da4..4b8385c 100644 --- a/arch/x86/machine/lapic.h +++ b/arch/x86/machine/lapic.h @@ -54,6 +54,7 @@ void lapic_ipi_broadcast(uint32_t vector); /* * Interrupt handlers. */ +void lapic_pmc_of_intr(struct trap_frame *frame); void lapic_timer_intr(struct trap_frame *frame); void lapic_error_intr(struct trap_frame *frame); void lapic_spurious_intr(struct trap_frame *frame); diff --git a/arch/x86/machine/pmu.h b/arch/x86/machine/pmu.h new file mode 100644 index 0000000..009aac5 --- /dev/null +++ b/arch/x86/machine/pmu.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2018 Remy Noel. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * PMU driver modules. + */ + +#ifndef X86_PMU_H +#define X86_PMU_H + +#include <kern/init.h> + +/* + * PMU init operations: + * - module fully initialized + */ +INIT_OP_DECLARE(pmu_intel_setup); +INIT_OP_DECLARE(pmu_amd_setup); + +#endif /* X86_PMU_H */ diff --git a/arch/x86/machine/pmu_amd.c b/arch/x86/machine/pmu_amd.c new file mode 100644 index 0000000..8e56bfa --- /dev/null +++ b/arch/x86/machine/pmu_amd.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014 Richard Braun.
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <stdint.h> + +#include <kern/init.h> +#include <kern/clock.h> +#include <kern/log.h> +#include <kern/perfmon.h> +#include <machine/cpu.h> +#include <machine/pmu.h> + +/* + * AMD raw event IDs. + */ +#define PMU_AMD_RE_CYCLE 0 +#define PMU_AMD_RE_INSTRUCTION 1 +#define PMU_AMD_RE_CACHE_REF 2 +#define PMU_AMD_RE_CACHE_MISS 3 +#define PMU_AMD_RE_BRANCH 4 +#define PMU_AMD_RE_BRANCH_MISS 5 +#define PMU_AMD_RE_DCACHE_REF 6 +#define PMU_AMD_RE_DCACHE_MISS 7 +#define PMU_AMD_RE_IFETCH_STALL 8 +#define PMU_AMD_RE_INVALID ((unsigned int)-1) + +/* + * PMU MSR addresses + */ +#define PMU_AMD_MSR_PERFEVTSEL0 0xc0010000 +#define PMU_AMD_MSR_PERCTR0 0xc0010004 + +/* + * Event Select Register bits + */ +#define PMU_AMD_EVTSEL_USR 0x00010000 +#define PMU_AMD_EVTSEL_OS 0x00020000 +#define PMU_AMD_EVTSEL_INT 0x00100000 +#define PMU_AMD_EVTSEL_EN 0x00400000 + +/* + * XXX These properties have the minimum values required by the architecture. + * TODO Per-family/model event availability database. + */ +#define PMU_AMD_NR_PMCS 4 +#define PMU_AMD_PMC_WIDTH 48 + +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. + */ +struct pmu_amd { + unsigned int pmc_bm; +}; + +static struct pmu_amd pmu_amd; + +struct pmu_amd_event_code { + unsigned short event_select; + unsigned short umask; +}; + +/* + * TODO Per-family/model event availability database.
+ */ +static const struct pmu_amd_event_code pmu_amd_event_codes[] = { + [PMU_AMD_RE_CYCLE] = { 0x76, 0x00 }, + [PMU_AMD_RE_INSTRUCTION] = { 0xc0, 0x00 }, + [PMU_AMD_RE_CACHE_REF] = { 0x80, 0x00 }, + [PMU_AMD_RE_CACHE_MISS] = { 0x81, 0x00 }, + [PMU_AMD_RE_BRANCH] = { 0xc2, 0x00 }, + [PMU_AMD_RE_BRANCH_MISS] = { 0xc3, 0x00 }, + [PMU_AMD_RE_DCACHE_REF] = { 0x40, 0x00 }, + [PMU_AMD_RE_DCACHE_MISS] = { 0x41, 0x00 }, + [PMU_AMD_RE_IFETCH_STALL] = { 0x87, 0x00 }, +}; + +static const unsigned int pmu_amd_generic_events[] = { + [PERFMON_EV_CYCLE] = PMU_AMD_RE_CYCLE, + [PERFMON_EV_REF_CYCLE] = PMU_AMD_RE_INVALID, + [PERFMON_EV_INSTRUCTION] = PMU_AMD_RE_INSTRUCTION, + [PERFMON_EV_CACHE_REF] = PMU_AMD_RE_CACHE_REF, + [PERFMON_EV_CACHE_MISS] = PMU_AMD_RE_CACHE_MISS, + [PERFMON_EV_BRANCH] = PMU_AMD_RE_BRANCH, + [PERFMON_EV_BRANCH_MISS] = PMU_AMD_RE_BRANCH_MISS, +}; + +static struct pmu_amd * +pmu_amd_get(void) +{ + return &pmu_amd; +} + +static void +pmu_amd_info(void) +{ + log_info("pmu: driver: amd, nr_pmcs: %u, pmc_width: %u\n", + PMU_AMD_NR_PMCS, PMU_AMD_PMC_WIDTH); +} + +static int +pmu_amd_translate(unsigned int *raw_event_idp, unsigned int event_id) +{ + assert(event_id < ARRAY_SIZE(pmu_amd_generic_events)); + + *raw_event_idp = pmu_amd_generic_events[event_id]; + return 0; +} + +static int +pmu_amd_alloc(unsigned int *pmc_idp, unsigned int raw_event_id) +{ + struct pmu_amd *pmu; + unsigned int pmc_id; + + /* TODO Per-family/model event availability database */ + (void)raw_event_id; + + pmu = pmu_amd_get(); + + if (pmu->pmc_bm == 0) { + return EAGAIN; + } + + pmc_id = __builtin_ffs(pmu->pmc_bm) - 1; + pmu->pmc_bm &= ~(1U << pmc_id); + *pmc_idp = pmc_id; + + return 0; +} + +static void +pmu_amd_free(unsigned int pmc_id) +{ + struct pmu_amd *pmu; + unsigned int mask; + + assert(pmc_id < PMU_AMD_NR_PMCS); + + pmu = pmu_amd_get(); + mask = (1U << pmc_id); + assert(!(pmu->pmc_bm & mask)); + pmu->pmc_bm |= mask; +} + +static void +pmu_amd_start(unsigned int pmc_id, unsigned int raw_event_id) +{ + const struct pmu_amd_event_code *code; + uint32_t high, low; + + assert(pmc_id < PMU_AMD_NR_PMCS); + assert(raw_event_id < ARRAY_SIZE(pmu_amd_event_codes)); + + code = &pmu_amd_event_codes[raw_event_id]; + + /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */ + high = code->event_select >> 8; + low = PMU_AMD_EVTSEL_EN + | PMU_AMD_EVTSEL_OS + | PMU_AMD_EVTSEL_USR + | (code->umask << 8) + | (code->event_select & 0xff); + cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, high, low); +} + +static void +pmu_amd_stop(unsigned int pmc_id) +{ + assert(pmc_id < PMU_AMD_NR_PMCS); + + cpu_set_msr(PMU_AMD_MSR_PERFEVTSEL0 + pmc_id, 0, 0); +} + +static uint64_t +pmu_amd_read(unsigned int pmc_id) +{ + assert(pmc_id < PMU_AMD_NR_PMCS); + + return cpu_get_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id); +} + +static void +pmu_amd_write(unsigned int pmc_id, uint64_t value) +{ + cpu_set_msr64(PMU_AMD_MSR_PERCTR0 + pmc_id, value); +} + +static int __init +pmu_amd_setup(void) +{ + const struct cpu *cpu; + struct pmu_amd *pmu; + struct perfmon_pmu_driver pmu_driver; + + cpu = cpu_current(); + + if (cpu->vendor_id != CPU_VENDOR_AMD) { + return 0; + } + + /* Support AMD Family 10h processors and later */ + if (cpu->family < 16) { + return ENODEV; + } + + pmu = pmu_amd_get(); + pmu->pmc_bm = (1U << PMU_AMD_NR_PMCS) - 1; + + pmu_driver.pmc_width = PMU_AMD_PMC_WIDTH; + /* Set of_max_ticks so that an overflow check runs before a counter incrementing once per CPU cycle can cover half its range.
*/ + pmu_driver.of_max_ticks = + (1UL << (pmu_driver.pmc_width - 1)) / (cpu_get_freq() / CLOCK_FREQ); + + pmu_driver.ops.info = pmu_amd_info; + pmu_driver.ops.translate = pmu_amd_translate; + pmu_driver.ops.alloc = pmu_amd_alloc; + pmu_driver.ops.free = pmu_amd_free; + pmu_driver.ops.start = pmu_amd_start; + pmu_driver.ops.stop = pmu_amd_stop; + pmu_driver.ops.read = pmu_amd_read; + pmu_driver.ops.write = pmu_amd_write; + + return perfmon_pmu_register(&pmu_driver); +} + +INIT_OP_DEFINE(pmu_amd_setup, + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(log_setup, true)); diff --git a/arch/x86/machine/pmu_intel.c b/arch/x86/machine/pmu_intel.c new file mode 100644 index 0000000..6fbdbae --- /dev/null +++ b/arch/x86/machine/pmu_intel.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <stdint.h> + +#include <kern/init.h> +#include <kern/clock.h> +#include <kern/log.h> +#include <kern/perfmon.h> +#include <machine/cpu.h> +#include <machine/pmu.h> + +/* + * Intel raw event IDs. + */ +#define PMU_INTEL_RE_CYCLE 0 +#define PMU_INTEL_RE_REF_CYCLE 1 +#define PMU_INTEL_RE_INSTRUCTION 2 +#define PMU_INTEL_RE_CACHE_REF 3 +#define PMU_INTEL_RE_CACHE_MISS 4 +#define PMU_INTEL_RE_BRANCH 5 +#define PMU_INTEL_RE_BRANCH_MISS 6 + +/* + * PMU MSR addresses + */ +#define PMU_INTEL_MSR_PMC0 0x0c1 +#define PMU_INTEL_MSR_EVTSEL0 0x186 + +/* + * V2 MSR addresses + */ +#define PMU_INTEL_MSR_FIXED_CTR0 0x0309 +#define PMU_INTEL_MSR_FIXED_CTR1 0x030a +#define PMU_INTEL_MSR_FIXED_CTR2 0x030b +#define PMU_INTEL_MSR_FIXED_CTR_CTRL 0x038d +#define PMU_INTEL_MSR_GLOBAL_STATUS 0x038e +#define PMU_INTEL_MSR_GLOBAL_CTRL 0x038f +#define PMU_INTEL_MSR_GLOBAL_OVF_CTRL 0x0390 + +/* + * Event Select Register bits + */ +#define PMU_INTEL_EVTSEL_USR 0x00010000 +#define PMU_INTEL_EVTSEL_OS 0x00020000 +#define PMU_INTEL_EVTSEL_EDGE 0x00040000 +#define PMU_INTEL_EVTSEL_PC 0x00080000 +#define PMU_INTEL_EVTSEL_INT 0x00100000 +#define PMU_INTEL_EVTSEL_EN 0x00400000 +#define PMU_INTEL_EVTSEL_INV 0x00800000 + +#define PMU_INTEL_ID_VERSION_MASK 0x000000ff +#define PMU_INTEL_ID_NR_PMCS_MASK 0x0000ff00 +#define PMU_INTEL_ID_NR_PMCS_OFFSET 8 +#define PMU_INTEL_ID_PMC_WIDTH_MASK 0x00ff0000 +#define PMU_INTEL_ID_PMC_WIDTH_OFFSET 16 +#define PMU_INTEL_ID_EVLEN_MASK 0xff000000 +#define PMU_INTEL_ID_EVLEN_OFFSET 24 +#define PMU_INTEL_ID_EVLEN_MAX 7 + +/* + * Global PMU properties. + * + * The bitmap is used to implement counter allocation, where each bit denotes + * whether a counter is available or not. + */ +struct pmu_intel { + unsigned int version; + unsigned int nr_pmcs; + unsigned int pmc_bm; + unsigned int pmc_width; + unsigned int events; +}; + +static struct pmu_intel pmu_intel; + +/* + * Intel hardware events.
+ */ +#define PMU_INTEL_EVENT_CYCLE 0x01 +#define PMU_INTEL_EVENT_INSTRUCTION 0x02 +#define PMU_INTEL_EVENT_REF_CYCLE 0x04 +#define PMU_INTEL_EVENT_CACHE_REF 0x08 +#define PMU_INTEL_EVENT_CACHE_MISS 0x10 +#define PMU_INTEL_EVENT_BRANCH 0x20 +#define PMU_INTEL_EVENT_BRANCH_MISS 0x40 + +struct pmu_intel_event_code { + unsigned int hw_event_id; + unsigned short event_select; + unsigned short umask; +}; + +static const unsigned int pmu_intel_raw_events[] = { + [PERFMON_EV_CYCLE] = PMU_INTEL_RE_CYCLE, + [PERFMON_EV_REF_CYCLE] = PMU_INTEL_RE_REF_CYCLE, + [PERFMON_EV_INSTRUCTION] = PMU_INTEL_RE_INSTRUCTION, + [PERFMON_EV_CACHE_REF] = PMU_INTEL_RE_CACHE_REF, + [PERFMON_EV_CACHE_MISS] = PMU_INTEL_RE_CACHE_MISS, + [PERFMON_EV_BRANCH] = PMU_INTEL_RE_BRANCH, + [PERFMON_EV_BRANCH_MISS] = PMU_INTEL_RE_BRANCH_MISS, +}; + +static const struct pmu_intel_event_code pmu_intel_event_codes[] = { + [PMU_INTEL_RE_CYCLE] = { PMU_INTEL_EVENT_CYCLE, 0x3c, 0x00 }, + [PMU_INTEL_RE_REF_CYCLE] = { PMU_INTEL_EVENT_REF_CYCLE, 0x3c, 0x01 }, + [PMU_INTEL_RE_INSTRUCTION] = { PMU_INTEL_EVENT_INSTRUCTION, 0xc0, 0x00 }, + [PMU_INTEL_RE_CACHE_REF] = { PMU_INTEL_EVENT_CACHE_REF, 0x2e, 0x4f }, + [PMU_INTEL_RE_CACHE_MISS] = { PMU_INTEL_EVENT_CACHE_MISS, 0x2e, 0x41 }, + [PMU_INTEL_RE_BRANCH] = { PMU_INTEL_EVENT_BRANCH, 0xc4, 0x00 }, + [PMU_INTEL_RE_BRANCH_MISS] = { PMU_INTEL_EVENT_BRANCH_MISS, 0xc5, 0x00 }, +}; + +static struct pmu_intel * +pmu_intel_get(void) +{ + return &pmu_intel; +} + +static uint64_t +pmu_intel_get_status(void) +{ + return cpu_get_msr64(PMU_INTEL_MSR_GLOBAL_STATUS); +} + +static void +pmu_intel_ack_status(uint64_t status) +{ + cpu_set_msr64(PMU_INTEL_MSR_GLOBAL_OVF_CTRL, status); +} + +/* + * TODO use the compiler built-in once libgcc is linked again. + */ +static unsigned int +pmu_popcount(unsigned int bits) +{ + unsigned int count; + + count = 0; + + while (bits) { + if (bits & 1) { + count++; + } + + bits >>= 1; + } + + return count; +} + +static void +pmu_intel_info(void) +{ + const struct pmu_intel *pmu; + unsigned int nr_events; + + pmu = pmu_intel_get(); + nr_events = pmu_popcount(pmu->events); + log_info("pmu: driver: intel, architectural v%u " + "pmu: nr_pmcs: %u, pmc_width: %u, events: %#x, nr_events: %u\n", + pmu->version, pmu->nr_pmcs, pmu->pmc_width, pmu->events, + nr_events); +} + +static int +pmu_intel_translate(unsigned int *raw_event_idp, unsigned int event_id) +{ + if (event_id >= ARRAY_SIZE(pmu_intel_raw_events)) { + return EINVAL; + } + + *raw_event_idp = pmu_intel_raw_events[event_id]; + return 0; +} + +static int +pmu_intel_alloc(unsigned int *pmc_idp, unsigned int raw_event_id) +{ + struct pmu_intel *pmu; + unsigned int pmc_id; + unsigned int hw_event_id; + + assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes)); + + pmu = pmu_intel_get(); + hw_event_id = pmu_intel_event_codes[raw_event_id].hw_event_id; + + if (!(pmu->events & hw_event_id)) { + return EINVAL; + } + + if (pmu->pmc_bm == 0) { + return EAGAIN; + } + + pmc_id = __builtin_ffs(pmu->pmc_bm) - 1; + pmu->pmc_bm &= ~(1U << pmc_id); + *pmc_idp = pmc_id; + return 0; +} + +static void +pmu_intel_free(unsigned int pmc_id) +{ + struct pmu_intel *pmu; + unsigned int mask; + + pmu = pmu_intel_get(); + mask = (1U << pmc_id); + assert(!(pmu->pmc_bm & mask)); + pmu->pmc_bm |= mask; +} + +static void +pmu_intel_start(unsigned int pmc_id, unsigned int raw_event_id) +{ + const struct pmu_intel_event_code *code; + struct pmu_intel *pmu; + uint32_t evtsel; + + assert(raw_event_id < ARRAY_SIZE(pmu_intel_event_codes)); + + code =
&pmu_intel_event_codes[raw_event_id]; + pmu = pmu_intel_get(); + + /* TODO Handle PERFMON_EF_KERN/PERFMON_EF_USER */ + evtsel = PMU_INTEL_EVTSEL_EN + | PMU_INTEL_EVTSEL_OS + | PMU_INTEL_EVTSEL_USR + | (code->umask << 8) + | code->event_select; + if (pmu->version >= 2) { + evtsel |= PMU_INTEL_EVTSEL_INT; + } + cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, evtsel); +} + +static void +pmu_intel_stop(unsigned int pmc_id) +{ + cpu_set_msr(PMU_INTEL_MSR_EVTSEL0 + pmc_id, 0, 0); +} + +static uint64_t +pmu_intel_read(unsigned int pmc_id) +{ + return cpu_get_msr64(PMU_INTEL_MSR_PMC0 + pmc_id); +} + +static void +pmu_intel_write(unsigned int pmc_id, uint64_t value) +{ + cpu_set_msr64(PMU_INTEL_MSR_PMC0 + pmc_id, value); +} + +static int +pmu_intel_consume_bits(uint64_t *bits) +{ + int bit; + + bit = __builtin_ffsll(*bits) - 1; + + if (bit < 0) { + return bit; + } + + *bits &= ~(1ULL << bit); + return bit; +} + +static void +pmu_intel_handle_of_intr_v2(void) +{ + struct pmu_intel *pmu; + uint64_t status; + int pmc_id; + + status = pmu_intel_get_status(); + + if (status == 0) { + return; + } + + pmu_intel_ack_status(status); + pmu = pmu_intel_get(); + + status &= ((1ULL << pmu->pmc_width) - 1); + + for (;;) { + pmc_id = pmu_intel_consume_bits(&status); + + if (pmc_id < 0) { + break; + } + + perfmon_cpu_on_pmc_of(pmc_id); + } +} + +static int __init +pmu_intel_setup(void) +{ + const struct cpu *cpu; + struct pmu_intel *pmu; + struct perfmon_pmu_driver pmu_driver; + unsigned int eax, ebx, ecx, edx, ev_len; + + cpu = cpu_current(); + eax = 0xa; + + if (cpu->vendor_id != CPU_VENDOR_INTEL) { + return 0; + } + + if (cpu->cpuid_max_basic < eax) { + return ENODEV; + } + + pmu = pmu_intel_get(); + cpu_cpuid(&eax, &ebx, &ecx, &edx); + pmu->version = eax & PMU_INTEL_ID_VERSION_MASK; + + if (pmu->version == 0) { + return ENODEV; + } + + pmu->nr_pmcs = (eax & PMU_INTEL_ID_NR_PMCS_MASK) + >> PMU_INTEL_ID_NR_PMCS_OFFSET; + pmu->pmc_bm = (1U << pmu->nr_pmcs) - 1; + pmu->pmc_width = (eax & PMU_INTEL_ID_PMC_WIDTH_MASK) + >> PMU_INTEL_ID_PMC_WIDTH_OFFSET; + ev_len = (eax & PMU_INTEL_ID_EVLEN_MASK) >> PMU_INTEL_ID_EVLEN_OFFSET; + + assert(ev_len <= PMU_INTEL_ID_EVLEN_MAX); + + pmu->events = ~ebx & ((1U << ev_len) - 1); + + pmu_driver.pmc_width = pmu->pmc_width; + pmu_driver.ops.info = pmu_intel_info; + pmu_driver.ops.translate = pmu_intel_translate; + pmu_driver.ops.alloc = pmu_intel_alloc; + pmu_driver.ops.free = pmu_intel_free; + pmu_driver.ops.start = pmu_intel_start; + pmu_driver.ops.stop = pmu_intel_stop; + pmu_driver.ops.read = pmu_intel_read; + pmu_driver.ops.write = pmu_intel_write; + + if (pmu->version >= 2) { + pmu_driver.ops.handle_of_intr = pmu_intel_handle_of_intr_v2; + pmu_driver.of_max_ticks = 0; + } else { + /* Set of_max_ticks so that an overflow check runs before a counter incrementing once per CPU cycle can cover half its range.
*/ + pmu_driver.ops.handle_of_intr = NULL; + pmu_driver.of_max_ticks = + (1ULL << (pmu_driver.pmc_width - 1)) / (cpu_get_freq() / CLOCK_FREQ); + } + + return perfmon_pmu_register(&pmu_driver); +} + +INIT_OP_DEFINE(pmu_intel_setup, + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(log_setup, true)); diff --git a/arch/x86/machine/trap.c b/arch/x86/machine/trap.c index 534b3f6..aaf49ea 100644 --- a/arch/x86/machine/trap.c +++ b/arch/x86/machine/trap.c @@ -210,9 +210,11 @@ trap_setup(void) trap_install(TRAP_XCALL, TRAP_HF_INTR, cpu_xcall_intr); trap_install(TRAP_THREAD_SCHEDULE, TRAP_HF_INTR, cpu_thread_schedule_intr); trap_install(TRAP_CPU_HALT, TRAP_HF_INTR, cpu_halt_intr); + trap_install(TRAP_LAPIC_PMC_OF, TRAP_HF_INTR, trap_default); trap_install(TRAP_LAPIC_TIMER, TRAP_HF_INTR, lapic_timer_intr); trap_install(TRAP_LAPIC_ERROR, TRAP_HF_INTR, lapic_error_intr); trap_install(TRAP_LAPIC_SPURIOUS, TRAP_HF_INTR, lapic_spurious_intr); + trap_install(TRAP_LAPIC_PMC_OF, TRAP_HF_INTR, lapic_pmc_of_intr); return 0; } diff --git a/arch/x86/machine/trap.h b/arch/x86/machine/trap.h index af6fd6b..c5bdc1f 100644 --- a/arch/x86/machine/trap.h +++ b/arch/x86/machine/trap.h @@ -62,6 +62,7 @@ #define TRAP_XCALL 238 #define TRAP_THREAD_SCHEDULE 239 #define TRAP_CPU_HALT 240 +#define TRAP_LAPIC_PMC_OF 252 #define TRAP_LAPIC_TIMER 253 #define TRAP_LAPIC_ERROR 254 #define TRAP_LAPIC_SPURIOUS 255 diff --git a/kern/Kconfig b/kern/Kconfig index fced67c..5e0e5eb 100644 --- a/kern/Kconfig +++ b/kern/Kconfig @@ -94,6 +94,12 @@ config THREAD_STACK_GUARD If unsure, disable. +config PERFMON + bool "Performance monitoring counters" + ---help--- + Enable the performance monitoring counters (perfmon API). While in use, + it may slightly lengthen the thread scheduling critical sections. + endmenu menu "Debugging" diff --git a/kern/Makefile b/kern/Makefile index ab7d6b5..5b04fcb 100644 --- a/kern/Makefile +++ b/kern/Makefile @@ -41,3 +41,5 @@ x15_SOURCES-$(CONFIG_SHELL) += kern/shell.c x15_SOURCES-$(CONFIG_MUTEX_ADAPTIVE) += kern/mutex/mutex_adaptive.c x15_SOURCES-$(CONFIG_MUTEX_PLAIN) += kern/mutex/mutex_plain.c + +x15_SOURCES-$(CONFIG_PERFMON) += kern/perfmon.c diff --git a/kern/perfmon.c b/kern/perfmon.c new file mode 100644 index 0000000..c910069 --- /dev/null +++ b/kern/perfmon.c @@ -0,0 +1,1388 @@ +/* + * Copyright (c) 2014-2018 Remy Noel. + * Copyright (c) 2014-2015 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * The performance monitoring module manages performance monitoring through + * event groups. Each physical performance monitoring counter (pmc) may be + * referenced by perfmon events, which are themselves grouped in perfmon groups. + * Groups can then be attached to either threads or cpus via perfmon + * grouplists.
+ * + * In order to guarantee that thread migration is properly handled, event + * types are reserved on the performance monitoring units (pmu) of all cpus for + * every event of a group when it is attached. Therefore, attaching a group may + * fail if no compatible pmc is available globally. + * + * Locking order: interrupts -> thread runq -> grouplist -> group + * + * TODO API to differentiate user and kernel events. + */ + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> + +#include <kern/error.h> +#include <kern/init.h> +#include <kern/kmem.h> +#include <kern/list.h> +#include <kern/log.h> +#include <kern/macros.h> +#include <kern/panic.h> +#include <kern/percpu.h> +#include <kern/perfmon.h> +#include <kern/perfmon_i.h> +#include <kern/spinlock.h> +#include <kern/thread.h> +#include <kern/timer.h> +#include <kern/xcall.h> +#include <machine/cpu.h> +#include <machine/pmu.h> + +/* + * Performance monitoring event. + * + * When a group is attached, each of its events is associated to a PMC, + * adding a reference in the process. + */ +struct perfmon_event { + uint64_t count; + uint64_t prev; + uint64_t overflow_id; + struct list node; + int flags; + unsigned int type; + unsigned int id; + unsigned int pmc_index; +#ifdef CONFIG_PERFMON_TEST + uint64_t value; + bool set_value; +#endif +}; + +#define PERFMON_INVALID_CPU ((unsigned int)-1) + +/* + * Group types. + */ +#define PERFMON_GT_UNKNOWN 0 +#define PERFMON_GT_CPU 1 +#define PERFMON_GT_THREAD 2 + +/* + * Group state flags. + */ +#define PERFMON_GF_ATTACHED 1 +#define PERFMON_GF_ENABLED 2 +#define PERFMON_GF_LOADED 4 +#define PERFMON_GF_PENDING_DISABLE 8 + +/* + * The possible states of a group are handled through the flags attribute. + * - A group can either be unattached or attached (to a thread or a cpu). + * - An attached group may be enabled or not. + * - An enabled group may be loaded or not, i.e. have actual running + * performance counters on a CPU. + * + * When a group is attached, some resources are reserved for it so it can be + * monitored at any time. + * When a group is enabled, it gets loaded when needed: + * - Cpu groups stay loaded as long as they are enabled. + * - Thread groups are loaded while their thread is running. + * When a group is loaded, its performance counters are actively counting. + * + * The PENDING_DISABLE flag is here so that a group attached to a remote + * thread can be disabled when that thread unschedules itself. + * + * Note that an unattached group can only be referenced from the API. Since + * manipulating the same group from different threads at the same time is not + * supported, the code does not bother to lock it when the group is assumed + * unattached. + * + * About thread-type group counter synchronization: + * - The groups are synchronized when their thread is unscheduled, which + * means their counter values are updated and pending counter changes + * (like unloading) are performed. + * - Since all operations require the group to be locked, it is mandatory + * to unlock the group before xcalling any remote operation in order to + * avoid a deadlock. + * - Any remote thread operation that gets executed after the thread got + * unscheduled will have nothing to do if the current thread is not the + * target one, since the target thread has been unloaded in between. + */ +struct perfmon_group { + struct list node; + struct list events; + struct thread *thread; + struct spinlock lock; + unsigned int cpu; + short flags; + unsigned short type; +}; + +/* + * List of all groups attached to a single monitored object, either a CPU
+ */ +struct perfmon_grouplist { + struct list groups; + struct spinlock lock; +}; + +/* + * Maximum number of supported hardware counters. + */ +#define PERFMON_MAX_PMCS 64 + +/* + * Performance monitoring counter. + * + * When a PMC is valid, it maps a raw event to a hardware counter. + * A PMC is valid if and only if its reference count isn't zero. + */ +struct perfmon_pmc { + unsigned int nr_refs; + unsigned int raw_event_id; + unsigned int id; +}; + +/* + * Performance monitoring unit. + * + * There is a single system-wide logical PMU, used to globally allocate + * PMCs. Reserving a counter across the entire system ensures thread + * migration isn't hindered by performance monitoring. + */ +struct perfmon_pmu { + struct spinlock lock; + unsigned int nr_pmcs; + struct perfmon_pmc pmcs[PERFMON_MAX_PMCS]; +}; + +/* + * Per-CPU performance monitoring counter. + * + * When a group is loaded on a processor, the per-CPU PMCs of its events + * get referenced. When a per-CPU PMC is referenced, its underlying hardware + * counter is active. + * + * Preemption must be disabled on access. + */ +struct perfmon_cpu_pmc { + unsigned int nr_refs; + uint64_t prev_value; + uint64_t overflow_id; +}; + +/* + * Per-CPU performance monitoring unit. + * + * The per-CPU PMCs are indexed the same way as the global PMCs. + * + * Preemption must be disabled on access. + */ +struct perfmon_cpu_pmu { + struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS]; + struct timer of_timer; + unsigned int cpu_id; +}; + +static struct perfmon_pmu_driver pmu_driver __read_mostly; + +static struct perfmon_pmu perfmon_pmu; +static unsigned int perfmon_pmc_id_to_index[PERFMON_MAX_PMCS]; + +static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu; + +/* + * Cache of thread-specific group lists. + */ +static struct kmem_cache perfmon_grouplist_cache; + +/* + * CPU specific group lists. + */ +static struct perfmon_grouplist *perfmon_cpu_grouplist __percpu; + +static inline int +perfmon_translate(unsigned int *raw_event_idp, unsigned int event_type, + unsigned int event_id) +{ + switch (event_type) { + case PERFMON_ET_RAW: + *raw_event_idp = event_id; + return 0; + case PERFMON_ET_GENERIC: + return pmu_driver.ops.translate(raw_event_idp, event_id); + default: + panic("perfmon: unsupported event type"); + } +} + +static int +perfmon_pmc_alloc(struct perfmon_pmc **pmcp, unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + size_t i; + int error; + + if (perfmon_pmu.nr_pmcs == ARRAY_SIZE(perfmon_pmu.pmcs)) { + return EAGAIN; + } + + for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + pmc = &perfmon_pmu.pmcs[i]; + + if (pmc->nr_refs == 0) { + break; + } + } + + assert(i < ARRAY_SIZE(perfmon_pmu.pmcs)); + error = pmu_driver.ops.alloc(&pmc->id, raw_event_id); + + if (error) { + return error; + } + + pmc->raw_event_id = raw_event_id; + perfmon_pmu.nr_pmcs++; + *pmcp = pmc; + + return 0; +} + +static struct perfmon_pmc * +perfmon_pmc_lookup(unsigned int raw_event_id) +{ + struct perfmon_pmc *pmc; + size_t i; + + if (perfmon_pmu.nr_pmcs == 0) { + return NULL; + } + + for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + pmc = &perfmon_pmu.pmcs[i]; + + if ((pmc->nr_refs != 0) && (pmc->raw_event_id == raw_event_id)) { + return pmc; + } + } + + return NULL; +} + +static inline unsigned int +perfmon_pmc_index(const struct perfmon_pmc *pmc) +{ + unsigned int index; + + index = pmc - perfmon_pmu.pmcs; + assert(index < ARRAY_SIZE(perfmon_pmu.pmcs)); + return index; +} + +/* + * Obtain a reference on a PMC for the given event. 
+ * + * If there is no existing PMC suitable for this event, allocate one. + */ +static int +perfmon_pmc_get(struct perfmon_pmc **pmcp, const struct perfmon_event *event) +{ + struct perfmon_pmc *pmc; + unsigned int raw_event_id; + unsigned int pmc_index; + int error; + + error = perfmon_translate(&raw_event_id, event->type, event->id); + + if (error) { + return error; + } + + spinlock_lock(&perfmon_pmu.lock); + + pmc = perfmon_pmc_lookup(raw_event_id); + + if (pmc == NULL) { + error = perfmon_pmc_alloc(&pmc, raw_event_id); + + if (error) { + goto out; + } + pmc_index = perfmon_pmc_index(pmc); + assert(perfmon_pmc_id_to_index[pmc->id] == UINT32_MAX); + perfmon_pmc_id_to_index[pmc->id] = pmc_index; + } + + pmc->nr_refs++; + +out: + spinlock_unlock(&perfmon_pmu.lock); + + if (error) { + return error; + } + *pmcp = pmc; + + return 0; +} + +/* + * Release a reference on a PMC. + */ +static void +perfmon_pmc_put(struct perfmon_pmc *pmc) +{ + spinlock_lock(&perfmon_pmu.lock); + + assert(pmc->nr_refs != 0); + pmc->nr_refs--; + + if (pmc->nr_refs == 0) { + pmu_driver.ops.free(pmc->id); + assert(perfmon_pmc_id_to_index[pmc->id] != UINT32_MAX); + perfmon_pmc_id_to_index[pmc->id] = UINT32_MAX; + } + + spinlock_unlock(&perfmon_pmu.lock); +} + +static inline struct perfmon_pmc * +perfmon_pmc_from_index(unsigned int index) +{ + assert(index < ARRAY_SIZE(perfmon_pmu.pmcs)); + return &perfmon_pmu.pmcs[index]; +} + +static void +perfmon_grouplist_ctor(void *arg) +{ + struct perfmon_grouplist *grouplist; + + grouplist = arg; + + list_init(&grouplist->groups); + spinlock_init(&grouplist->lock); +} + +static struct perfmon_grouplist * +perfmon_grouplist_create(void) +{ + return kmem_cache_alloc(&perfmon_grouplist_cache); +} + +static void +perfmon_grouplist_destroy(struct perfmon_grouplist *grouplist) +{ + kmem_cache_free(&perfmon_grouplist_cache, grouplist); +} + +static void perfmon_check_of(struct timer *timer); + +static void __init +perfmon_cpu_pmu_init(unsigned int cpuid) +{ + unsigned int i; + struct perfmon_cpu_pmu *cpu_pmu; + + cpu_pmu = percpu_ptr(perfmon_cpu_pmu, cpuid); + cpu_pmu->cpu_id = cpuid; + if (!pmu_driver.ops.handle_of_intr) { + /* XXX: using high prio instead of INTR because we might xcall from the + * callbacks. + */ + timer_init(&cpu_pmu->of_timer, &perfmon_check_of, TIMER_HIGH_PRIO); + timer_schedule(&cpu_pmu->of_timer, pmu_driver.of_max_ticks); + } + + for (i = 0; i < ARRAY_SIZE(cpu_pmu->pmcs); i++) { + struct perfmon_cpu_pmc *pmc; + + pmc = &cpu_pmu->pmcs[i]; + + pmc->nr_refs = 0; + pmc->overflow_id = 0; + } +} + +static struct perfmon_cpu_pmc * +perfmon_cpu_pmu_get_pmc_from_id(unsigned int pmc_id) +{ + unsigned int pmc_index; + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_cpu_pmc *cpu_pmc; + + assert(perfmon_pmc_id_to_index[pmc_id] != UINT32_MAX); + pmc_index = perfmon_pmc_id_to_index[pmc_id]; + + /* TODO: this may be called many times in a row. We may want to have it + * passed to the function.
+ */ + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + cpu_pmc = &cpu_pmu->pmcs[pmc_index]; + + assert(cpu_pmc->nr_refs != 0); + + return cpu_pmc; +} + +void +perfmon_cpu_on_pmc_of(unsigned int pmc_id) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + cpu_pmc = perfmon_cpu_pmu_get_pmc_from_id(pmc_id); + cpu_pmc->overflow_id++; +} + +static void +perfmon_check_of_remote(void *arg) +{ + perfmon_check_of(arg); +} + +static void +perfmon_check_pmc_of(struct perfmon_cpu_pmc *cpu_pmc, uint64_t value) +{ + uint64_t prev; + + prev = cpu_pmc->prev_value; + if (prev > value) { + /* Overflow */ + cpu_pmc->overflow_id++; + } + cpu_pmc->prev_value = value; +} + +static void +perfmon_check_of(struct timer *timer) +{ + struct perfmon_pmc *pmc; + struct perfmon_cpu_pmc *cpu_pmc; + struct perfmon_cpu_pmu *cpu_pmu; + uint64_t value; + + cpu_pmu = structof(timer, struct perfmon_cpu_pmu, of_timer); + if (cpu_pmu->cpu_id != cpu_id()) + { + xcall_call(perfmon_check_of_remote, timer, cpu_pmu->cpu_id); + return; + } + + for (size_t i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + pmc = perfmon_pmc_from_index(i); + if (pmc->nr_refs == 0) { + continue; + } + + cpu_pmc = &cpu_pmu->pmcs[i]; + value = pmu_driver.ops.read(pmc->id); + + perfmon_check_pmc_of(cpu_pmc, value); + } + + timer_schedule(timer, pmu_driver.of_max_ticks); +} + +static void +perfmon_cpu_pmu_load(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + cpu_pmc = &cpu_pmu->pmcs[pmc_index]; + + if (cpu_pmc->nr_refs == 0) { + pmu_driver.ops.start(perfmon_pmu.pmcs[pmc_index].id, + perfmon_pmu.pmcs[pmc_index].raw_event_id); + cpu_pmc->prev_value = pmu_driver.ops.read(perfmon_pmu.pmcs[pmc_index].id); + } + + cpu_pmc->nr_refs++; +} + +static void +perfmon_cpu_pmu_unload(struct perfmon_cpu_pmu *cpu_pmu, unsigned int pmc_index) +{ + struct perfmon_cpu_pmc *cpu_pmc; + + cpu_pmc = &cpu_pmu->pmcs[pmc_index]; + assert(cpu_pmc->nr_refs != 0); + cpu_pmc->nr_refs--; + + if (cpu_pmc->nr_refs == 0) { + pmu_driver.ops.stop(perfmon_pmu.pmcs[pmc_index].id); + } +} + +void +perfmon_of_intr(void) +{ + assert(pmu_driver.ops.handle_of_intr); + pmu_driver.ops.handle_of_intr(); +} + +int +perfmon_pmu_register(struct perfmon_pmu_driver *driver) +{ + struct perfmon_pmu_ops *ops = &driver->ops; + + assert(ops->info && ops->translate && ops->alloc + && ops->free && ops->start && ops->stop); + assert(!ops->handle_of_intr != !driver->of_max_ticks); + + if (pmu_driver.ops.info) { + /* Already initialized */ + assert(0); + return EINVAL; + } + pmu_driver = *driver; + + return 0; +} + +static int __init +perfmon_bootstrap(void) +{ + kmem_cache_init(&perfmon_grouplist_cache, "perfmon_grouplist", + sizeof(struct perfmon_grouplist), 0, + perfmon_grouplist_ctor, 0); + + return 0; +} + +INIT_OP_DEFINE(perfmon_bootstrap, + INIT_OP_DEP(kmem_setup, true)); + +static int __init +perfmon_setup(void) +{ + struct perfmon_grouplist *grouplist; + unsigned int i; + + spinlock_init(&perfmon_pmu.lock); + perfmon_pmu.nr_pmcs = 0; + + for (i = 0; i < ARRAY_SIZE(perfmon_pmu.pmcs); i++) { + perfmon_pmu.pmcs[i].nr_refs = 0; + } + for (i = 0; i < ARRAY_SIZE(perfmon_pmc_id_to_index); i++) { + perfmon_pmc_id_to_index[i] = UINT32_MAX; + } + + for (i = 0; i < cpu_count(); i++) { + perfmon_cpu_pmu_init(i); + } + + for (i = 0; i < cpu_count(); i++) { + grouplist = perfmon_grouplist_create(); + + if (grouplist == NULL) { + panic("perfmon: unable to create cpu grouplists"); + } + + percpu_var(perfmon_cpu_grouplist, i) = grouplist; + } + + if (!pmu_driver.ops.info) { + log_err("unable 
to start perfmon: no compatible pmu driver available"); + return ENODEV; + } + pmu_driver.ops.info(); + + return 0; +} + +INIT_OP_DEFINE(perfmon_setup, + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(kmem_setup, true), + INIT_OP_DEP(panic_setup, true), + INIT_OP_DEP(percpu_setup, true), + INIT_OP_DEP(perfmon_bootstrap, true), + INIT_OP_DEP(pmu_amd_setup, false), + INIT_OP_DEP(pmu_intel_setup, false), + INIT_OP_DEP(spinlock_setup, true), + INIT_OP_DEP(thread_setup, true), + INIT_OP_DEP(trap_setup, true)); + +static void +perfmon_check_event_args(unsigned int type, unsigned int id, int flags) +{ + (void)type; + (void)id; + (void)flags; + assert((type == PERFMON_ET_RAW) || (type == PERFMON_ET_GENERIC)); + assert((type != PERFMON_ET_GENERIC) || (id < PERFMON_NR_GENERIC_EVENTS)); + assert((flags & PERFMON_EF_MASK) == flags); + assert((flags & (PERFMON_EF_KERN | PERFMON_EF_USER))); +} + +int +perfmon_event_create(struct perfmon_event **eventp, unsigned int type, + unsigned int id, int flags) +{ + struct perfmon_event *event; + + perfmon_check_event_args(type, id, flags); + + event = kmem_alloc(sizeof(*event)); + + if (event == NULL) { + return ENOMEM; + } + + event->count = 0; + list_node_init(&event->node); + event->flags = flags; + event->type = type; + event->id = id; + *eventp = event; + return 0; +} + +void +perfmon_event_destroy(struct perfmon_event *event) +{ + kmem_free(event, sizeof(*event)); +} + +uint64_t +perfmon_event_read(const struct perfmon_event *event) +{ + return event->count; +} + +#ifdef CONFIG_PERFMON_TEST + +int +perfmon_event_write(struct perfmon_event *event, uint64_t value) +{ + if (!pmu_driver.ops.write) { + return ENODEV; + } + event->value = value; + event->set_value = true; + + return 0; +} + +int +perfmon_get_pmc_width(void) +{ + return pmu_driver.pmc_width; +} + +#endif /* CONFIG_PERFMON_TEST */ + +void +perfmon_event_reset(struct perfmon_event *event) +{ + event->count = 0; +} + +static void +perfmon_event_sync(struct perfmon_cpu_pmu *cpu_pmu, + struct perfmon_event *event) +{ + struct perfmon_pmc *pmc; + struct perfmon_cpu_pmc *cpu_pmc; + uint64_t count; + int diff; + + pmc = perfmon_pmc_from_index(event->pmc_index); + cpu_pmc = &cpu_pmu->pmcs[event->pmc_index]; + count = pmu_driver.ops.read(pmc->id); + + if (!pmu_driver.ops.handle_of_intr) { + /* Force pmc overflow status update */ + perfmon_check_pmc_of(cpu_pmc, count); + } + + if (unlikely(event->overflow_id != cpu_pmc->overflow_id)) { + assert(cpu_pmc->overflow_id > event->overflow_id); + diff = cpu_pmc->overflow_id - event->overflow_id; + /* diff is very likely 1.
*/ + event->count += (1ULL << pmu_driver.pmc_width) * diff + - event->prev + count; + event->overflow_id = cpu_pmc->overflow_id; + } else { + event->count += count - event->prev; + } + event->prev = count; +} + +static inline int +perfmon_group_attached(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_ATTACHED; +} + +static inline int +perfmon_group_enabled(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_ENABLED; +} + +static inline int +perfmon_group_loaded(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_LOADED; +} + +static inline int +perfmon_group_stopping(const struct perfmon_group *group) +{ + return group->flags & PERFMON_GF_PENDING_DISABLE; +} + +int +perfmon_group_create(struct perfmon_group **groupp) +{ + struct perfmon_group *group; + + group = kmem_alloc(sizeof(*group)); + + if (group == NULL) { + return ENOMEM; + } + + list_init(&group->events); + spinlock_init(&group->lock); + group->cpu = PERFMON_INVALID_CPU; + group->flags = 0; + group->type = PERFMON_GT_UNKNOWN; + *groupp = group; + return 0; +} + +int +perfmon_group_destroy(struct perfmon_group *group) +{ + struct perfmon_event *event; + + if (perfmon_group_attached(group)) { + return EINVAL; + } + assert (!perfmon_group_enabled(group)); + + while (!list_empty(&group->events)) { + event = list_first_entry(&group->events, struct perfmon_event, node); + list_remove(&event->node); + perfmon_event_destroy(event); + } + + kmem_free(group, sizeof(*group)); + return 0; +} + +void +perfmon_group_add(struct perfmon_group *group, struct perfmon_event *event) +{ + assert(list_node_unlinked(&event->node)); + assert(!perfmon_group_attached(group)); + + /* TODO: check that we do not have the same event twice. */ + + list_insert_tail(&group->events, &event->node); +} + +/* + * Attach a group to the global logical PMU. + * + * For each event in the group, obtain a reference on a PMC.
+ */ +static int +perfmon_group_attach_pmu(struct perfmon_group *group) +{ + struct perfmon_event *event, *tmp; + struct perfmon_pmc *pmc = NULL; + int error; + + assert(!perfmon_group_attached(group)); + + list_for_each_entry(&group->events, event, node) { + error = perfmon_pmc_get(&pmc, event); + + if (error) { + goto error_pmc; + } + + event->pmc_index = perfmon_pmc_index(pmc); + } + + return 0; + +error_pmc: + list_for_each_entry(&group->events, tmp, node) { + if (tmp == event) { + break; + } + + perfmon_pmc_put(perfmon_pmc_from_index(tmp->pmc_index)); + } + + return error; +} + +static void +perfmon_group_detach_pmu(struct perfmon_group *group) +{ + struct perfmon_event *event; + + assert(perfmon_group_attached(group)); + + list_for_each_entry(&group->events, event, node) { + perfmon_pmc_put(perfmon_pmc_from_index(event->pmc_index)); + } +} + +int +perfmon_group_attach(struct perfmon_group *group, struct thread *thread) +{ + struct perfmon_grouplist *grouplist; + unsigned long flags; + int error; + + assert(group->type == PERFMON_GT_UNKNOWN); + + error = perfmon_group_attach_pmu(group); + + if (error) { + return error; + } + + thread_ref(thread); + group->thread = thread; + group->type = PERFMON_GT_THREAD; + group->flags |= PERFMON_GF_ATTACHED; + + grouplist = thread->perfmon_groups; + + spinlock_lock_intr_save(&grouplist->lock, &flags); + list_insert_tail(&grouplist->groups, &group->node); + spinlock_unlock_intr_restore(&grouplist->lock, flags); + + return 0; +} + +int +perfmon_group_attach_cpu(struct perfmon_group *group, unsigned int cpu) +{ + int error; + struct perfmon_grouplist *grouplist; + + assert(cpu < cpu_count()); + assert(group->type == PERFMON_GT_UNKNOWN); + + error = perfmon_group_attach_pmu(group); + + if (error) { + return error; + } + + group->cpu = cpu; + group->type = PERFMON_GT_CPU; + group->flags |= PERFMON_GF_ATTACHED; + + grouplist = percpu_var(perfmon_cpu_grouplist, cpu); + + spinlock_lock(&grouplist->lock); + list_insert_tail(&grouplist->groups, &group->node); + spinlock_unlock(&grouplist->lock); + + return 0; +} + +int +perfmon_group_detach(struct perfmon_group *group) +{ + unsigned long flags; + unsigned long grouplist_flags; + struct perfmon_grouplist *grouplist; + struct thread *prev_thread; + unsigned int type; + int ret; + + type = group->type; + grouplist_flags = 0; /* silence Wmaybe-uninitialized warning. 
*/ + ret = 0; + prev_thread = NULL; + + switch (type) { + case PERFMON_GT_THREAD: + grouplist = group->thread->perfmon_groups; + spinlock_lock_intr_save(&grouplist->lock, &grouplist_flags); + prev_thread = group->thread; + break; + case PERFMON_GT_CPU: + grouplist = percpu_var(perfmon_cpu_grouplist, group->cpu); + spinlock_lock(&grouplist->lock); + break; + default: + panic("perfmon: invalid group type on detach"); + } + spinlock_lock_intr_save(&group->lock, &flags); + + if (perfmon_group_enabled(group)) { + ret = EINVAL; + goto out; + } + + if (!perfmon_group_attached(group)) { + goto out; + } + + perfmon_group_detach_pmu(group); + list_remove(&group->node); + + group->thread = NULL; + group->cpu = PERFMON_INVALID_CPU; + group->type = PERFMON_GT_UNKNOWN; + group->flags &= ~PERFMON_GF_ATTACHED; + assert(!group->flags); + +out: + spinlock_unlock_intr_restore(&group->lock, flags); + switch (type) { + case PERFMON_GT_THREAD: + spinlock_unlock_intr_restore(&grouplist->lock, grouplist_flags); + break; + case PERFMON_GT_CPU: + spinlock_unlock(&grouplist->lock); + break; + } + + if (prev_thread) { + /* Late unref as it might destroy the thread and lock the runq. */ + thread_unref(prev_thread); + } + + return ret; +} + +static void +perfmon_group_load(struct perfmon_group *group) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_event *event; + struct perfmon_pmc *pmc; + uint64_t prev; + + assert(!thread_preempt_enabled()); + assert(perfmon_group_enabled(group)); + assert(!perfmon_group_loaded(group)); + + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + +#ifdef CONFIG_PERFMON_TEST + /* XXX: could be done in the loading loop, but performance does not + * matter in the functional tests using this feature. + */ + list_for_each_entry(&group->events, event, node) { + if (!event->set_value) { + continue; + } + pmc = perfmon_pmc_from_index(event->pmc_index); + pmu_driver.ops.write(pmc->id, event->value); + event->set_value = false; + } +#endif + + list_for_each_entry(&group->events, event, node) { + pmc = perfmon_pmc_from_index(event->pmc_index); + prev = pmu_driver.ops.read(pmc->id); + + perfmon_cpu_pmu_load(cpu_pmu, event->pmc_index); + event->prev = prev; + event->overflow_id = cpu_pmu->pmcs[event->pmc_index].overflow_id; + } + + group->cpu = cpu_id(); + group->flags |= PERFMON_GF_LOADED; +} + +static void +perfmon_cpu_load_remote(void *arg) +{ + struct perfmon_group *group; + + group = arg; + assert (group->cpu == cpu_id()); + + spinlock_lock(&group->lock); + + perfmon_group_load(group); + + spinlock_unlock(&group->lock); +} + +static void +perfmon_group_unload(struct perfmon_group *group) +{ + struct perfmon_cpu_pmu *cpu_pmu; + struct perfmon_event *event; + + assert(!thread_preempt_enabled()); + assert(perfmon_group_enabled(group)); + assert(perfmon_group_loaded(group)); + + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + + list_for_each_entry(&group->events, event, node) { + perfmon_cpu_pmu_unload(cpu_pmu, event->pmc_index); + perfmon_event_sync(cpu_pmu, event); + } + + group->flags &= ~PERFMON_GF_LOADED; +} + +static void +perfmon_cpu_unload_remote(void *arg) +{ + struct perfmon_group *group; + + group = arg; + assert (group->cpu == cpu_id()); + assert (perfmon_group_stopping(group)); + + spinlock_lock(&group->lock); + + perfmon_group_unload(group); + + group->flags &= ~PERFMON_GF_PENDING_DISABLE; + group->flags &= ~PERFMON_GF_ENABLED; + + spinlock_unlock(&group->lock); +} + +static void +perfmon_thread_load_remote(void *arg) +{ + struct perfmon_group *group; + struct thread
*thread; + + assert (!cpu_intr_enabled()); + + group = arg; + thread = thread_self(); + + if (thread != group->thread) { + return; + } + + spinlock_lock(&group->lock); + + if (perfmon_group_enabled(group) && !perfmon_group_loaded(group)) { + perfmon_group_load(group); + } + + spinlock_unlock(&group->lock); +} + +static void +perfmon_thread_unload_remote(void *arg) +{ + struct perfmon_group *group; + struct thread *thread; + + assert (!cpu_intr_enabled()); + + group = arg; + thread = thread_self(); + + if (thread != group->thread) { + return; + } + + spinlock_lock(&group->lock); + + if (perfmon_group_enabled(group)) { + assert (perfmon_group_stopping(group)); + if (perfmon_group_loaded(group)) { + perfmon_group_unload(group); + } + group->flags &= ~PERFMON_GF_PENDING_DISABLE; + group->flags &= ~PERFMON_GF_ENABLED; + } + + spinlock_unlock(&group->lock); +} + +int +perfmon_group_start(struct perfmon_group *group) +{ + unsigned long flags; + unsigned int cpu; + int ret; + + ret = 0; + spinlock_lock_intr_save(&group->lock, &flags); + + if (!perfmon_group_attached(group) || perfmon_group_loaded(group)) { + ret = EINVAL; + goto end; + } + assert(!perfmon_group_enabled(group)); + + group->flags |= PERFMON_GF_ENABLED; + + if (group->type == PERFMON_GT_CPU) { + spinlock_unlock_intr_restore(&group->lock, flags); + + xcall_call(perfmon_cpu_load_remote, group, group->cpu); + + return 0; + } else if (group->thread == thread_self()) { + perfmon_group_load(group); + } else if (group->thread->state == THREAD_RUNNING) { + spinlock_unlock_intr_restore(&group->lock, flags); + + cpu = thread_cpu(group->thread); + + xcall_call(perfmon_thread_load_remote, group, cpu); + + return 0; + } +end: + spinlock_unlock_intr_restore(&group->lock, flags); + + return ret; +} + +static void +perfmon_group_sync_local(struct perfmon_group *group) +{ + struct perfmon_event *event; + struct perfmon_cpu_pmu *cpu_pmu; + + cpu_pmu = cpu_local_ptr(perfmon_cpu_pmu); + + /* The group sync duration *should be* limited as a group may only have a + * limited amount of *different* events. 
+ */ + list_for_each_entry(&group->events, event, node) { + perfmon_event_sync(cpu_pmu, event); + } +} + +static void +perfmon_cpu_sync_remote(void *arg) +{ + struct perfmon_group *group; + + group = arg; + assert (group->type == PERFMON_GT_CPU); + assert (group->cpu == cpu_id()); + + perfmon_group_sync_local(group); +} + +static void +perfmon_thread_sync_remote(void *arg) +{ + struct perfmon_group *group; + unsigned long flags; + + group = arg; + + assert (group->type == PERFMON_GT_THREAD); + if (thread_self() != group->thread) { + return; + } + spinlock_lock_intr_save(&group->lock, &flags); + + perfmon_group_sync_local(group); + + spinlock_unlock_intr_restore(&group->lock, flags); +} + +void +perfmon_group_update(struct perfmon_group *group) +{ + unsigned long flags; + unsigned int cpu; + + assert(perfmon_group_enabled(group)); + + spinlock_lock_intr_save(&group->lock, &flags); + + assert(perfmon_group_attached(group)); + assert(perfmon_group_enabled(group)); + + if (!perfmon_group_loaded(group)) { + goto end; + } + + if (group->type == PERFMON_GT_CPU) { + if (group->cpu == cpu_id()) + perfmon_group_sync_local(group); + else { + xcall_call(perfmon_cpu_sync_remote, group, group->cpu); + } + } else { + if (group->thread == thread_self()) { + assert (perfmon_group_loaded(group)); + perfmon_group_sync_local(group); + } else if (group->thread->state == THREAD_RUNNING) { + spinlock_unlock_intr_restore(&group->lock, flags); + cpu = thread_cpu(group->thread); + xcall_call(perfmon_thread_sync_remote, group, cpu); + return; + } + } +end: + spinlock_unlock_intr_restore(&group->lock, flags); +} + +int +perfmon_group_stop(struct perfmon_group *group) +{ + int ret; + unsigned long flags; + unsigned int cpu; + + ret = 0; + spinlock_lock_intr_save(&group->lock, &flags); + + if (!perfmon_group_attached(group) || !perfmon_group_enabled(group)) { + ret = EINVAL; + goto end; + } + + if (!perfmon_group_loaded(group)) { + goto disable; + } + + group->flags |= PERFMON_GF_PENDING_DISABLE; + + if (group->type == PERFMON_GT_CPU) { + spinlock_unlock_intr_restore(&group->lock, flags); + + xcall_call(perfmon_cpu_unload_remote, group, group->cpu); + return 0; + } else if (group->thread == thread_self()) { + perfmon_group_unload(group); + } else { + /* If the thead is not running (but still loaded), the unload is + * (probably) getting called when we release the group lock, but we + * still need a blocking xcall to guarantee the group is disabled when + * the function returns. 
+         */
+        spinlock_unlock_intr_restore(&group->lock, flags);
+
+        cpu = thread_cpu(group->thread);
+
+        xcall_call(perfmon_thread_unload_remote, group, cpu);
+        return 0;
+    }
+
+disable:
+    group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+    group->flags &= ~PERFMON_GF_ENABLED;
+
+end:
+    spinlock_unlock_intr_restore(&group->lock, flags);
+    return ret;
+}
+
+int
+perfmon_thread_init(struct thread *thread)
+{
+    struct perfmon_grouplist *grouplist;
+
+    grouplist = perfmon_grouplist_create();
+
+    if (grouplist == NULL) {
+        return ENOMEM;
+    }
+
+    thread->perfmon_groups = grouplist;
+    return 0;
+}
+
+void
+perfmon_thread_destroy(struct thread *thread)
+{
+    perfmon_grouplist_destroy(thread->perfmon_groups);
+}
+
+void
+perfmon_thread_load(struct thread *thread)
+{
+    struct perfmon_grouplist *grouplist;
+    struct perfmon_group *group;
+
+    assert(!cpu_intr_enabled());
+    assert(!thread_preempt_enabled());
+
+    grouplist = thread->perfmon_groups;
+
+    spinlock_lock(&grouplist->lock);
+
+    list_for_each_entry(&grouplist->groups, group, node) {
+        spinlock_lock(&group->lock);
+
+        if (perfmon_group_enabled(group) && !perfmon_group_loaded(group)) {
+            perfmon_group_load(group);
+        }
+
+        spinlock_unlock(&group->lock);
+    }
+
+    spinlock_unlock(&grouplist->lock);
+}
+
+void
+perfmon_thread_unload(struct thread *thread)
+{
+    struct perfmon_grouplist *grouplist;
+    struct perfmon_group *group;
+
+    assert(!cpu_intr_enabled());
+    assert(!thread_preempt_enabled());
+
+    grouplist = thread->perfmon_groups;
+
+    spinlock_lock(&grouplist->lock);
+
+    list_for_each_entry(&grouplist->groups, group, node) {
+        spinlock_lock(&group->lock);
+
+        /* TODO: Avoid long loops over the groups. One way to do this
+         * would be to maintain an event mapping in the grouplist so that
+         * this remains a bounded operation upon scheduling.
+         */
+
+        if (perfmon_group_loaded(group)) {
+            perfmon_group_unload(group);
+
+            if (perfmon_group_stopping(group)) {
+                group->flags &= ~PERFMON_GF_PENDING_DISABLE;
+                group->flags &= ~PERFMON_GF_ENABLED;
+            }
+        }
+
+        spinlock_unlock(&group->lock);
+    }
+
+    spinlock_unlock(&grouplist->lock);
+}
diff --git a/kern/perfmon.h b/kern/perfmon.h
new file mode 100644
index 0000000..b1da4ec
--- /dev/null
+++ b/kern/perfmon.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Performance monitoring based on hardware performance counters.
+ */
+
+#ifndef KERN_PERFMON_H
+#define KERN_PERFMON_H
+
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/thread.h>
+
+/*
+ * Performance event types.
+ */
+#define PERFMON_ET_GENERIC 0
+#define PERFMON_ET_RAW 1
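A generic event is translated into a hardware event ID by the PMU driver's
translate operation, while a raw event is meant to carry a model-specific
event ID through unchanged. A rough sketch of the difference, where the raw
value 0x3c is only a made-up example and not part of this interface:

    struct perfmon_event *event;

    /* Portable: the driver translates the generic ID for the local PMU. */
    perfmon_event_create(&event, PERFMON_ET_GENERIC, PERFMON_EV_CYCLE,
                         PERFMON_EF_KERN);

    /* Hardware-specific: the ID is a raw, model-specific event encoding. */
    perfmon_event_create(&event, PERFMON_ET_RAW, 0x3c, PERFMON_EF_KERN);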
+
+/*
+ * IDs of generic performance events.
+ */
+#define PERFMON_EV_CYCLE 0
+#define PERFMON_EV_REF_CYCLE 1
+#define PERFMON_EV_INSTRUCTION 2
+#define PERFMON_EV_CACHE_REF 3
+#define PERFMON_EV_CACHE_MISS 4
+#define PERFMON_EV_BRANCH 5
+#define PERFMON_EV_BRANCH_MISS 6
+#define PERFMON_NR_GENERIC_EVENTS 7
+
+/*
+ * Event flags.
+ */
+#define PERFMON_EF_KERN 0x1 /* Monitor events in kernel mode */
+#define PERFMON_EF_USER 0x2 /* Monitor events in user mode */
+#define PERFMON_EF_MASK (PERFMON_EF_KERN | PERFMON_EF_USER)
+
+/*
+ * PMU operations.
+ *
+ * Set by calling perfmon_pmu_register().
+ */
+struct perfmon_pmu_ops {
+    void (*info)(void);
+    int (*translate)(unsigned int *raw_event_idp, unsigned int event_id);
+    int (*alloc)(unsigned int *pmc_idp, unsigned int raw_event_id);
+    void (*free)(unsigned int pmc_id);
+    void (*start)(unsigned int pmc_id, unsigned int raw_event_id);
+    void (*stop)(unsigned int pmc_id);
+    uint64_t (*read)(unsigned int pmc_id);
+    void (*write)(unsigned int pmc_id, uint64_t value);
+    /* If set, of_max_ticks should be set to 0. */
+    void (*handle_of_intr)(void);
+};
+
+/*
+ * PMU device description.
+ */
+struct perfmon_pmu_driver {
+    uint8_t pmc_width; /* Width of a PMC, in bits */
+    /*
+     * Maximum number of clock ticks between two overflow checks.
+     * Should be set to 0 if handle_of_intr is set.
+     */
+    uint64_t of_max_ticks;
+    struct perfmon_pmu_ops ops;
+};
+
+/*
+ * Performance monitoring event.
+ *
+ * An event describes a single, well-defined state and records its
+ * occurrences over a period of time. It must be added to exactly
+ * one group before being used.
+ */
+struct perfmon_event;
+
+/*
+ * Group of performance monitoring events.
+ *
+ * A group must be attached to either a thread or a processor, and abstracts
+ * all operations on hardware counters.
+ *
+ * Until a group is actually attached, it is assumed there is only one
+ * reference on it, owned by the caller.
+ *
+ * For a thread-attached group, it is the user's responsibility to make sure
+ * that perfmon_group_stop() is always called before the monitored thread is
+ * deleted.
+ */
+struct perfmon_group;
+
+/*
+ * Create an event.
+ */
+int perfmon_event_create(struct perfmon_event **eventp, unsigned int type,
+                         unsigned int id, int flags);
+
+/*
+ * Destroy an event.
+ *
+ * Once an event is added to a group, it can only be destroyed by destroying
+ * the group.
+ */
+void perfmon_event_destroy(struct perfmon_event *event);
+
+/*
+ * Obtain the number of occurrences of an event.
+ *
+ * Events are updated at specific points in time, which means the value
+ * returned by this function can be outdated.
+ *
+ * See perfmon_group_update() and perfmon_group_stop().
+ */
+uint64_t perfmon_event_read(const struct perfmon_event *event);
+
+/*
+ * Reset the number of occurrences of an event to 0.
+ *
+ * The group containing the given event should be stopped when calling
+ * this function.
+ */
+void perfmon_event_reset(struct perfmon_event *event);
+
+/*
+ * Create an event group.
+ *
+ * Events must be added to the group, which must then be attached to a
+ * processor or a thread.
+ */
+int perfmon_group_create(struct perfmon_group **groupp);
+
+/*
+ * Destroy a group and all its events.
+ *
+ * A group can only be destroyed once stopped and detached.
+ *
+ * Returns EINVAL if the group is not detached.
+ */
+int perfmon_group_destroy(struct perfmon_group *group);
+
+/*
+ * Add an event into a group.
+ *
+ * Events can only be added while a group isn't attached.
+ */
+void perfmon_group_add(struct perfmon_group *group,
+                       struct perfmon_event *event);
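Putting the calls above and below end to end, a minimal monitoring sequence
could look as follows. This is only a sketch: error handling is elided, and
the module is assumed to be fully initialized (see perfmon_setup):

    struct perfmon_group *group;
    struct perfmon_event *event;
    uint64_t count;

    perfmon_group_create(&group);
    perfmon_event_create(&event, PERFMON_ET_GENERIC, PERFMON_EV_CYCLE,
                         PERFMON_EF_KERN);
    perfmon_group_add(group, event);      /* add before attaching */
    perfmon_group_attach_cpu(group, 0);   /* reserves logical counters */
    perfmon_group_start(group);

    /* ... run the monitored workload ... */

    perfmon_group_stop(group);            /* implicitly updates events */
    count = perfmon_event_read(event);
    perfmon_group_detach(group);
    perfmon_group_destroy(group);         /* destroys the event too */
    (void)count;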
+
+/*
+ * Attach a group to a thread or a processor, respectively, reserving
+ * the associated logical counters.
+ *
+ * A group can only be attached to one thread or processor at a time.
+ */
+int perfmon_group_attach(struct perfmon_group *group, struct thread *thread);
+int perfmon_group_attach_cpu(struct perfmon_group *group, unsigned int cpu);
+
+/*
+ * Detach a group from a thread or a processor.
+ *
+ * This frees the associated logical counters.
+ *
+ * Returns EINVAL if the group is still enabled (not stopped).
+ */
+int perfmon_group_detach(struct perfmon_group *group);
+
+/*
+ * Start performance monitoring.
+ *
+ * A group must be attached before being started.
+ */
+int perfmon_group_start(struct perfmon_group *group);
+
+/*
+ * Update all events in the given group.
+ */
+void perfmon_group_update(struct perfmon_group *group);
+
+/*
+ * Stop performance monitoring.
+ *
+ * A group can't be detached before it's stopped. Events are implicitly
+ * updated when calling this function.
+ */
+int perfmon_group_stop(struct perfmon_group *group);
+
+/*
+ * Initialize perfmon thread-specific data for the given thread.
+ */
+int perfmon_thread_init(struct thread *thread);
+
+/*
+ * Destroy perfmon thread-specific data for the given thread.
+ */
+void perfmon_thread_destroy(struct thread *thread);
+
+/*
+ * Load/unload the events associated with a thread on the current processor.
+ *
+ * These functions should only be used by the scheduler during context switch.
+ * Interrupts and preemption must be disabled when calling them.
+ */
+void perfmon_thread_load(struct thread *thread);
+void perfmon_thread_unload(struct thread *thread);
+
+/*
+ * This init operation provides:
+ *  - perfmon_thread_init()
+ */
+INIT_OP_DECLARE(perfmon_bootstrap);
+
+/*
+ * This init operation provides:
+ *  - module fully initialized
+ */
+INIT_OP_DECLARE(perfmon_setup);
+
+/*
+ * Handle an overflow interrupt.
+ */
+void perfmon_of_intr(void);
+
+/*
+ * Register an architecture-specific driver.
+ */
+int perfmon_pmu_register(struct perfmon_pmu_driver *driver);
+
+/*
+ * Signal an overflow for the given PMC.
+ *
+ * Should be called from a PMU driver's custom overflow interrupt handler.
+ */
+void perfmon_cpu_on_pmc_of(unsigned int pmc_id);
+
+#endif /* KERN_PERFMON_H */
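On the driver side, an architecture-specific PMU advertises itself through
this same header. The following is a hypothetical skeleton with no-op
example_* callbacks (the real implementations are pmu_intel.c and pmu_amd.c
in this commit); the counter width and polling interval are arbitrary
assumptions, not values mandated by the interface:

    static void example_info(void) {}
    static int example_translate(unsigned int *raw_event_idp,
                                 unsigned int event_id)
    { *raw_event_idp = event_id; return 0; }
    static int example_alloc(unsigned int *pmc_idp, unsigned int raw_event_id)
    { (void)raw_event_id; *pmc_idp = 0; return 0; }
    static void example_free(unsigned int pmc_id) { (void)pmc_id; }
    static void example_start(unsigned int pmc_id, unsigned int raw_event_id)
    { (void)pmc_id; (void)raw_event_id; }
    static void example_stop(unsigned int pmc_id) { (void)pmc_id; }
    static uint64_t example_read(unsigned int pmc_id)
    { (void)pmc_id; return 0; }
    static void example_write(unsigned int pmc_id, uint64_t value)
    { (void)pmc_id; (void)value; }

    static struct perfmon_pmu_driver example_pmu_driver = {
        .pmc_width = 48,           /* assumption: 48-bit counters */
        .of_max_ticks = 1UL << 30, /* arbitrary: polling, no overflow irq */
        .ops = {
            .info = example_info,
            .translate = example_translate,
            .alloc = example_alloc,
            .free = example_free,
            .start = example_start,
            .stop = example_stop,
            .read = example_read,
            .write = example_write,
            /* .handle_of_intr left unset, hence of_max_ticks != 0 */
        },
    };

    static int __init
    example_pmu_setup(void)
    {
        return perfmon_pmu_register(&example_pmu_driver);
    }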
diff --git a/kern/perfmon_i.h b/kern/perfmon_i.h
new file mode 100644
index 0000000..3072171
--- /dev/null
+++ b/kern/perfmon_i.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Internal functions of the performance monitoring module.
+ */
+
+#ifndef KERN_PERFMON_I_H
+#define KERN_PERFMON_I_H
+
+#include <kern/perfmon.h>
+
+#ifdef CONFIG_PERFMON_TEST
+
+/*
+ * Set the hardware counter value of a running event, for overflow test
+ * purposes.
+ *
+ * Beware, this will affect all events associated with the same hardware
+ * counter.
+ */
+int perfmon_event_write(struct perfmon_event *event, uint64_t value);
+
+/*
+ * Return the bit width of the hardware performance counters.
+ */
+int perfmon_get_pmc_width(void);
+
+#endif /* CONFIG_PERFMON_TEST */
+
+#endif /* KERN_PERFMON_I_H */
diff --git a/kern/perfmon_types.h b/kern/perfmon_types.h
new file mode 100644
index 0000000..6f9be0b
--- /dev/null
+++ b/kern/perfmon_types.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2014-2018 Remy Noel.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definitions used to avoid circular inclusion dependencies.
+ */
+
+#ifndef KERN_PERFMON_TYPES_H
+#define KERN_PERFMON_TYPES_H
+
+struct perfmon_grouplist;
+
+#endif /* KERN_PERFMON_TYPES_H */
diff --git a/kern/thread.c b/kern/thread.c
index 85e557d..77960ec 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -100,6 +100,7 @@
 #include <kern/macros.h>
 #include <kern/panic.h>
 #include <kern/percpu.h>
+#include <kern/perfmon.h>
 #include <kern/rcu.h>
 #include <kern/shell.h>
 #include <kern/sleepq.h>
@@ -605,9 +606,22 @@ thread_runq_wakeup_balancer(struct thread_runq *runq)
 }
 
 static void
-thread_runq_schedule_prepare(struct thread *thread)
+thread_runq_schedule_load(struct thread *thread)
 {
     pmap_load(thread->task->map->pmap);
+#ifdef CONFIG_PERFMON
+    perfmon_thread_load(thread);
+#endif
+}
+
+static void
+thread_runq_schedule_unload(struct thread *thread)
+{
+#ifdef CONFIG_PERFMON
+    perfmon_thread_unload(thread);
+#else
+    (void)thread;
+#endif
 }
 
 static struct thread_runq *
@@ -639,6 +653,8 @@ thread_runq_schedule(struct thread_runq *runq)
     assert(next->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL);
 
     if (likely(prev != next)) {
+        thread_runq_schedule_unload(prev);
+
         rcu_report_context_switch(thread_rcu_reader(prev));
         spinlock_transfer_owner(&runq->lock, next);
@@ -660,10 +676,10 @@ thread_runq_schedule(struct thread_runq *runq)
          * - The current thread may have been migrated to another processor.
         */
        barrier();
 
+        thread_runq_schedule_load(prev);
+
         next = NULL;
         runq = thread_runq_local();
-
-        thread_runq_schedule_prepare(prev);
     } else {
         next = NULL;
     }
@@ -1750,7 +1766,7 @@ thread_main(void (*fn)(void *), void *arg)
     assert(!thread_preempt_enabled());
 
     thread = thread_self();
-    thread_runq_schedule_prepare(thread);
+    thread_runq_schedule_load(thread);
 
     spinlock_unlock(&thread_runq_local()->lock);
     cpu_intr_enable();
@@ -1847,6 +1863,14 @@ thread_init(struct thread *thread, void *stack,
         thread->flags |= THREAD_DETACHED;
     }
 
+#ifdef CONFIG_PERFMON
+    error = perfmon_thread_init(thread);
+
+    if (error) {
+        goto error_perfmon;
+    }
+#endif /* CONFIG_PERFMON */
+
     error = tcb_build(&thread->tcb, stack, fn, arg);
 
     if (error) {
@@ -1858,6 +1882,10 @@ thread_init(struct thread *thread, void *stack,
     return 0;
 
 error_tcb:
+#ifdef CONFIG_PERFMON
+    perfmon_thread_destroy(thread);
+error_perfmon:
+#endif /* CONFIG_PERFMON */
     thread_destroy_tsd(thread);
     turnstile_destroy(thread->priv_turnstile);
 error_turnstile:
@@ -1977,6 +2005,9 @@ thread_destroy(struct thread *thread)
     /* See task_info() */
     task_remove_thread(thread->task, thread);
 
+#ifdef CONFIG_PERFMON
+    perfmon_thread_destroy(thread);
+#endif
     thread_destroy_tsd(thread);
     turnstile_destroy(thread->priv_turnstile);
     sleepq_destroy(thread->priv_sleepq);
@@ -2309,6 +2340,13 @@ thread_setup(void)
 #define THREAD_STACK_GUARD_INIT_OP_DEPS
 #endif /* CONFIG_THREAD_STACK_GUARD */
 
+#ifdef CONFIG_PERFMON
+#define THREAD_PERFMON_INIT_OP_DEPS \
+    INIT_OP_DEP(perfmon_bootstrap, true),
+#else /* CONFIG_PERFMON */
+#define THREAD_PERFMON_INIT_OP_DEPS
+#endif /* CONFIG_PERFMON */
+
 INIT_OP_DEFINE(thread_setup,
                INIT_OP_DEP(cpumap_setup, true),
                INIT_OP_DEP(kmem_setup, true),
@@ -2317,6 +2355,7 @@ INIT_OP_DEFINE(thread_setup,
                INIT_OP_DEP(task_setup, true),
                INIT_OP_DEP(thread_bootstrap, true),
                INIT_OP_DEP(turnstile_setup, true),
+               THREAD_PERFMON_INIT_OP_DEPS
                THREAD_STACK_GUARD_INIT_OP_DEPS
                );
 
@@ -2696,6 +2735,13 @@ thread_report_periodic_event(void)
     spinlock_unlock(&runq->lock);
 }
 
+unsigned int
+thread_cpu(const struct thread *thread)
+{
+    assert(thread->runq);
+    return thread->runq->cpu;
+}
+
 char
 thread_state_to_chr(const struct thread *thread)
 {
diff --git a/kern/thread.h b/kern/thread.h
index 4bead75..787ccf5 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -283,6 +283,14 @@ void thread_setscheduler(struct thread *thread, unsigned char policy,
 void thread_pi_setscheduler(struct thread *thread, unsigned char policy,
                             unsigned short priority);
 
+/*
+ * Return the last CPU on which the thread has been scheduled.
+ *
+ * This call is not synchronized with respect to migration. The caller
+ * may obtain an outdated value.
+ */
+unsigned int thread_cpu(const struct thread *thread);
+
 static inline void
 thread_ref(struct thread *thread)
 {
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 0be1e77..9c24d3a 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -24,6 +24,7 @@
 #include <kern/atomic.h>
 #include <kern/cpumap.h>
 #include <kern/list_types.h>
+#include <kern/perfmon_types.h>
 #include <kern/rcu_types.h>
 #include <kern/spinlock_types.h>
 #include <kern/turnstile_types.h>
@@ -185,6 +186,10 @@ struct thread {
     struct list task_node; /* (T) */
     void *stack; /* (-) */
     char name[THREAD_NAME_SIZE]; /* ( ) */
+
+#ifdef CONFIG_PERFMON
+    struct perfmon_grouplist *perfmon_groups;
+#endif
 };
 
 #define THREAD_ATTR_DETACHED 0x1
diff --git a/test/Kconfig b/test/Kconfig
index 80679ef..6999605 100644
--- a/test/Kconfig
+++ b/test/Kconfig
@@ -19,6 +19,18 @@ config TEST_MODULE_MUTEX
 config TEST_MODULE_MUTEX_PI
         bool "mutex_pi"
 
+config TEST_MODULE_PERFMON_CPU
+        bool "perfmon_cpu"
+        depends on PERFMON
+
+config TEST_MODULE_PERFMON_THREAD
+        bool "perfmon_thread"
+        depends on PERFMON
+
+config TEST_MODULE_PERFMON_TORTURE
+        bool "perfmon_torture"
+        depends on PERFMON
+
 config TEST_MODULE_PMAP_UPDATE_MP
         bool "pmap_update_mp"
 
@@ -43,3 +55,7 @@ config TEST_MODULE_XCALL
 endchoice
 
 endif
+
+config PERFMON_TEST
+        def_bool y
+        depends on TEST_MODULE_PERFMON_THREAD || TEST_MODULE_PERFMON_CPU
diff --git a/test/Makefile b/test/Makefile
index c98d6fb..b6f2260 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,6 +1,9 @@
 x15_SOURCES-$(CONFIG_TEST_MODULE_BULLETIN) += test/test_bulletin.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX) += test/test_mutex.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX_PI) += test/test_mutex_pi.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_PMAP_UPDATE_MP) += test/test_pmap_update_mp.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_RCU_DEFER) += test/test_rcu_defer.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES) += test/test_sref_dirty_zeroes.c
diff --git a/test/test_perfmon_cpu.c b/test/test_perfmon_cpu.c
new file mode 100644
index 0000000..6f1414c
--- /dev/null
+++ b/test/test_perfmon_cpu.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2014 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Here, we test the perfmon module for cross-CPU performance monitoring.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <kern/error.h>
+#include <kern/perfmon_i.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+#include <test/test.h>
+
+#define WAIT_DELAY_USEC 1000000
+
+static volatile bool stop;
+
+static void
+test_do_nothing(void *arg)
+{
+    (void)arg;
+
+    while (!stop);
+}
+
+static void
+test_report_event(const struct perfmon_event *event, const char *name)
+{
+    unsigned long long count;
+
+    count = perfmon_event_read(event);
+    printf("test: %s: %llu\n", name, count);
+}
+
+static uint64_t
+test_get_pre_overflow_value(uint64_t value)
+{
+    uint64_t pmc_max;
+    unsigned int pmc_width;
+
+    pmc_width = perfmon_get_pmc_width();
+    pmc_max = (1ULL << pmc_width) - 1;
+    pmc_max &= 0xffffffff80000000;
+
+    /* XXX: Work around most processors not allowing full-width writes. */
+    return ((~value + 1) & 0x7fffffff) | pmc_max;
+}
+
+static void
+test_run(void *arg)
+{
+    struct perfmon_event *ev_cycle, *ev_instruction;
+    struct perfmon_group *group;
+    int error;
+    uint64_t value;
+
+    (void)arg;
+
+    error = perfmon_group_create(&group);
+    error_check(error, "perfmon_group_create");
+
+    error = perfmon_event_create(&ev_cycle, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_event_create");
+    perfmon_group_add(group, ev_cycle);
+
+    error = perfmon_event_create(&ev_instruction, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_INSTRUCTION, PERFMON_EF_KERN);
+    error_check(error, "perfmon_event_create");
+    perfmon_group_add(group, ev_instruction);
+
+    error = perfmon_group_attach_cpu(group, 1);
+    error_check(error, "perfmon_group_attach_cpu 1");
+
+    error = perfmon_group_start(group);
+    error_check(error, "perfmon_group_start");
+
+    cpu_delay(WAIT_DELAY_USEC);
+    error = perfmon_group_stop(group);
+    error_check(error, "perfmon_group_stop");
+    test_report_event(ev_cycle, "cycle");
+    test_report_event(ev_instruction, "instruction");
+
+    printf("checking with overflow ...\n");
+    value = test_get_pre_overflow_value(perfmon_event_read(ev_cycle) / 2);
+    error = perfmon_event_write(ev_cycle, value);
+    error_check(error, "perfmon_event_write");
+
+    value = test_get_pre_overflow_value(perfmon_event_read(ev_instruction) / 3);
+    error = perfmon_event_write(ev_instruction, value);
+    error_check(error, "perfmon_event_write");
+
+    perfmon_event_reset(ev_cycle);
+    perfmon_event_reset(ev_instruction);
+
+    error = perfmon_group_start(group);
+    error_check(error, "perfmon_group_start");
+
+    cpu_delay(WAIT_DELAY_USEC);
+    error = perfmon_group_stop(group);
+    error_check(error, "perfmon_group_stop");
+    test_report_event(ev_cycle, "cycle");
+    test_report_event(ev_instruction, "instruction");
+
+    error = perfmon_group_detach(group);
+    error_check(error, "perfmon_group_detach");
+
+    error = perfmon_group_destroy(group);
+    error_check(error, "perfmon_group_destroy");
+
+    stop = true;
+}
+
+void
+test_setup(void)
+{
+    struct thread_attr attr;
+    struct thread *thread0, *thread1;
+    struct cpumap *cpumap;
+    int error;
+
+    error = cpumap_create(&cpumap);
+    error_check(error, "cpumap_create 0");
+    cpumap_zero(cpumap);
+    cpumap_set(cpumap, 0);
+
+    thread_attr_init(&attr, "x15_test_run");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    error = thread_create(&thread0, &attr, test_run, NULL);
+    error_check(error, "thread_create 0");
+
+    cpumap_zero(cpumap);
+    cpumap_set(cpumap, 1);
+
+    thread_attr_init(&attr, "x15_test_do_nothing");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    error = thread_create(&thread1, &attr, test_do_nothing, NULL);
+    error_check(error, "thread_create 1");
+}
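For clarity, here is a worked instance of test_get_pre_overflow_value(),
under the assumption of a 48-bit wide PMC and value = 100:

    /* Hypothetical walk-through, assuming pmc_width = 48, value = 100. */
    pmc_max = (1ULL << 48) - 1;       /* 0x0000ffffffffffff */
    pmc_max &= 0xffffffff80000000;    /* 0x0000ffff80000000 */

    /* ~100 + 1 is the two's complement of 100; keep the low 31 bits:   */
    /* 0xffffffffffffff9c & 0x7fffffff == 0x7fffff9c                    */
    /* result: 0x0000ffff80000000 | 0x7fffff9c == 0x0000ffffffffff9c    */

The counter is thus parked 100 increments below the 48-bit wrap-around
point, while the masking keeps the written value within the 31-bit range
that most processors accept for counter writes (hence the XXX note).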
diff --git a/test/test_perfmon_thread.c b/test/test_perfmon_thread.c
new file mode 100644
index 0000000..e78fab9
--- /dev/null
+++ b/test/test_perfmon_thread.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2014 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test checks that CPU-local remote thread monitoring works properly.
+ * The test uses two threads: the main one, which is monitored, and another
+ * that acts as a control thread. The monitored counts should increase while
+ * the main thread runs and shouldn't otherwise.
+ * Initially, the main thread runs while the control one doesn't.
+ * The control thread is then scheduled, and finally the main thread is
+ * rescheduled.
+ *
+ * In order to trigger the counter slot reuse mechanism, this test also adds
+ * a CPU counter on the same CPU as the threads.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <kern/condition.h>
+#include <kern/error.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/printf.h>
+#include <kern/thread.h>
+
+#include <test/test.h>
+
+#define NR_LOOPS 1000000UL
+
+static inline void
+test_loop(void)
+{
+    volatile unsigned long i;
+
+    for (i = 0; i < NR_LOOPS; i++);
+}
+
+struct thread *test_main, *test_control;
+struct perfmon_group *thread_group;
+struct perfmon_event *thread_ev_cycle;
+bool test_monitoring = true;
+
+struct perfmon_group *cpu_group;
+struct perfmon_event *cpu_ev_cycle;
+
+struct proxy_thread_runq {
+    struct spinlock lock;
+    unsigned int cpu;
+};
+
+static void
+x15_test_main_run(void *arg)
+{
+    unsigned long long thread_count1, thread_count2;
+    unsigned long long cpu_count1, cpu_count2;
+    int error;
+
+    (void)arg;
+
+    thread_preempt_disable();
+
+    /* Create a perfmon group to monitor this CPU. */
+    error = perfmon_group_create(&cpu_group);
+    error_check(error, "perfmon_cpu_group_create");
+
+    error = perfmon_event_create(&cpu_ev_cycle, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_cpu_event_create");
+    perfmon_group_add(cpu_group, cpu_ev_cycle);
+
+    error = perfmon_group_attach_cpu(cpu_group, 0);
+    error_check(error, "perfmon_cpu_group_attach");
+
+    error = perfmon_group_start(cpu_group);
+    error_check(error, "perfmon_group_start_cpu");
+
+    perfmon_group_update(cpu_group);
+    cpu_count1 = perfmon_event_read(cpu_ev_cycle);
+
+    /* Create a perfmon group to monitor this thread. */
+    error = perfmon_group_create(&thread_group);
+    error_check(error, "perfmon_thread_group_create");
+
+    error = perfmon_event_create(&thread_ev_cycle, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_thread_event_create");
+    perfmon_group_add(thread_group, thread_ev_cycle);
+
+    error = perfmon_group_attach(thread_group, thread_self());
+    error_check(error, "perfmon_thread_group_attach");
+
+    /* Start monitoring. */
+    error = perfmon_group_start(thread_group);
+    error_check(error, "perfmon_group_start_thread");
+
+    perfmon_group_update(thread_group);
+    thread_count1 = perfmon_event_read(thread_ev_cycle);
+
+    test_loop();
+    perfmon_group_update(thread_group);
+    thread_count2 = perfmon_event_read(thread_ev_cycle);
+
+    perfmon_group_update(cpu_group);
+    cpu_count2 = perfmon_event_read(cpu_ev_cycle);
+
+    if (thread_count1 == thread_count2) {
+        panic("not monitoring thread after monitoring start\n"
+              "stayed at %llu cycles\n", thread_count1);
+    }
+
+    if (cpu_count1 == cpu_count2) {
+        panic("not monitoring cpu after monitoring start\n"
+              "stayed at %llu cycles\n", cpu_count1);
+    }
+
+    /* Let's switch to the other thread and sleep. */
+    test_monitoring = false;
+    thread_wakeup(test_control);
+    thread_sleep(NULL, &test_monitoring, "dummy sync object");
+
+    /* Waking up. */
+    if (!test_monitoring) {
+        panic("main thread woke up when it should not");
+    }
+
+    /* Check that monitoring is active again. */
+    perfmon_group_update(cpu_group);
+    cpu_count1 = perfmon_event_read(cpu_ev_cycle);
+
+    perfmon_group_update(thread_group);
+    thread_count1 = perfmon_event_read(thread_ev_cycle);
+
+    test_loop();
+
+    perfmon_group_update(thread_group);
+    thread_count2 = perfmon_event_read(thread_ev_cycle);
+
+    perfmon_group_update(cpu_group);
+    cpu_count2 = perfmon_event_read(cpu_ev_cycle);
+
+    if (thread_count1 == thread_count2) {
+        panic("not monitoring thread after thread re-scheduling\n"
+              "stayed at %llu cycles\n", thread_count1);
+    }
+
+    if (cpu_count1 == cpu_count2) {
+        panic("not monitoring cpu after thread got re-scheduled\n"
+              "stayed at %llu cycles\n", cpu_count1);
+    }
+
+    thread_preempt_enable();
+
+    error = perfmon_group_stop(thread_group);
+    error_check(error, "perfmon_group_stop_thread");
+
+    error = perfmon_group_detach(thread_group);
+    error_check(error, "perfmon_group_detach_thread");
+
+    error = perfmon_group_destroy(thread_group);
+    error_check(error, "perfmon_group_destroy_thread");
+
+    error = perfmon_group_stop(cpu_group);
+    error_check(error, "perfmon_group_stop_cpu");
+    error = perfmon_group_detach(cpu_group);
+    error_check(error, "perfmon_group_detach_cpu");
+    error = perfmon_group_destroy(cpu_group);
+    error_check(error, "perfmon_group_destroy_cpu");
+
+    printf("test perfmon thread sched finished\n");
+}
+
+static void
+x15_test_control_run(void *arg)
+{
+    unsigned long long thread_count1, thread_count2;
+    unsigned long long cpu_count1, cpu_count2;
+
+    (void)arg;
+
+    thread_preempt_disable();
+
+    /* Let the first thread run. */
+    while (test_monitoring) {
+        thread_sleep(NULL, &test_monitoring, "dummy sync object");
+    }
+
+    /* Check that this thread is not monitored (but the CPU is). */
+    perfmon_group_update(cpu_group);
+    cpu_count1 = perfmon_event_read(cpu_ev_cycle);
+
+    perfmon_group_update(thread_group);
+    thread_count1 = perfmon_event_read(thread_ev_cycle);
+
+    test_loop();
+
+    perfmon_group_update(thread_group);
+    thread_count2 = perfmon_event_read(thread_ev_cycle);
+
+    perfmon_group_update(cpu_group);
+    cpu_count2 = perfmon_event_read(cpu_ev_cycle);
+
+    if (thread_count1 != thread_count2) {
+        panic("still monitoring while thread is unscheduled\n"
+              "gone from %llu to %llu cycles\n", thread_count1, thread_count2);
+    }
+
+    if (cpu_count1 == cpu_count2) {
+        panic("not monitoring cpu after thread got unscheduled\n"
+              "stayed at %llu cycles\n", cpu_count1);
+    }
+
+    /* Wake up x15_test_main. */
+    test_monitoring = true;
+    thread_wakeup(test_main);
+    thread_preempt_enable();
+}
+
+void
+test_setup(void)
+{
+    struct thread_attr attr;
+    struct cpumap *cpumap;
+    int error;
+
+    printf("test perfmon thread sched start\n");
+
+    error = cpumap_create(&cpumap);
+    error_check(error, "cpumap_create");
+    cpumap_zero(cpumap);
+    cpumap_set(cpumap, 0);
+
+    thread_attr_init(&attr, "x15_test_main_thread");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+
+    error = thread_create(&test_main, &attr, x15_test_main_run, NULL);
+    error_check(error, "thread_create 0");
+
+    thread_attr_init(&attr, "x15_test_control_thread");
+    thread_attr_set_detached(&attr);
+    thread_attr_set_cpumap(&attr, cpumap);
+    thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+    error = thread_create(&test_control, &attr, x15_test_control_run, NULL);
+    error_check(error, "thread_create 1");
+}
diff --git a/test/test_perfmon_torture.c b/test/test_perfmon_torture.c
new file mode 100644
index 0000000..7b54129
--- /dev/null
+++ b/test/test_perfmon_torture.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2014 Remy Noel.
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This is a stress test for perfmon thread monitoring.
+ * The goal is to exercise as many code paths of the perfmon module as
+ * possible. We therefore try to get a high migration rate by scheduling
+ * at least as many threads as CPUs.
+ * Also, we stop and restart some threads along the way in order to check
+ * whether stopped threads are properly handled.
+ *
+ * TODO: Replace thread selection with a proper pseudo-random function once
+ * we get one.
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <kern/error.h>
+#include <kern/kmem.h>
+#include <kern/panic.h>
+#include <kern/perfmon.h>
+#include <kern/thread.h>
+#include <test/test.h>
+
+enum test_thread_state {
+    TEST_LAUNCHED = 0,
+    TEST_RUNNING,
+    TEST_STOPPING,
+    TEST_STOPPED,
+};
+
+struct test_thread {
+    struct thread *thread;
+    struct perfmon_group *group;
+    struct perfmon_event *event;
+
+    bool monitored;
+    enum test_thread_state state;
+    unsigned long long count;
+};
+
+struct threads_stats {
+    size_t num_group_start;
+    size_t num_group_started;
+    size_t num_thread_start;
+    size_t num_thread_started;
+};
+
+static struct test_thread **test_threads;
+
+static struct thread *test_control;
+
+#define TEST_WAIT_INSTRUCT_COUNT 1000UL
+#define TEST_NUM_LOOP_STATUS_PRINT 200000
+
+static void
+test_wait(void)
+{
+    volatile unsigned long i;
+
+    /* TODO: Do something a bit more clever once timers are here. */
+
+    for (i = 0; i < TEST_WAIT_INSTRUCT_COUNT; i++);
+}
+
+static void
+test_thread_run(void *arg)
+{
+    struct test_thread *thread = arg;
+    unsigned long num_loops;
+
+    assert(thread->state == TEST_LAUNCHED);
+    num_loops = 0;
+
+    thread->state = TEST_RUNNING;
+
+    for (;;) {
+        barrier();
+
+        if (thread->state == TEST_STOPPING) {
+            break;
+        }
+
+        /* Individual threads wait twice as long as the control one in
+         * order to induce some asynchronism between the control thread
+         * and the monitored threads.
+         */
+        test_wait();
+        test_wait();
+        num_loops++;
+    }
+
+    thread->state = TEST_STOPPED;
+}
+
+static void
+test_thread_toggle_monitor(struct test_thread *thread,
+                           struct threads_stats *stats)
+{
+    int error;
+    struct perfmon_group *group;
+
+    group = thread->group;
+
+    if (!thread->monitored) {
+        error = perfmon_group_start(group);
+        error_check(error, "perfmon_group_start");
+        thread->monitored = true;
+        stats->num_group_start++;
+        stats->num_group_started++;
+    } else {
+        perfmon_group_update(group);
+        thread->count = perfmon_event_read(thread->event);
+        error = perfmon_group_stop(group);
+        error_check(error, "perfmon_group_stop");
+        thread->monitored = false;
+        stats->num_group_started--;
+    }
+}
+
+static int
+test_thread_create_monitored_thread(struct thread **thread, size_t index,
+                                    void *arg)
+{
+    struct thread_attr attr;
+    char name[THREAD_NAME_SIZE];
+
+    snprintf(name, sizeof(name), THREAD_KERNEL_PREFIX
+             "test_monitored_thread:%zu", index);
+    thread_attr_init(&attr, name);
+    thread_attr_set_detached(&attr);
+
+    return thread_create(thread, &attr, test_thread_run, arg);
+}
+
+static void
+test_thread_toggle_state(size_t index,
+                         struct threads_stats *stats)
+{
+    int error;
+    struct perfmon_group *group;
+    struct test_thread *thread;
+
+    thread = test_threads[index];
+    group = thread->group;
+
+    switch (thread->state) {
+    case TEST_RUNNING:
+        thread->state = TEST_STOPPING;
+        stats->num_thread_started--;
+        break;
+    case TEST_STOPPED:
+        /* Restart the thread and reattach the previous thread's group
+         * to the new one.
+         */
+        if (thread->monitored) {
+            test_thread_toggle_monitor(thread, stats);
+        }
+
+        error = perfmon_group_detach(group);
+        error_check(error, "perfmon_group_detach");
+        thread->state = TEST_LAUNCHED;
+        error = test_thread_create_monitored_thread(&thread->thread, index,
+                                                    thread);
+        error_check(error, "thread_recreate monitored");
+        error = perfmon_group_attach(group, thread->thread);
+        error_check(error, "perfmon_group_attach");
+        stats->num_thread_start++;
+        stats->num_thread_started++;
+        break;
+    default:
+        /* Do nothing if the thread is not in a stable state. */
+        break;
+    }
+}
+
+static void
+test_x15_test_control_run(void *arg)
+{
+    size_t selected_thread;
+    size_t stopped_thread;
+    struct test_thread *thread;
+    size_t nr_threads;
+    size_t loop_since_status;
+    struct threads_stats stats;
+
+    (void)arg;
+
+    nr_threads = MAX(cpu_count() - 1, 1);
+    selected_thread = 0;
+    stopped_thread = 0;
+    loop_since_status = 0;
+    stats.num_group_start = 0;
+    stats.num_group_started = 0;
+    stats.num_thread_start = nr_threads;
+    stats.num_thread_started = nr_threads;
+
+    printf("monitoring %zu threads\n", nr_threads);
+
+    for (;;) {
+        /* Dummy `random' thread selection. */
+        selected_thread = (selected_thread + 7) % nr_threads;
+        thread = test_threads[selected_thread];
+        test_thread_toggle_monitor(thread, &stats);
+
+        /* Only half of the threads may be stopped/restarted. */
+        stopped_thread = (stopped_thread + 11) % ((nr_threads + 1) / 2);
+        test_thread_toggle_state(stopped_thread, &stats);
+
+        test_wait();
+
+        if (!(++loop_since_status % TEST_NUM_LOOP_STATUS_PRINT)) {
+            printf("===============================\n");
+            printf("%zu groups started (%zu total)\n", stats.num_group_started,
+                   stats.num_group_start);
+            printf("%zu threads started (%zu total)\n",
+                   stats.num_thread_started, stats.num_thread_start);
+            printf("monitor value: ");
+
+            for (size_t i = 0; i < nr_threads; i++) {
+                printf("%zu: %llu, ", i, test_threads[i]->count);
+            }
+
+            printf("\n");
+        }
+    }
+}
+
+static struct test_thread *
+test_thread_create(size_t index)
+{
+    struct test_thread *thread;
+    int error;
+
+    thread = kmem_zalloc(sizeof(*thread));
+
+    if (thread == NULL) {
+        panic("thread allocation failed");
+    }
+
+    error = test_thread_create_monitored_thread(&thread->thread, index, thread);
+    error_check(error, "thread_create");
+    error = perfmon_group_create(&thread->group);
+    error_check(error, "perfmon_group_create");
+    error = perfmon_event_create(&thread->event, PERFMON_ET_GENERIC,
+                                 PERFMON_EV_CYCLE, PERFMON_EF_KERN);
+    error_check(error, "perfmon_event_create");
+
+    perfmon_group_add(thread->group, thread->event);
+    error = perfmon_group_attach(thread->group, thread->thread);
+    error_check(error, "perfmon_group_attach");
+
+    return thread;
+}
+
+void
+test_setup(void)
+{
+    struct thread_attr attr;
+    size_t nr_threads;
+    size_t i;
+    int error;
+
+    nr_threads = MAX(cpu_count() - 1, 1);
+
+    test_threads = kmem_alloc(nr_threads * sizeof(*test_threads));
+
+    for (i = 0; i < nr_threads; i++) {
+        test_threads[i] = test_thread_create(i);
+    }
+
+    thread_attr_init(&attr, "x15_test_control_thread");
+    thread_attr_set_detached(&attr);
+    error = thread_create(&test_control, &attr, test_x15_test_control_run,
+                          NULL);
+    error_check(error, "thread_create control");
+}
diff --git a/tools/build_configs.py b/tools/build_configs.py
index b0674b7..ba872d4 100755
--- a/tools/build_configs.py
+++ b/tools/build_configs.py
@@ -94,6 +94,7 @@ small_options_dict = {
     'CONFIG_SMP' : ['y', 'n'],
    'CONFIG_MAX_CPUS' : ['1', '128'],
     'CONFIG_ASSERT' : ['y', 'n'],
+    'CONFIG_PERFMON' : ['y', 'n'],
 }
 
 large_options_dict = dict(small_options_dict)
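With this last hunk, the configuration generator also exercises builds with
and without the feature. For a manual build, a plausible configuration
fragment enabling the module and one of its tests (option names taken from
the Kconfig entries added in this commit) would be:

    CONFIG_PERFMON=y
    CONFIG_TEST_MODULE_PERFMON_CPU=y
    # CONFIG_PERFMON_TEST is then enabled automatically (def_bool y)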