author     Richard Braun <rbraun@sceen.net>  2014-09-03 21:22:26 +0200
committer  Richard Braun <rbraun@sceen.net>  2014-09-03 21:22:26 +0200
commit     eed59b8076e7668b5e0f874bd3ed28230f470bb1 (patch)
tree       d5b7ddf4c01d3c8b9bbcd4fe957c0b40522ac15b
parent     7fa931d4e56e8926058c0a2a10d731614dfd6f65 (diff)
x86/cpu: improve percpu support
- declare CPU descriptors as percpu variables
- make the percpu segment register point to the percpu area instead of the
  CPU descriptor
- remove the ugly accessors for the local CPU descriptor, pmap and TCB and
  use percpu variables for them instead
- implement the cpu_local accessors as described in the percpu documentation
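
For context, here is a minimal usage sketch of the interfaces this patch
introduces. The example_counter variable and both functions are hypothetical;
only __percpu, cpu_local_read(), cpu_local_assign() and percpu_var() come
from the tree.

    #include <kern/percpu.h>
    #include <machine/cpu.h>

    /* Hypothetical per-CPU counter, placed in the percpu section. */
    unsigned long example_counter __percpu;

    static void
    example_local_update(void)
    {
        unsigned long nr;

        /*
         * Each accessor is a single fs-relative instruction (the
         * "interrupt-safe" property mentioned in cpu.h); the
         * read-modify-write sequence as a whole is not atomic.
         */
        nr = cpu_local_read(example_counter);
        cpu_local_assign(example_counter, nr + 1);
    }

    static unsigned long
    example_remote_read(unsigned int cpu)
    {
        /* Another processor's copy is reached through the percpu module. */
        return percpu_var(example_counter, cpu);
    }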
-rw-r--r--  arch/x86/machine/cpu.c      156
-rw-r--r--  arch/x86/machine/cpu.h      109
-rw-r--r--  arch/x86/machine/cpu_asm.S   15
-rw-r--r--  arch/x86/machine/pmap.c       8
-rw-r--r--  arch/x86/machine/pmap.h       3
-rw-r--r--  arch/x86/machine/tcb.c        2
-rw-r--r--  arch/x86/machine/tcb.h        8
-rw-r--r--  kern/percpu.c                 1
8 files changed, 173 insertions(+), 129 deletions(-)
diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c
index 73d7ca3d..0f362527 100644
--- a/arch/x86/machine/cpu.c
+++ b/arch/x86/machine/cpu.c
@@ -67,12 +67,17 @@
#define CPU_MP_CMOS_DATA_RESET_WARM 0x0a
#define CPU_MP_CMOS_RESET_VECTOR 0x467
-struct cpu cpu_array[MAX_CPUS];
+void *cpu_local_area __percpu;
/*
- * Number of configured processors.
+ * Processor descriptor, one per CPU.
*/
-unsigned int cpu_array_size __read_mostly;
+struct cpu cpu_desc __percpu;
+
+/*
+ * Number of active processors.
+ */
+unsigned int cpu_nr_active __read_mostly;
/*
* Interrupt descriptor table.
@@ -81,6 +86,9 @@ static struct cpu_gate_desc cpu_idt[CPU_IDT_SIZE] __aligned(8) __read_mostly;
/*
* Double fault handler, and stack for the main processor.
+ *
+ * TODO Declare as init data, and replace the BSP stack with kernel virtual
+ * memory.
*/
static unsigned long cpu_double_fault_handler;
static char cpu_double_fault_stack[STACK_SIZE] __aligned(DATA_ALIGN);
@@ -88,7 +96,16 @@ static char cpu_double_fault_stack[STACK_SIZE] __aligned(DATA_ALIGN);
unsigned long __init
cpu_get_boot_stack(void)
{
- return cpu_array[boot_ap_id].boot_stack;
+ return percpu_var(cpu_desc.boot_stack, boot_ap_id);
+}
+
+static void __init
+cpu_preinit(struct cpu *cpu, unsigned int id, unsigned int apic_id)
+{
+ cpu->id = id;
+ cpu->apic_id = apic_id;
+ cpu->state = CPU_STATE_OFF;
+ cpu->boot_stack = 0;
}
static void
@@ -168,15 +185,43 @@ cpu_seg_set_tss(char *table, unsigned int selector, struct cpu_tss *tss)
}
/*
- * Set the given GDT for the current processor, and reload its segment
- * registers.
+ * Set the given GDT for the current processor.
+ *
+ * On i386, the ds, es and ss segment registers are reloaded. In any case,
+ * the gs segment register is set to the null selector. The fs segment
+ * register, which points to the percpu area, must be set separately.
*/
-void cpu_load_gdt(struct cpu *cpu, struct cpu_pseudo_desc *gdtr);
+void cpu_load_gdt(struct cpu_pseudo_desc *gdtr);
+
+static inline void __init
+cpu_set_percpu_area(const struct cpu *cpu, void *area)
+{
+#ifdef __LP64__
+ unsigned long va;
+
+ va = (unsigned long)area;
+ cpu_set_msr(CPU_MSR_FSBASE, (uint32_t)va, (uint32_t)(va >> 32));
+#else /* __LP64__ */
+ asm volatile("mov %0, %%fs" : : "r" (CPU_GDT_SEL_PERCPU));
+#endif /* __LP64__ */
+
+ percpu_var(cpu_local_area, cpu->id) = area;
+}
+
+static void __init
+cpu_init_gdtr(struct cpu_pseudo_desc *gdtr, const struct cpu *cpu)
+{
+ gdtr->address = (unsigned long)cpu->gdt;
+ gdtr->limit = sizeof(cpu->gdt) - 1;
+}
static void __init
cpu_init_gdt(struct cpu *cpu)
{
struct cpu_pseudo_desc gdtr;
+ void *pcpu_area;
+
+ pcpu_area = percpu_area(cpu->id);
cpu_seg_set_null(cpu->gdt, CPU_GDT_SEL_NULL);
cpu_seg_set_code(cpu->gdt, CPU_GDT_SEL_CODE);
@@ -185,12 +230,12 @@ cpu_init_gdt(struct cpu *cpu)
#ifndef __LP64__
cpu_seg_set_tss(cpu->gdt, CPU_GDT_SEL_DF_TSS, &cpu->double_fault_tss);
- cpu_seg_set_data(cpu->gdt, CPU_GDT_SEL_CPU, (unsigned long)cpu);
+ cpu_seg_set_data(cpu->gdt, CPU_GDT_SEL_PERCPU, (unsigned long)pcpu_area);
#endif /* __LP64__ */
- gdtr.address = (unsigned long)cpu->gdt;
- gdtr.limit = sizeof(cpu->gdt) - 1;
- cpu_load_gdt(cpu, &gdtr);
+ cpu_init_gdtr(&gdtr, cpu);
+ cpu_load_gdt(&gdtr);
+ cpu_set_percpu_area(cpu, pcpu_area);
}
static void __init
@@ -235,7 +280,7 @@ cpu_init_double_fault_tss(struct cpu *cpu)
tss->cs = CPU_GDT_SEL_CODE;
tss->ss = CPU_GDT_SEL_DATA;
tss->ds = CPU_GDT_SEL_DATA;
- tss->fs = CPU_GDT_SEL_CPU;
+ tss->fs = CPU_GDT_SEL_PERCPU;
}
#endif /* __LP64__ */
@@ -297,9 +342,6 @@ cpu_cpuid(unsigned long *eax, unsigned long *ebx, unsigned long *ecx,
/*
* Initialize the given cpu structure for the current processor.
- *
- * On the BSP, this function is called before it can determine the cpu
- * structure. It is part of its task to make it possible.
*/
static void __init
cpu_init(struct cpu *cpu)
@@ -411,19 +453,13 @@ cpu_init(struct cpu *cpu)
void __init
cpu_setup(void)
{
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(cpu_array); i++) {
- cpu_array[i].self = &cpu_array[i];
- cpu_array[i].id = i;
- cpu_array[i].apic_id = CPU_INVALID_APIC_ID;
- cpu_array[i].state = CPU_STATE_OFF;
- cpu_array[i].boot_stack = 0;
- }
+ struct cpu *cpu;
- cpu_array_size = 1;
- cpu_array[0].double_fault_stack = (unsigned long)cpu_double_fault_stack;
- cpu_init(&cpu_array[0]);
+ cpu = percpu_ptr(cpu_desc, 0);
+ cpu_preinit(cpu, 0, CPU_INVALID_APIC_ID);
+ cpu->double_fault_stack = (unsigned long)cpu_double_fault_stack; /* XXX */
+ cpu_init(cpu);
+ cpu_nr_active = 1;
}
static void __init
@@ -459,35 +495,54 @@ cpu_info(const struct cpu *cpu)
}
void __init
+cpu_fixup_bsp_percpu_area(void)
+{
+ struct cpu_pseudo_desc gdtr;
+ struct cpu *cpu;
+ void *pcpu_area;
+
+ /*
+ * It's important to use the percpu interface here, and not the cpu_local
+ * accessors : this function updates the GDTR (and the GDT on i386), as a
+ * result it must reference the future version of the GDT from the newly
+ * allocated percpu area.
+ */
+ cpu = percpu_ptr(cpu_desc, 0);
+ pcpu_area = percpu_area(0);
+
+#ifndef __LP64__
+ cpu_seg_set_data(cpu->gdt, CPU_GDT_SEL_PERCPU, (unsigned long)pcpu_area);
+#endif /* __LP64__ */
+
+ cpu_init_gdtr(&gdtr, cpu);
+ cpu_load_gdt(&gdtr);
+ cpu_set_percpu_area(cpu, pcpu_area);
+}
+
+void __init
cpu_mp_register_lapic(unsigned int apic_id, int is_bsp)
{
- static int skip_warning __initdata;
+ struct cpu *cpu;
int error;
if (is_bsp) {
- if (cpu_array[0].apic_id != CPU_INVALID_APIC_ID)
- panic("cpu: another processor pretends to be the BSP");
+ cpu = percpu_ptr(cpu_desc, 0);
- cpu_array[0].apic_id = apic_id;
- return;
- }
-
- if (cpu_array_size == ARRAY_SIZE(cpu_array)) {
- if (!skip_warning) {
- printk("cpu: ignoring processor beyond id %u\n", MAX_CPUS - 1);
- skip_warning = 1;
- }
+ if (cpu->apic_id != CPU_INVALID_APIC_ID)
+ panic("cpu: another processor pretends to be the BSP");
+ cpu->apic_id = apic_id;
return;
}
- error = percpu_add(cpu_array_size);
+ error = percpu_add(cpu_nr_active);
if (error)
return;
- cpu_array[cpu_array_size].apic_id = apic_id;
- cpu_array_size++;
+ cpu = percpu_ptr(cpu_desc, cpu_nr_active);
+ cpu_preinit(cpu, cpu_nr_active, apic_id);
+ cpu_nr_active++;
}
void __init
@@ -501,7 +556,7 @@ cpu_mp_probe(void)
if (error)
panic("cpu: ACPI required to initialize local APIC");
- printk("cpu: %u processor(s) configured\n", cpu_array_size);
+ printk("cpu: %u processor(s) configured\n", cpu_count());
}
void __init
@@ -514,7 +569,7 @@ cpu_mp_setup(void)
size_t map_size;
unsigned int i;
- if (cpu_array_size == 1) {
+ if (cpu_count() == 1) {
pmap_mp_setup();
return;
}
@@ -548,8 +603,8 @@ cpu_mp_setup(void)
io_write_byte(CPU_MP_CMOS_PORT_REG, CPU_MP_CMOS_REG_RESET);
io_write_byte(CPU_MP_CMOS_PORT_DATA, CPU_MP_CMOS_DATA_RESET_WARM);
- for (i = 1; i < cpu_array_size; i++) {
- cpu = &cpu_array[i];
+ for (i = 1; i < cpu_count(); i++) {
+ cpu = percpu_ptr(cpu_desc, i);
cpu->boot_stack = vm_kmem_alloc(STACK_SIZE);
if (cpu->boot_stack == 0)
@@ -568,8 +623,8 @@ cpu_mp_setup(void)
*/
pmap_mp_setup();
- for (i = 1; i < cpu_array_size; i++) {
- cpu = &cpu_array[i];
+ for (i = 1; i < cpu_count(); i++) {
+ cpu = percpu_ptr(cpu_desc, i);
boot_ap_id = i;
/* Perform the "Universal Start-up Algorithm" */
@@ -590,7 +645,10 @@ cpu_mp_setup(void)
void __init
cpu_ap_setup(void)
{
- cpu_init(&cpu_array[boot_ap_id]);
+ struct cpu *cpu;
+
+ cpu = percpu_ptr(cpu_desc, boot_ap_id);
+ cpu_init(cpu);
cpu_check(cpu_current());
lapic_ap_setup();
}
diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h
index 08420ac6..52a2af03 100644
--- a/arch/x86/machine/cpu.h
+++ b/arch/x86/machine/cpu.h
@@ -30,7 +30,7 @@
#define CPU_GDT_SIZE 40
#else /* __LP64__ */
#define CPU_GDT_SEL_DF_TSS 32
-#define CPU_GDT_SEL_CPU 40
+#define CPU_GDT_SEL_PERCPU 40
#define CPU_GDT_SIZE 48
#endif /* __LP64__ */
@@ -92,6 +92,7 @@
#include <kern/assert.h>
#include <kern/macros.h>
#include <kern/param.h>
+#include <kern/percpu.h>
#include <kern/stddef.h>
#include <kern/stdint.h>
#include <machine/lapic.h>
@@ -211,27 +212,12 @@ struct cpu_tss {
} __packed;
/*
- * Forward declarations.
- */
-struct tcb;
-struct pmap;
-
-/*
* CPU states.
*/
#define CPU_STATE_OFF 0
#define CPU_STATE_ON 1
-/*
- * The fs segment register is used to store the address of the per-CPU data.
- * As a result, they must be at least 16-bytes aligned.
- */
-#define CPU_ALIGN (MAX(16, CPU_L1_SIZE))
-
struct cpu {
- struct cpu *self;
- struct tcb *tcb;
- struct pmap *pmap;
unsigned int id;
unsigned int apic_id;
char vendor_id[CPU_VENDOR_ID_SIZE];
@@ -256,7 +242,7 @@ struct cpu {
volatile int state;
unsigned long boot_stack;
unsigned long double_fault_stack;
-} __aligned(CPU_ALIGN);
+};
/*
* Macro to create functions that read/write control registers.
@@ -421,71 +407,68 @@ void cpu_halt_broadcast(void);
void cpu_halt_intr(struct trap_frame *frame);
/*
- * Macros to create access functions for per-CPU pointers.
- *
- * Changing such a pointer should only be done by low level scheduling
- * functions (e.g. context switching). Getting it is then migration-safe.
+ * This percpu variable contains the address of the percpu area for the local
+ * processor. This is normally the same value stored in the percpu module, but
+ * it can be directly accessed through a segment register.
*/
-#ifdef __LP64__
-#define CPU_ASM_MOV "movq"
-#else /* __LP64__ */
-#define CPU_ASM_MOV "movl"
-#endif /* __LP64__ */
+extern void *cpu_local_area;
-#define CPU_DECL_PERCPU(type, member) \
-static __always_inline type * \
-cpu_percpu_get_ ## member(void) \
-{ \
- type *ptr; \
- \
- asm volatile(CPU_ASM_MOV " %%fs:%1, %0" \
- : "=r" (ptr) \
- : "m" (*(type **)offsetof(struct cpu, member))); \
- return ptr; \
-} \
- \
-static __always_inline void \
-cpu_percpu_set_ ## member(type *ptr) \
-{ \
- asm volatile(CPU_ASM_MOV " %0, %%fs:%1" \
- : : "ri" (ptr), \
- "m" (*(type **)offsetof(struct cpu, member))); \
-}
+#define cpu_local_ptr(var) \
+MACRO_BEGIN \
+ typeof(var) *___ptr = &(var); \
+ \
+ asm volatile("add %%fs:%1, %0" \
+ : "+r" (___ptr) \
+ : "m" (cpu_local_area)); \
+ \
+ ___ptr; \
+MACRO_END
+
+#define cpu_local_var(var) (*cpu_local_ptr(var))
+
+/* Interrupt-safe percpu accessors for basic types */
-CPU_DECL_PERCPU(struct cpu, self)
-CPU_DECL_PERCPU(struct tcb, tcb)
-CPU_DECL_PERCPU(struct pmap, pmap)
+#define cpu_local_assign(var, val) \
+ asm volatile("mov %0, %%fs:%1" \
+ : : "r" (val), "m" (var));
+
+#define cpu_local_read(var) \
+MACRO_BEGIN \
+ typeof(var) ___val; \
+ \
+ asm volatile("mov %%fs:%1, %0" \
+ : "=r" (___val) \
+ : "m" (var)); \
+ \
+ ___val; \
+MACRO_END
static __always_inline struct cpu *
cpu_current(void)
{
- return cpu_percpu_get_self();
+ extern struct cpu cpu_desc;
+ return cpu_local_ptr(cpu_desc);
}
static __always_inline unsigned int
cpu_id(void)
{
- unsigned int id;
-
- asm volatile("movl %%fs:%1, %0"
- : "=r" (id)
- : "m" (*(unsigned int *)offsetof(struct cpu, id)));
- return id;
+ extern struct cpu cpu_desc;
+ return cpu_local_read(cpu_desc.id);
}
static __always_inline unsigned int
cpu_count(void)
{
- extern unsigned int cpu_array_size;
- return cpu_array_size;
+ extern unsigned int cpu_nr_active;
+ return cpu_nr_active;
}
static inline struct cpu *
cpu_from_id(unsigned int cpu)
{
- extern struct cpu cpu_array[MAX_CPUS];
- assert(cpu < ARRAY_SIZE(cpu_array));
- return &cpu_array[cpu];
+ extern struct cpu cpu_desc;
+ return percpu_ptr(cpu_desc, cpu);
}
static __always_inline void
@@ -610,6 +593,12 @@ void cpu_check(const struct cpu *cpu);
void cpu_info(const struct cpu *cpu);
/*
+ * Notify the cpu module that the true percpu area for the BSP has been
+ * created.
+ */
+void cpu_fixup_bsp_percpu_area(void);
+
+/*
* Register the presence of a local APIC.
*/
void cpu_mp_register_lapic(unsigned int apic_id, int is_bsp);
diff --git a/arch/x86/machine/cpu_asm.S b/arch/x86/machine/cpu_asm.S
index 56e8e2fb..9f8c0773 100644
--- a/arch/x86/machine/cpu_asm.S
+++ b/arch/x86/machine/cpu_asm.S
@@ -23,9 +23,9 @@
ASM_ENTRY(cpu_load_gdt)
#ifdef __LP64__
- lgdt (%rsi)
+ lgdt (%rdi)
#else /* __LP64__ */
- movl 8(%esp), %eax
+ movl 4(%esp), %eax
lgdt (%eax)
#endif /* __LP64__ */
@@ -34,17 +34,6 @@ ASM_ENTRY(cpu_load_gdt)
movl %eax, %es
movl %eax, %ss
-#ifdef __LP64__
- movq %rdi, %rax
- movq %rdi, %rdx
- shrq $32, %rdx
- movq $CPU_MSR_FSBASE, %rcx
- wrmsr
-#else /* __LP64__ */
- movl $CPU_GDT_SEL_CPU, %eax
- movl %eax, %fs
-#endif /* __LP64__ */
-
movl $CPU_GDT_SEL_NULL, %eax
movl %eax, %gs
diff --git a/arch/x86/machine/pmap.c b/arch/x86/machine/pmap.c
index 680396f9..064f2440 100644
--- a/arch/x86/machine/pmap.c
+++ b/arch/x86/machine/pmap.c
@@ -145,6 +145,8 @@ static struct pmap kernel_pmap_store __read_mostly;
struct pmap *kernel_pmap __read_mostly = &kernel_pmap_store;
static struct pmap_cpu_table kernel_pmap_cpu_tables[MAX_CPUS] __read_mostly;
+struct pmap *pmap_current_ptr __percpu;
+
#ifdef X86_PAE
/*
* Alignment required on page directory pointer tables.
@@ -716,7 +718,7 @@ pmap_bootstrap(void)
mutex_init(&cpu_table->lock);
}
- cpu_percpu_set_pmap(kernel_pmap);
+ cpu_local_assign(pmap_current_ptr, kernel_pmap);
pmap_boot_heap = (unsigned long)&_end;
pmap_boot_heap_current = pmap_boot_heap;
@@ -777,7 +779,7 @@ pmap_bootstrap(void)
void __init
pmap_ap_bootstrap(void)
{
- cpu_percpu_set_pmap(kernel_pmap);
+ cpu_local_assign(pmap_current_ptr, kernel_pmap);
if (cpu_has_global_pages())
cpu_enable_global_pages();
@@ -1667,7 +1669,7 @@ pmap_load(struct pmap *pmap)
/* TODO Lazy TLB invalidation */
- cpu_percpu_set_pmap(pmap);
+ cpu_local_assign(pmap_current_ptr, pmap);
/* TODO Implement per-CPU page tables for non-kernel pmaps */
cpu_table = pmap->cpu_tables[cpu_id()];
diff --git a/arch/x86/machine/pmap.h b/arch/x86/machine/pmap.h
index a494b2db..4f719cf1 100644
--- a/arch/x86/machine/pmap.h
+++ b/arch/x86/machine/pmap.h
@@ -269,7 +269,8 @@ void pmap_load(struct pmap *pmap);
static inline struct pmap *
pmap_current(void)
{
- return cpu_percpu_get_pmap();
+ extern struct pmap *pmap_current_ptr;
+ return cpu_local_read(pmap_current_ptr);
}
#endif /* __ASSEMBLER__ */
diff --git a/arch/x86/machine/tcb.c b/arch/x86/machine/tcb.c
index 6ba015aa..a154be11 100644
--- a/arch/x86/machine/tcb.c
+++ b/arch/x86/machine/tcb.c
@@ -30,6 +30,8 @@
void __noreturn tcb_context_load(struct tcb *tcb);
void __noreturn tcb_start(void);
+struct tcb *tcb_current_ptr __percpu;
+
int
tcb_init(struct tcb *tcb, void *stack, void (*fn)(void))
{
diff --git a/arch/x86/machine/tcb.h b/arch/x86/machine/tcb.h
index 79f953cb..5f455850 100644
--- a/arch/x86/machine/tcb.h
+++ b/arch/x86/machine/tcb.h
@@ -26,7 +26,7 @@
#include <machine/cpu.h>
/*
- * Architecture specific thread data.
+ * Thread control block.
*/
struct tcb {
unsigned long bp;
@@ -52,13 +52,15 @@ void tcb_context_switch(struct tcb *prev, struct tcb *next);
static inline struct tcb *
tcb_current(void)
{
- return cpu_percpu_get_tcb();
+ extern struct tcb *tcb_current_ptr;
+ return cpu_local_read(tcb_current_ptr);
}
static inline void
tcb_set_current(struct tcb *tcb)
{
- cpu_percpu_set_tcb(tcb);
+ extern struct tcb *tcb_current_ptr;
+ cpu_local_assign(tcb_current_ptr, tcb);
}
/*
diff --git a/kern/percpu.c b/kern/percpu.c
index 99398e0a..aab9d064 100644
--- a/kern/percpu.c
+++ b/kern/percpu.c
@@ -59,6 +59,7 @@ percpu_setup(void)
percpu_areas[0] = (void *)va;
memcpy(percpu_area(0), &_percpu, percpu_size);
+ cpu_fixup_bsp_percpu_area();
}
int __init
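
The final hunk ties things together: once percpu_setup() has copied the static
template into the newly allocated BSP area, cpu_fixup_bsp_percpu_area()
re-points the GDT (on i386), the fs base and cpu_local_area at that area. For
clarity, the fs-based cpu_local_ptr() added in cpu.h amounts to the plain-C
computation sketched below; cpu_local_ptr_equiv() is not part of the patch,
and it assumes (as the inline asm implies) that percpu variables are linked at
their offsets within the percpu section.

    #include <kern/percpu.h>
    #include <machine/cpu.h>

    /*
     * Plain-C equivalent of cpu_local_ptr(): the local copy of a percpu
     * variable lives at the variable's section offset inside the local
     * processor's area, whose base is cached in cpu_local_area and read
     * here through the fs segment.
     */
    static inline void *
    cpu_local_ptr_equiv(unsigned long offset)
    {
        return (char *)cpu_local_read(cpu_local_area) + offset;
    }

Calling it with the link-time address of cpu_desc, for instance, yields the
same pointer as cpu_current().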