author    | Richard Braun <rbraun@sceen.net> | 2014-09-03 21:22:26 +0200
committer | Richard Braun <rbraun@sceen.net> | 2014-09-03 21:22:26 +0200
commit    | eed59b8076e7668b5e0f874bd3ed28230f470bb1
tree      | d5b7ddf4c01d3c8b9bbcd4fe957c0b40522ac15b
parent    | 7fa931d4e56e8926058c0a2a10d731614dfd6f65
x86/cpu: improve percpu support
- declare CPU descriptors as percpu variables
- make the percpu segment register point to the percpu area instead of
the CPU descriptor
- remove the ugly accessors for the local CPU descriptor, pmap and TCB
and use percpu variables for them instead
- implement the cpu_local accessors as described in the percpu
documentation
-rw-r--r-- | arch/x86/machine/cpu.c     | 156
-rw-r--r-- | arch/x86/machine/cpu.h     | 109
-rw-r--r-- | arch/x86/machine/cpu_asm.S |  15
-rw-r--r-- | arch/x86/machine/pmap.c    |   8
-rw-r--r-- | arch/x86/machine/pmap.h    |   3
-rw-r--r-- | arch/x86/machine/tcb.c     |   2
-rw-r--r-- | arch/x86/machine/tcb.h     |   8
-rw-r--r-- | kern/percpu.c              |   1
8 files changed, 173 insertions, 129 deletions
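
Before the diff itself, here is a minimal usage sketch of the cpu_local/percpu interface the commit message refers to. The variable intr_count and the helpers around it are invented for illustration and are not part of this commit; only the __percpu attribute and the cpu_local_assign, cpu_local_read and percpu_var accessors come from the patch below (and from the percpu documentation it mentions).

/* Illustration only: intr_count and these helpers are hypothetical. */
#include <kern/percpu.h>
#include <machine/cpu.h>

unsigned long intr_count __percpu;      /* one copy per processor */

static inline void
intr_count_set_local(unsigned long value)
{
    /* Single fs-relative store, so the access can't be split by a migration. */
    cpu_local_assign(intr_count, value);
}

static inline unsigned long
intr_count_get_local(void)
{
    /* Single fs-relative load of the local processor's copy. */
    return cpu_local_read(intr_count);
}

static inline unsigned long
intr_count_get_remote(unsigned int cpu)
{
    /* Remote copies go through the percpu module, as cpu_get_boot_stack()
     * does below with percpu_var(cpu_desc.boot_stack, boot_ap_id). */
    return percpu_var(intr_count, cpu);
}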
diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c
index 73d7ca3d..0f362527 100644
--- a/arch/x86/machine/cpu.c
+++ b/arch/x86/machine/cpu.c
@@ -67,12 +67,17 @@
 #define CPU_MP_CMOS_DATA_RESET_WARM 0x0a
 #define CPU_MP_CMOS_RESET_VECTOR 0x467
 
-struct cpu cpu_array[MAX_CPUS];
+void *cpu_local_area __percpu;
 
 /*
- * Number of configured processors.
+ * Processor descriptor, one per CPU.
  */
-unsigned int cpu_array_size __read_mostly;
+struct cpu cpu_desc __percpu;
+
+/*
+ * Number of active processors.
+ */
+unsigned int cpu_nr_active __read_mostly;
 
 /*
  * Interrupt descriptor table.
@@ -81,6 +86,9 @@ static struct cpu_gate_desc cpu_idt[CPU_IDT_SIZE] __aligned(8) __read_mostly;
 
 /*
  * Double fault handler, and stack for the main processor.
+ *
+ * TODO Declare as init data, and replace the BSP stack with kernel virtual
+ * memory.
  */
 static unsigned long cpu_double_fault_handler;
 static char cpu_double_fault_stack[STACK_SIZE] __aligned(DATA_ALIGN);
@@ -88,7 +96,16 @@ static char cpu_double_fault_stack[STACK_SIZE] __aligned(DATA_ALIGN);
 unsigned long __init
 cpu_get_boot_stack(void)
 {
-    return cpu_array[boot_ap_id].boot_stack;
+    return percpu_var(cpu_desc.boot_stack, boot_ap_id);
+}
+
+static void __init
+cpu_preinit(struct cpu *cpu, unsigned int id, unsigned int apic_id)
+{
+    cpu->id = id;
+    cpu->apic_id = apic_id;
+    cpu->state = CPU_STATE_OFF;
+    cpu->boot_stack = 0;
 }
 
 static void
@@ -168,15 +185,43 @@ cpu_seg_set_tss(char *table, unsigned int selector, struct cpu_tss *tss)
 }
 
 /*
- * Set the given GDT for the current processor, and reload its segment
- * registers.
+ * Set the given GDT for the current processor.
+ *
+ * On i386, the ds, es and ss segment registers are reloaded. In any case,
+ * the gs segment register is set to the null selector. The fs segment
+ * register, which points to the percpu area, must be set separately.
  */
-void cpu_load_gdt(struct cpu *cpu, struct cpu_pseudo_desc *gdtr);
+void cpu_load_gdt(struct cpu_pseudo_desc *gdtr);
+
+static inline void __init
+cpu_set_percpu_area(const struct cpu *cpu, void *area)
+{
+#ifdef __LP64__
+    unsigned long va;
+
+    va = (unsigned long)area;
+    cpu_set_msr(CPU_MSR_FSBASE, (uint32_t)va, (uint32_t)(va >> 32));
+#else /* __LP64__ */
+    asm volatile("mov %0, %%fs" : : "r" (CPU_GDT_SEL_PERCPU));
+#endif /* __LP64__ */
+
+    percpu_var(cpu_local_area, cpu->id) = area;
+}
+
+static void __init
+cpu_init_gdtr(struct cpu_pseudo_desc *gdtr, const struct cpu *cpu)
+{
+    gdtr->address = (unsigned long)cpu->gdt;
+    gdtr->limit = sizeof(cpu->gdt) - 1;
+}
 
 static void __init
 cpu_init_gdt(struct cpu *cpu)
 {
     struct cpu_pseudo_desc gdtr;
+    void *pcpu_area;
+
+    pcpu_area = percpu_area(cpu->id);
 
     cpu_seg_set_null(cpu->gdt, CPU_GDT_SEL_NULL);
     cpu_seg_set_code(cpu->gdt, CPU_GDT_SEL_CODE);
@@ -185,12 +230,12 @@ cpu_init_gdt(struct cpu *cpu)
 
 #ifndef __LP64__
     cpu_seg_set_tss(cpu->gdt, CPU_GDT_SEL_DF_TSS, &cpu->double_fault_tss);
-    cpu_seg_set_data(cpu->gdt, CPU_GDT_SEL_CPU, (unsigned long)cpu);
+    cpu_seg_set_data(cpu->gdt, CPU_GDT_SEL_PERCPU, (unsigned long)pcpu_area);
 #endif /* __LP64__ */
 
-    gdtr.address = (unsigned long)cpu->gdt;
-    gdtr.limit = sizeof(cpu->gdt) - 1;
-    cpu_load_gdt(cpu, &gdtr);
+    cpu_init_gdtr(&gdtr, cpu);
+    cpu_load_gdt(&gdtr);
+    cpu_set_percpu_area(cpu, pcpu_area);
 }
 
 static void __init
@@ -235,7 +280,7 @@ cpu_init_double_fault_tss(struct cpu *cpu)
     tss->cs = CPU_GDT_SEL_CODE;
     tss->ss = CPU_GDT_SEL_DATA;
     tss->ds = CPU_GDT_SEL_DATA;
-    tss->fs = CPU_GDT_SEL_CPU;
+    tss->fs = CPU_GDT_SEL_PERCPU;
 }
 
 #endif /* __LP64__ */
@@ -297,9 +342,6 @@ cpu_cpuid(unsigned long *eax, unsigned long *ebx, unsigned long *ecx,
 
 /*
  * Initialize the given cpu structure for the current processor.
- *
- * On the BSP, this function is called before it can determine the cpu
- * structure. It is part of its task to make it possible.
  */
 static void __init
 cpu_init(struct cpu *cpu)
@@ -411,19 +453,13 @@ cpu_init(struct cpu *cpu)
 void __init
 cpu_setup(void)
 {
-    size_t i;
-
-    for (i = 0; i < ARRAY_SIZE(cpu_array); i++) {
-        cpu_array[i].self = &cpu_array[i];
-        cpu_array[i].id = i;
-        cpu_array[i].apic_id = CPU_INVALID_APIC_ID;
-        cpu_array[i].state = CPU_STATE_OFF;
-        cpu_array[i].boot_stack = 0;
-    }
+    struct cpu *cpu;
 
-    cpu_array_size = 1;
-    cpu_array[0].double_fault_stack = (unsigned long)cpu_double_fault_stack;
-    cpu_init(&cpu_array[0]);
+    cpu = percpu_ptr(cpu_desc, 0);
+    cpu_preinit(cpu, 0, CPU_INVALID_APIC_ID);
+    cpu->double_fault_stack = (unsigned long)cpu_double_fault_stack; /* XXX */
+    cpu_init(cpu);
+    cpu_nr_active = 1;
 }
 
 static void __init
@@ -459,35 +495,54 @@ cpu_info(const struct cpu *cpu)
 }
 
 void __init
+cpu_fixup_bsp_percpu_area(void)
+{
+    struct cpu_pseudo_desc gdtr;
+    struct cpu *cpu;
+    void *pcpu_area;
+
+    /*
+     * It's important to use the percpu interface here, and not the cpu_local
+     * accessors : this function updates the GDTR (and the GDT on i386), as a
+     * result it must reference the future version of the GDT from the newly
+     * allocated percpu area.
+     */
+    cpu = percpu_ptr(cpu_desc, 0);
+    pcpu_area = percpu_area(0);
+
+#ifndef __LP64__
+    cpu_seg_set_data(cpu->gdt, CPU_GDT_SEL_PERCPU, (unsigned long)pcpu_area);
+#endif /* __LP64__ */
+
+    cpu_init_gdtr(&gdtr, cpu);
+    cpu_load_gdt(&gdtr);
+    cpu_set_percpu_area(cpu, pcpu_area);
+}
+
+void __init
 cpu_mp_register_lapic(unsigned int apic_id, int is_bsp)
 {
-    static int skip_warning __initdata;
+    struct cpu *cpu;
     int error;
 
     if (is_bsp) {
-        if (cpu_array[0].apic_id != CPU_INVALID_APIC_ID)
-            panic("cpu: another processor pretends to be the BSP");
+        cpu = percpu_ptr(cpu_desc, 0);
 
-        cpu_array[0].apic_id = apic_id;
-        return;
-    }
-
-    if (cpu_array_size == ARRAY_SIZE(cpu_array)) {
-        if (!skip_warning) {
-            printk("cpu: ignoring processor beyond id %u\n", MAX_CPUS - 1);
-            skip_warning = 1;
-        }
+        if (cpu->apic_id != CPU_INVALID_APIC_ID)
+            panic("cpu: another processor pretends to be the BSP");
 
+        cpu->apic_id = apic_id;
         return;
     }
 
-    error = percpu_add(cpu_array_size);
+    error = percpu_add(cpu_nr_active);
 
     if (error)
         return;
 
-    cpu_array[cpu_array_size].apic_id = apic_id;
-    cpu_array_size++;
+    cpu = percpu_ptr(cpu_desc, cpu_nr_active);
+    cpu_preinit(cpu, cpu_nr_active, apic_id);
+    cpu_nr_active++;
 }
 
 void __init
@@ -501,7 +556,7 @@ cpu_mp_probe(void)
     if (error)
         panic("cpu: ACPI required to initialize local APIC");
 
-    printk("cpu: %u processor(s) configured\n", cpu_array_size);
+    printk("cpu: %u processor(s) configured\n", cpu_count());
 }
 
 void __init
@@ -514,7 +569,7 @@ cpu_mp_setup(void)
     size_t map_size;
     unsigned int i;
 
-    if (cpu_array_size == 1) {
+    if (cpu_count() == 1) {
         pmap_mp_setup();
         return;
     }
@@ -548,8 +603,8 @@ cpu_mp_setup(void)
     io_write_byte(CPU_MP_CMOS_PORT_REG, CPU_MP_CMOS_REG_RESET);
     io_write_byte(CPU_MP_CMOS_PORT_DATA, CPU_MP_CMOS_DATA_RESET_WARM);
 
-    for (i = 1; i < cpu_array_size; i++) {
-        cpu = &cpu_array[i];
+    for (i = 1; i < cpu_count(); i++) {
+        cpu = percpu_ptr(cpu_desc, i);
         cpu->boot_stack = vm_kmem_alloc(STACK_SIZE);
 
         if (cpu->boot_stack == 0)
@@ -568,8 +623,8 @@ cpu_mp_setup(void)
      */
     pmap_mp_setup();
 
-    for (i = 1; i < cpu_array_size; i++) {
-        cpu = &cpu_array[i];
+    for (i = 1; i < cpu_count(); i++) {
+        cpu = percpu_ptr(cpu_desc, i);
         boot_ap_id = i;
 
         /* Perform the "Universal Start-up Algorithm" */
@@ -590,7 +645,10 @@ cpu_mp_setup(void)
 void __init
 cpu_ap_setup(void)
 {
-    cpu_init(&cpu_array[boot_ap_id]);
+    struct cpu *cpu;
+
+    cpu = percpu_ptr(cpu_desc, boot_ap_id);
+    cpu_init(cpu);
     cpu_check(cpu_current());
     lapic_ap_setup();
 }
diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h
index 08420ac6..52a2af03 100644
--- a/arch/x86/machine/cpu.h
+++ b/arch/x86/machine/cpu.h
@@ -30,7 +30,7 @@
 #define CPU_GDT_SIZE 40
 #else /* __LP64__ */
 #define CPU_GDT_SEL_DF_TSS 32
-#define CPU_GDT_SEL_CPU 40
+#define CPU_GDT_SEL_PERCPU 40
 #define CPU_GDT_SIZE 48
 #endif /* __LP64__ */
 
@@ -92,6 +92,7 @@
 #include <kern/assert.h>
 #include <kern/macros.h>
 #include <kern/param.h>
+#include <kern/percpu.h>
 #include <kern/stddef.h>
 #include <kern/stdint.h>
 #include <machine/lapic.h>
@@ -211,27 +212,12 @@ struct cpu_tss {
 } __packed;
 
 /*
- * Forward declarations.
- */
-struct tcb;
-struct pmap;
-
-/*
  * CPU states.
  */
 #define CPU_STATE_OFF 0
 #define CPU_STATE_ON 1
 
-/*
- * The fs segment register is used to store the address of the per-CPU data.
- * As a result, they must be at least 16-bytes aligned.
- */
-#define CPU_ALIGN (MAX(16, CPU_L1_SIZE))
-
 struct cpu {
-    struct cpu *self;
-    struct tcb *tcb;
-    struct pmap *pmap;
     unsigned int id;
     unsigned int apic_id;
     char vendor_id[CPU_VENDOR_ID_SIZE];
@@ -256,7 +242,7 @@ struct cpu {
     volatile int state;
     unsigned long boot_stack;
    unsigned long double_fault_stack;
-} __aligned(CPU_ALIGN);
+};
 
 /*
  * Macro to create functions that read/write control registers.
  */
@@ -421,71 +407,68 @@ void cpu_halt_broadcast(void);
 void cpu_halt_intr(struct trap_frame *frame);
 
 /*
- * Macros to create access functions for per-CPU pointers.
- *
- * Changing such a pointer should only be done by low level scheduling
- * functions (e.g. context switching). Getting it is then migration-safe.
+ * This percpu variable contains the address of the percpu area for the local
+ * processor. This is normally the same value stored in the percpu module, but
+ * it can be directly accessed through a segment register.
  */
-#ifdef __LP64__
-#define CPU_ASM_MOV "movq"
-#else /* __LP64__ */
-#define CPU_ASM_MOV "movl"
-#endif /* __LP64__ */
+extern void *cpu_local_area;
 
-#define CPU_DECL_PERCPU(type, member)                               \
-static __always_inline type *                                       \
-cpu_percpu_get_ ## member(void)                                     \
-{                                                                   \
-    type *ptr;                                                      \
-                                                                    \
-    asm volatile(CPU_ASM_MOV " %%fs:%1, %0"                         \
-                 : "=r" (ptr)                                       \
-                 : "m" (*(type **)offsetof(struct cpu, member)));   \
-    return ptr;                                                     \
-}                                                                   \
-                                                                    \
-static __always_inline void                                         \
-cpu_percpu_set_ ## member(type *ptr)                                \
-{                                                                   \
-    asm volatile(CPU_ASM_MOV " %0, %%fs:%1"                         \
-                 : : "ri" (ptr),                                    \
-                     "m" (*(type **)offsetof(struct cpu, member))); \
-}
+#define cpu_local_ptr(var)                      \
+MACRO_BEGIN                                     \
+    typeof(var) *___ptr = &(var);               \
+                                                \
+    asm volatile("add %%fs:%1, %0"              \
+                 : "+r" (___ptr)                \
+                 : "m" (cpu_local_area));       \
+                                                \
+    ___ptr;                                     \
+MACRO_END
+
+#define cpu_local_var(var) (*cpu_local_ptr(var))
+
+/* Interrupt-safe percpu accessors for basic types */
 
-CPU_DECL_PERCPU(struct cpu, self)
-CPU_DECL_PERCPU(struct tcb, tcb)
-CPU_DECL_PERCPU(struct pmap, pmap)
+#define cpu_local_assign(var, val)              \
+    asm volatile("mov %0, %%fs:%1"              \
+                 : : "r" (val), "m" (var));
+
+#define cpu_local_read(var)                     \
+MACRO_BEGIN                                     \
+    typeof(var) ___val;                         \
+                                                \
+    asm volatile("mov %%fs:%1, %0"              \
+                 : "=r" (___val)                \
+                 : "m" (var));                  \
+                                                \
+    ___val;                                     \
+MACRO_END
 
 static __always_inline struct cpu *
 cpu_current(void)
 {
-    return cpu_percpu_get_self();
+    extern struct cpu cpu_desc;
+    return cpu_local_ptr(cpu_desc);
 }
 
 static __always_inline unsigned int
 cpu_id(void)
 {
-    unsigned int id;
-
-    asm volatile("movl %%fs:%1, %0"
-                 : "=r" (id)
-                 : "m" (*(unsigned int *)offsetof(struct cpu, id)));
-    return id;
+    extern struct cpu cpu_desc;
+    return cpu_local_read(cpu_desc.id);
 }
 
 static __always_inline unsigned int
 cpu_count(void)
 {
-    extern unsigned int cpu_array_size;
-    return cpu_array_size;
+    extern unsigned int cpu_nr_active;
+    return cpu_nr_active;
 }
 
 static inline struct cpu *
 cpu_from_id(unsigned int cpu)
 {
-    extern struct cpu cpu_array[MAX_CPUS];
-    assert(cpu < ARRAY_SIZE(cpu_array));
-    return &cpu_array[cpu];
+    extern struct cpu cpu_desc;
+    return percpu_ptr(cpu_desc, cpu);
 }
 
 static __always_inline void
@@ -610,6 +593,12 @@ void cpu_check(const struct cpu *cpu);
 void cpu_info(const struct cpu *cpu);
 
 /*
+ * Notify the cpu module that the true percpu area for the BSP has been
+ * created.
+ */
+void cpu_fixup_bsp_percpu_area(void);
+
+/*
  * Register the presence of a local APIC.
  */
 void cpu_mp_register_lapic(unsigned int apic_id, int is_bsp);
diff --git a/arch/x86/machine/cpu_asm.S b/arch/x86/machine/cpu_asm.S
index 56e8e2fb..9f8c0773 100644
--- a/arch/x86/machine/cpu_asm.S
+++ b/arch/x86/machine/cpu_asm.S
@@ -23,9 +23,9 @@
 
 ASM_ENTRY(cpu_load_gdt)
 #ifdef __LP64__
-    lgdt (%rsi)
+    lgdt (%rdi)
 #else /* __LP64__ */
-    movl 8(%esp), %eax
+    movl 4(%esp), %eax
     lgdt (%eax)
 #endif /* __LP64__ */
 
@@ -34,17 +34,6 @@ ASM_ENTRY(cpu_load_gdt)
     movl %eax, %es
     movl %eax, %ss
 
-#ifdef __LP64__
-    movq %rdi, %rax
-    movq %rdi, %rdx
-    shrq $32, %rdx
-    movq $CPU_MSR_FSBASE, %rcx
-    wrmsr
-#else /* __LP64__ */
-    movl $CPU_GDT_SEL_CPU, %eax
-    movl %eax, %fs
-#endif /* __LP64__ */
-
     movl $CPU_GDT_SEL_NULL, %eax
     movl %eax, %gs
 
diff --git a/arch/x86/machine/pmap.c b/arch/x86/machine/pmap.c
index 680396f9..064f2440 100644
--- a/arch/x86/machine/pmap.c
+++ b/arch/x86/machine/pmap.c
@@ -145,6 +145,8 @@ static struct pmap kernel_pmap_store __read_mostly;
 struct pmap *kernel_pmap __read_mostly = &kernel_pmap_store;
 static struct pmap_cpu_table kernel_pmap_cpu_tables[MAX_CPUS] __read_mostly;
 
+struct pmap *pmap_current_ptr __percpu;
+
 #ifdef X86_PAE
 /*
  * Alignment required on page directory pointer tables.
@@ -716,7 +718,7 @@ pmap_bootstrap(void)
         mutex_init(&cpu_table->lock);
     }
 
-    cpu_percpu_set_pmap(kernel_pmap);
+    cpu_local_assign(pmap_current_ptr, kernel_pmap);
 
     pmap_boot_heap = (unsigned long)&_end;
     pmap_boot_heap_current = pmap_boot_heap;
@@ -777,7 +779,7 @@ pmap_bootstrap(void)
 void __init
 pmap_ap_bootstrap(void)
 {
-    cpu_percpu_set_pmap(kernel_pmap);
+    cpu_local_assign(pmap_current_ptr, kernel_pmap);
 
     if (cpu_has_global_pages())
         cpu_enable_global_pages();
@@ -1667,7 +1669,7 @@ pmap_load(struct pmap *pmap)
 
     /* TODO Lazy TLB invalidation */
 
-    cpu_percpu_set_pmap(pmap);
+    cpu_local_assign(pmap_current_ptr, pmap);
 
     /* TODO Implement per-CPU page tables for non-kernel pmaps */
     cpu_table = pmap->cpu_tables[cpu_id()];
diff --git a/arch/x86/machine/pmap.h b/arch/x86/machine/pmap.h
index a494b2db..4f719cf1 100644
--- a/arch/x86/machine/pmap.h
+++ b/arch/x86/machine/pmap.h
@@ -269,7 +269,8 @@ void pmap_load(struct pmap *pmap);
 static inline struct pmap *
 pmap_current(void)
 {
-    return cpu_percpu_get_pmap();
+    extern struct pmap *pmap_current_ptr;
+    return cpu_local_read(pmap_current_ptr);
 }
 
 #endif /* __ASSEMBLER__ */
diff --git a/arch/x86/machine/tcb.c b/arch/x86/machine/tcb.c
index 6ba015aa..a154be11 100644
--- a/arch/x86/machine/tcb.c
+++ b/arch/x86/machine/tcb.c
@@ -30,6 +30,8 @@ void __noreturn tcb_context_load(struct tcb *tcb);
 
 void __noreturn tcb_start(void);
 
+struct tcb *tcb_current_ptr __percpu;
+
 int
 tcb_init(struct tcb *tcb, void *stack, void (*fn)(void))
 {
diff --git a/arch/x86/machine/tcb.h b/arch/x86/machine/tcb.h
index 79f953cb..5f455850 100644
--- a/arch/x86/machine/tcb.h
+++ b/arch/x86/machine/tcb.h
@@ -26,7 +26,7 @@
 #include <machine/cpu.h>
 
 /*
- * Architecture specific thread data.
+ * Thread control block.
  */
 struct tcb {
     unsigned long bp;
@@ -52,13 +52,15 @@ void tcb_context_switch(struct tcb *prev, struct tcb *next);
 static inline struct tcb *
 tcb_current(void)
 {
-    return cpu_percpu_get_tcb();
+    extern struct tcb *tcb_current_ptr;
+    return cpu_local_read(tcb_current_ptr);
 }
 
 static inline void
 tcb_set_current(struct tcb *tcb)
 {
-    cpu_percpu_set_tcb(tcb);
+    extern struct tcb *tcb_current_ptr;
+    cpu_local_assign(tcb_current_ptr, tcb);
 }
 
 /*
diff --git a/kern/percpu.c b/kern/percpu.c
index 99398e0a..aab9d064 100644
--- a/kern/percpu.c
+++ b/kern/percpu.c
@@ -59,6 +59,7 @@ percpu_setup(void)
 
     percpu_areas[0] = (void *)va;
     memcpy(percpu_area(0), &_percpu, percpu_size);
+    cpu_fixup_bsp_percpu_area();
 }
 
 int __init
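
One closing remark, again as a sketch rather than part of the commit: the new header provides two flavors of local access. cpu_local_read() and cpu_local_assign() keep the whole access in a single fs-relative instruction, which is presumably why cpu_id(), pmap_current() and tcb_current() rely on them, while cpu_local_ptr() materializes an ordinary pointer into the local percpu area, so its result should only be used while the caller cannot change processors (callers such as cpu_current() are assumed to satisfy this by construction).

#include <machine/cpu.h>

/* Hypothetical helper, not in the patch. */
unsigned int
local_apic_id(void)
{
    extern struct cpu cpu_desc;
    const struct cpu *cpu;

    cpu = cpu_local_ptr(cpu_desc);  /* pointer into this CPU's percpu area */
    return cpu->apic_id;            /* only meaningful while still on that CPU */
}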