/*
 * Copyright (c) 2010-2018 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Delay used for frequency measurement, in microseconds.
 */
#define CPU_FREQ_CAL_DELAY 1000000

#define CPU_CPUID_TYPE_MASK         0x00003000
#define CPU_CPUID_TYPE_SHIFT        12
#define CPU_CPUID_FAMILY_MASK       0x00000f00
#define CPU_CPUID_FAMILY_SHIFT      8
#define CPU_CPUID_EXTFAMILY_MASK    0x0ff00000
#define CPU_CPUID_EXTFAMILY_SHIFT   20
#define CPU_CPUID_MODEL_MASK        0x000000f0
#define CPU_CPUID_MODEL_SHIFT       4
#define CPU_CPUID_EXTMODEL_MASK     0x000f0000
#define CPU_CPUID_EXTMODEL_SHIFT    16
#define CPU_CPUID_STEPPING_MASK     0x0000000f
#define CPU_CPUID_STEPPING_SHIFT    0
#define CPU_CPUID_BRAND_MASK        0x000000ff
#define CPU_CPUID_BRAND_SHIFT       0
#define CPU_CPUID_CLFLUSH_MASK      0x0000ff00
#define CPU_CPUID_CLFLUSH_SHIFT     8
#define CPU_CPUID_APIC_ID_MASK      0xff000000
#define CPU_CPUID_APIC_ID_SHIFT     24

#define CPU_INVALID_APIC_ID ((unsigned int)-1)

struct cpu_vendor {
    unsigned int id;
    const char *str;
};

/*
 * IST indexes (0 means no stack switch).
 */
#define CPU_TSS_IST_INTR    1
#define CPU_TSS_IST_DF      2

/*
 * MP related CMOS ports, registers and values.
 */
#define CPU_MP_CMOS_PORT_REG        0x70
#define CPU_MP_CMOS_PORT_DATA       0x71
#define CPU_MP_CMOS_REG_RESET       0x0f
#define CPU_MP_CMOS_DATA_RESET_WARM 0x0a
#define CPU_MP_CMOS_RESET_VECTOR    0x467

/*
 * Priority of the shutdown operations.
 *
 * Last resort, lower than everything else.
 */
#define CPU_SHUTDOWN_PRIORITY 0

struct cpu_gate_desc {
    uint32_t word1;
    uint32_t word2;

#ifdef __LP64__
    uint32_t word3;
    uint32_t word4;
#endif /* __LP64__ */
};

struct cpu_idt {
    alignas(CPU_L1_SIZE) struct cpu_gate_desc descs[CPU_NR_EXC_VECTORS];
};

struct cpu_pseudo_desc {
    uint16_t limit;
    uintptr_t address;
} __packed;

#ifdef __LP64__
#define cpu_exc_frame_attrs
#else /* __LP64__ */
#define cpu_exc_frame_attrs __packed
#endif /* __LP64__ */

struct cpu_exc_frame {
    unsigned long words[CPU_EXC_FRAME_SIZE];
} cpu_exc_frame_attrs;

/*
 * Type for low level exception handlers.
 *
 * Low level exception handlers are directly installed in the IDT and are
 * first run by the processor when an exception occurs. They route execution
 * through either the main exception or interrupt handler.
 */
typedef void (*cpu_ll_exc_fn_t)(void);

typedef void (*cpu_exc_handler_fn_t)(const struct cpu_exc_frame *frame);

struct cpu_exc_handler {
    cpu_exc_handler_fn_t fn;
};

struct cpu_intr_handler {
    cpu_intr_handler_fn_t fn;
};

/*
 * Set the given GDT for the current processor.
 *
 * On i386, the ds, es and ss segment registers are reloaded.
 *
 * The fs and gs segment registers, which point to the percpu and the TLS
 * areas respectively, must be set separately.
*/ void cpu_load_gdt(struct cpu_pseudo_desc *gdtr); /* * Return a pointer to the processor-local interrupt stack. * * This function is called by the low level exception handling code. */ void * cpu_get_intr_stack_ptr(void); /* * Common entry points for exceptions and interrupts. */ void cpu_exc_main(const struct cpu_exc_frame *frame); void cpu_intr_main(const struct cpu_exc_frame *frame); void *cpu_local_area __percpu; /* * CPU descriptor, one per CPU. */ struct cpu cpu_desc __percpu; /* * Number of active processors. */ unsigned int cpu_nr_active __read_mostly = 1; /* * Processor frequency, assumed fixed and equal on all processors. */ static uint64_t cpu_freq __read_mostly; static const struct cpu_tls_seg cpu_tls_seg = { .ssp_guard_word = SSP_GUARD_WORD, }; static struct cpu_idt cpu_idt; /* * This table only exists during initialization, and is a way to * communicate the list of low level handlers from assembly to C. */ extern cpu_ll_exc_fn_t cpu_ll_exc_handler_addrs[CPU_NR_EXC_VECTORS]; static struct cpu_exc_handler cpu_exc_handlers[CPU_NR_EXC_VECTORS] __read_mostly; static struct cpu_intr_handler cpu_intr_handlers[CPU_NR_EXC_VECTORS] __read_mostly; static const struct cpu_vendor cpu_vendors[] = { { CPU_VENDOR_INTEL, "GenuineIntel" }, { CPU_VENDOR_AMD, "AuthenticAMD" }, }; static const char *cpu_feature_names[] = { [CPU_FEATURE_FPU] = "fpu", [CPU_FEATURE_PSE] = "pse", [CPU_FEATURE_PAE] = "pae", [CPU_FEATURE_MSR] = "msr", [CPU_FEATURE_CX8] = "cx8", [CPU_FEATURE_APIC] = "apic", [CPU_FEATURE_PGE] = "pge", [CPU_FEATURE_1GP] = "1gp", [CPU_FEATURE_LM] = "lm", }; static void __init cpu_exc_handler_init(struct cpu_exc_handler *handler, cpu_exc_handler_fn_t fn) { handler->fn = fn; } static void cpu_exc_handler_run(const struct cpu_exc_handler *handler, const struct cpu_exc_frame *frame) { handler->fn(frame); } static void __init cpu_intr_handler_init(struct cpu_intr_handler *handler, cpu_intr_handler_fn_t fn) { handler->fn = fn; } static void cpu_intr_handler_run(const struct cpu_intr_handler *handler, unsigned int vector) { handler->fn(vector); } static cpu_ll_exc_fn_t __init cpu_get_ll_exc_handler(unsigned int vector) { assert(vector < ARRAY_SIZE(cpu_ll_exc_handler_addrs)); return cpu_ll_exc_handler_addrs[vector]; } static struct cpu_exc_handler * cpu_get_exc_handler(unsigned int vector) { assert(vector < ARRAY_SIZE(cpu_exc_handlers)); return &cpu_exc_handlers[vector]; } static void __init cpu_register_exc(unsigned int vector, cpu_exc_handler_fn_t fn) { cpu_exc_handler_init(cpu_get_exc_handler(vector), fn); } static struct cpu_intr_handler * cpu_get_intr_handler(unsigned int vector) { assert(vector < ARRAY_SIZE(cpu_intr_handlers)); return &cpu_intr_handlers[vector]; } void __init cpu_register_intr(unsigned int vector, cpu_intr_handler_fn_t fn) { cpu_intr_handler_init(cpu_get_intr_handler(vector), fn); } static void __init cpu_gate_desc_init_intr(struct cpu_gate_desc *desc, cpu_ll_exc_fn_t fn, unsigned int ist_index) { uintptr_t addr; addr = (uintptr_t)fn; desc->word1 = (CPU_GDT_SEL_CODE << 16) | (addr & CPU_DESC_GATE_OFFSET_LOW_MASK); desc->word2 = (addr & CPU_DESC_GATE_OFFSET_HIGH_MASK) | CPU_DESC_PRESENT | CPU_DESC_TYPE_GATE_INTR; #ifdef __LP64__ desc->word2 |= ist_index & CPU_DESC_SEG_IST_MASK; desc->word3 = addr >> 32; desc->word4 = 0; #else /* __LP64__ */ (void)ist_index; #endif /* __LP64__ */ } #ifndef __LP64__ static void __init cpu_gate_desc_init_task(struct cpu_gate_desc *desc, unsigned int tss_seg_sel) { desc->word2 = CPU_DESC_PRESENT | CPU_DESC_TYPE_GATE_TASK; desc->word1 = 
tss_seg_sel << 16; } #endif /* __LP64__ */ static struct cpu_gate_desc * __init cpu_idt_get_desc(struct cpu_idt *idt, unsigned int vector) { assert(vector < ARRAY_SIZE(idt->descs)); return &idt->descs[vector]; } static void __init cpu_idt_set_intr_gate(struct cpu_idt *idt, unsigned int vector, cpu_ll_exc_fn_t fn) { struct cpu_gate_desc *desc; desc = cpu_idt_get_desc(idt, vector); cpu_gate_desc_init_intr(desc, fn, CPU_TSS_IST_INTR); } static void __init cpu_idt_setup_double_fault(struct cpu_idt *idt) { struct cpu_gate_desc *desc; desc = cpu_idt_get_desc(idt, CPU_EXC_DF); #ifdef __LP64__ cpu_ll_exc_fn_t fn; fn = cpu_get_ll_exc_handler(CPU_EXC_DF); cpu_gate_desc_init_intr(desc, fn, CPU_TSS_IST_DF); #else /* __LP64__ */ cpu_gate_desc_init_task(desc, CPU_GDT_SEL_DF_TSS); #endif /* __LP64__ */ } static void cpu_idt_load(const struct cpu_idt *idt) { struct cpu_pseudo_desc idtr; idtr.address = (uintptr_t)idt->descs; idtr.limit = sizeof(idt->descs) - 1; asm volatile("lidt %0" : : "m" (idtr)); } uint64_t cpu_get_freq(void) { return cpu_freq; } static uint64_t cpu_get_tsc(void) { uint32_t high, low; asm volatile("rdtsc" : "=a" (low), "=d" (high)); return ((uint64_t)high << 32) | low; } void cpu_delay(unsigned long usecs) { int64_t total, prev, count, diff; assert(usecs != 0); total = DIV_CEIL((int64_t)usecs * cpu_freq, 1000000); prev = cpu_get_tsc(); do { count = cpu_get_tsc(); diff = count - prev; prev = count; total -= diff; cpu_pause(); } while (total > 0); } static void cpu_show_thread(void) { struct thread *thread; thread = thread_self(); printf("cpu: interrupted thread: %p (%s)\n", thread, thread_name(thread)); } #ifdef __LP64__ static void cpu_show_frame(const struct cpu_exc_frame *frame) { printf("cpu: rax: %016lx rbx: %016lx rcx: %016lx\n" "cpu: rdx: %016lx rbp: %016lx rsi: %016lx\n" "cpu: rdi: %016lx r8: %016lx r9: %016lx\n" "cpu: r10: %016lx r11: %016lx r12: %016lx\n" "cpu: r13: %016lx r14: %016lx r15: %016lx\n" "cpu: vector: %lu error: %08lx\n" "cpu: rip: %016lx cs: %lu rflags: %016lx\n" "cpu: rsp: %016lx ss: %lu\n", frame->words[CPU_EXC_FRAME_RAX], frame->words[CPU_EXC_FRAME_RBX], frame->words[CPU_EXC_FRAME_RCX], frame->words[CPU_EXC_FRAME_RDX], frame->words[CPU_EXC_FRAME_RBP], frame->words[CPU_EXC_FRAME_RSI], frame->words[CPU_EXC_FRAME_RDI], frame->words[CPU_EXC_FRAME_R8], frame->words[CPU_EXC_FRAME_R9], frame->words[CPU_EXC_FRAME_R10], frame->words[CPU_EXC_FRAME_R11], frame->words[CPU_EXC_FRAME_R12], frame->words[CPU_EXC_FRAME_R13], frame->words[CPU_EXC_FRAME_R14], frame->words[CPU_EXC_FRAME_R15], frame->words[CPU_EXC_FRAME_VECTOR], frame->words[CPU_EXC_FRAME_ERROR], frame->words[CPU_EXC_FRAME_RIP], frame->words[CPU_EXC_FRAME_CS], frame->words[CPU_EXC_FRAME_RFLAGS], frame->words[CPU_EXC_FRAME_RSP], frame->words[CPU_EXC_FRAME_SS]); /* XXX Until the page fault handler is written */ if (frame->words[CPU_EXC_FRAME_VECTOR] == 14) { printf("cpu: cr2: %016lx\n", cpu_get_cr2()); } } #else /* __LP64__ */ static void cpu_show_frame(const struct cpu_exc_frame *frame) { unsigned long esp, ss; if ((frame->words[CPU_EXC_FRAME_CS] & CPU_PL_USER) || (frame->words[CPU_EXC_FRAME_VECTOR] == CPU_EXC_DF)) { esp = frame->words[CPU_EXC_FRAME_ESP]; ss = frame->words[CPU_EXC_FRAME_SS]; } else { esp = 0; ss = 0; } printf("cpu: eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n" "cpu: ebp: %08lx esi: %08lx edi: %08lx\n" "cpu: ds: %lu es: %lu fs: %lu gs: %lu\n" "cpu: vector: %lu error: %08lx\n" "cpu: eip: %08lx cs: %lu eflags: %08lx\n" "cpu: esp: %08lx ss: %lu\n", frame->words[CPU_EXC_FRAME_EAX], 
           frame->words[CPU_EXC_FRAME_EBX],
           frame->words[CPU_EXC_FRAME_ECX],
           frame->words[CPU_EXC_FRAME_EDX],
           frame->words[CPU_EXC_FRAME_EBP],
           frame->words[CPU_EXC_FRAME_ESI],
           frame->words[CPU_EXC_FRAME_EDI],
           frame->words[CPU_EXC_FRAME_DS],
           frame->words[CPU_EXC_FRAME_ES],
           frame->words[CPU_EXC_FRAME_FS],
           frame->words[CPU_EXC_FRAME_GS],
           frame->words[CPU_EXC_FRAME_VECTOR],
           frame->words[CPU_EXC_FRAME_ERROR],
           frame->words[CPU_EXC_FRAME_EIP],
           frame->words[CPU_EXC_FRAME_CS],
           frame->words[CPU_EXC_FRAME_EFLAGS],
           esp, ss);

    /* XXX Until the page fault handler is written */
    if (frame->words[CPU_EXC_FRAME_VECTOR] == 14) {
        printf("cpu: cr2: %08lx\n", cpu_get_cr2());
    }
}
#endif /* __LP64__ */

static void
cpu_show_stack(const struct cpu_exc_frame *frame)
{
    strace_show(frame->words[CPU_EXC_FRAME_PC],
                frame->words[CPU_EXC_FRAME_FP]);
}

static void
cpu_exc_double_fault(const struct cpu_exc_frame *frame)
{
    cpu_halt_broadcast();

#ifndef __LP64__
    struct cpu_exc_frame frame_store;
    struct cpu *cpu;

    /*
     * Double faults are caught through a task gate, which makes the given
     * frame useless. The interrupted state is automatically saved in the
     * main TSS by the processor. Build a proper exception frame from there.
     */
    cpu = cpu_current();
    frame_store.words[CPU_EXC_FRAME_EAX] = cpu->tss.eax;
    frame_store.words[CPU_EXC_FRAME_EBX] = cpu->tss.ebx;
    frame_store.words[CPU_EXC_FRAME_ECX] = cpu->tss.ecx;
    frame_store.words[CPU_EXC_FRAME_EDX] = cpu->tss.edx;
    frame_store.words[CPU_EXC_FRAME_EBP] = cpu->tss.ebp;
    frame_store.words[CPU_EXC_FRAME_ESI] = cpu->tss.esi;
    frame_store.words[CPU_EXC_FRAME_EDI] = cpu->tss.edi;
    frame_store.words[CPU_EXC_FRAME_DS] = cpu->tss.ds;
    frame_store.words[CPU_EXC_FRAME_ES] = cpu->tss.es;
    frame_store.words[CPU_EXC_FRAME_FS] = cpu->tss.fs;
    frame_store.words[CPU_EXC_FRAME_GS] = cpu->tss.gs;
    frame_store.words[CPU_EXC_FRAME_VECTOR] = CPU_EXC_DF;
    frame_store.words[CPU_EXC_FRAME_ERROR] = 0;
    frame_store.words[CPU_EXC_FRAME_EIP] = cpu->tss.eip;
    frame_store.words[CPU_EXC_FRAME_CS] = cpu->tss.cs;
    frame_store.words[CPU_EXC_FRAME_EFLAGS] = cpu->tss.eflags;
    frame_store.words[CPU_EXC_FRAME_ESP] = cpu->tss.esp;
    frame_store.words[CPU_EXC_FRAME_SS] = cpu->tss.ss;
    frame = &frame_store;
#endif /* __LP64__ */

    printf("cpu: double fault (cpu%u):\n", cpu_id());
    cpu_show_thread();
    cpu_show_frame(frame);
    cpu_show_stack(frame);
    cpu_halt();
}

void
cpu_exc_main(const struct cpu_exc_frame *frame)
{
    const struct cpu_exc_handler *handler;
    unsigned int vector;

    vector = (unsigned int)frame->words[CPU_EXC_FRAME_VECTOR];
    handler = cpu_get_exc_handler(vector);
    cpu_exc_handler_run(handler, frame);
    assert(!cpu_intr_enabled());
}

void
cpu_intr_main(const struct cpu_exc_frame *frame)
{
    const struct cpu_intr_handler *handler;
    unsigned int vector;

    vector = (unsigned int)frame->words[CPU_EXC_FRAME_VECTOR];
    handler = cpu_get_intr_handler(vector);
    thread_intr_enter();
    cpu_intr_handler_run(handler, vector);
    thread_intr_leave();
    assert(!cpu_intr_enabled());
}

static void
cpu_exc_default(const struct cpu_exc_frame *frame)
{
    cpu_halt_broadcast();
    printf("cpu: unregistered exception (cpu%u):\n", cpu_id());
    cpu_show_thread();
    cpu_show_frame(frame);
    cpu_show_stack(frame);
    cpu_halt();
}

static void
cpu_intr_default(unsigned int vector)
{
    cpu_halt_broadcast();
    printf("cpu: unregistered interrupt %u (cpu%u):\n", vector, cpu_id());
    cpu_show_thread();
    cpu_halt();
}

static void
cpu_xcall_intr(unsigned int vector)
{
    (void)vector;

    lapic_eoi();
    xcall_intr();
}

static void
cpu_thread_schedule_intr(unsigned int vector)
{
    (void)vector;

    lapic_eoi();
    thread_schedule_intr();
}
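/*
 * Example: registering an interrupt handler.
 *
 * A component hooks a vector by passing a function with the
 * cpu_intr_handler_fn_t signature to cpu_register_intr(). Like the IPI
 * handlers above, such a handler runs in interrupt context and acknowledges
 * the local APIC with lapic_eoi() before dispatching. The vector
 * CPU_EXC_EXAMPLE and the function example_handle_event() are hypothetical
 * names, shown only as a sketch.
 *
 *     static void
 *     example_intr(unsigned int vector)
 *     {
 *         (void)vector;
 *
 *         lapic_eoi();
 *         example_handle_event();
 *     }
 *
 *     cpu_register_intr(CPU_EXC_EXAMPLE, example_intr);
 */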
static void cpu_halt_intr(unsigned int vector) { (void)vector; lapic_eoi(); cpu_halt(); } static void __init cpu_setup_idt(void) { for (size_t i = 0; i < ARRAY_SIZE(cpu_ll_exc_handler_addrs); i++) { cpu_idt_set_intr_gate(&cpu_idt, i, cpu_get_ll_exc_handler(i)); } cpu_idt_setup_double_fault(&cpu_idt); } static void __init cpu_setup_intr(void) { cpu_setup_idt(); for (size_t i = 0; i < ARRAY_SIZE(cpu_exc_handlers); i++) { cpu_register_exc(i, cpu_exc_default); } /* Architecture defined exceptions */ cpu_register_exc(CPU_EXC_DE, cpu_exc_default); cpu_register_exc(CPU_EXC_DB, cpu_exc_default); cpu_register_intr(CPU_EXC_NMI, cpu_intr_default); cpu_register_exc(CPU_EXC_BP, cpu_exc_default); cpu_register_exc(CPU_EXC_OF, cpu_exc_default); cpu_register_exc(CPU_EXC_BR, cpu_exc_default); cpu_register_exc(CPU_EXC_UD, cpu_exc_default); cpu_register_exc(CPU_EXC_NM, cpu_exc_default); cpu_register_exc(CPU_EXC_DF, cpu_exc_double_fault); cpu_register_exc(CPU_EXC_TS, cpu_exc_default); cpu_register_exc(CPU_EXC_NP, cpu_exc_default); cpu_register_exc(CPU_EXC_SS, cpu_exc_default); cpu_register_exc(CPU_EXC_GP, cpu_exc_default); cpu_register_exc(CPU_EXC_PF, cpu_exc_default); cpu_register_exc(CPU_EXC_MF, cpu_exc_default); cpu_register_exc(CPU_EXC_AC, cpu_exc_default); cpu_register_intr(CPU_EXC_MC, cpu_intr_default); cpu_register_exc(CPU_EXC_XM, cpu_exc_default); /* System defined exceptions */ cpu_register_intr(CPU_EXC_XCALL, cpu_xcall_intr); cpu_register_intr(CPU_EXC_THREAD_SCHEDULE, cpu_thread_schedule_intr); cpu_register_intr(CPU_EXC_HALT, cpu_halt_intr); } static void __init cpu_seg_desc_init_null(struct cpu_seg_desc *desc) { desc->high = 0; desc->low = 0; } static void __init cpu_seg_desc_init_code(struct cpu_seg_desc *desc) { #ifdef __LP64__ desc->high = CPU_DESC_LONG | CPU_DESC_PRESENT | CPU_DESC_S | CPU_DESC_TYPE_CODE; desc->low = 0; #else /* __LP64__ */ desc->high = CPU_DESC_GRAN_4KB | CPU_DESC_DB | (0x000fffff & CPU_DESC_SEG_LIMIT_HIGH_MASK) | CPU_DESC_PRESENT | CPU_DESC_S | CPU_DESC_TYPE_CODE; desc->low = 0x000fffff & CPU_DESC_SEG_LIMIT_LOW_MASK; #endif /* __LP64__ */ } static void __init cpu_seg_desc_init_data(struct cpu_seg_desc *desc, uintptr_t base) { #ifdef __LP64__ (void)base; desc->high = CPU_DESC_DB | CPU_DESC_PRESENT | CPU_DESC_S | CPU_DESC_TYPE_DATA; desc->low = 0; #else /* __LP64__ */ desc->high = (base & CPU_DESC_SEG_BASE_HIGH_MASK) | CPU_DESC_GRAN_4KB | CPU_DESC_DB | (0x000fffff & CPU_DESC_SEG_LIMIT_HIGH_MASK) | CPU_DESC_PRESENT | CPU_DESC_S | CPU_DESC_TYPE_DATA | ((base & CPU_DESC_SEG_BASE_MID_MASK) >> 16); desc->low = ((base & CPU_DESC_SEG_BASE_LOW_MASK) << 16) | (0x000fffff & CPU_DESC_SEG_LIMIT_LOW_MASK); #endif /* __LP64__ */ } static void __init cpu_sysseg_desc_init_tss(struct cpu_sysseg_desc *desc, const struct cpu_tss *tss) { uintptr_t base, limit; base = (uintptr_t)tss; limit = base + sizeof(*tss) - 1; #ifdef __LP64__ desc->word4 = 0; desc->word3 = (base >> 32); #endif /* __LP64__ */ desc->word2 = (base & CPU_DESC_SEG_BASE_HIGH_MASK) | (limit & CPU_DESC_SEG_LIMIT_HIGH_MASK) | CPU_DESC_PRESENT | CPU_DESC_TYPE_TSS | ((base & CPU_DESC_SEG_BASE_MID_MASK) >> 16); desc->word1 = ((base & CPU_DESC_SEG_BASE_LOW_MASK) << 16) | (limit & CPU_DESC_SEG_LIMIT_LOW_MASK); } static void * __init cpu_gdt_get_desc(struct cpu_gdt *gdt, unsigned int selector) { assert((selector % sizeof(struct cpu_seg_desc)) == 0); assert(selector < sizeof(gdt->descs)); return gdt->descs + selector; } static void __init cpu_gdt_set_null(struct cpu_gdt *gdt, unsigned int selector) { struct cpu_seg_desc *desc; desc = 
cpu_gdt_get_desc(gdt, selector); cpu_seg_desc_init_null(desc); } static void __init cpu_gdt_set_code(struct cpu_gdt *gdt, unsigned int selector) { struct cpu_seg_desc *desc; desc = cpu_gdt_get_desc(gdt, selector); cpu_seg_desc_init_code(desc); } static void __init cpu_gdt_set_data(struct cpu_gdt *gdt, unsigned int selector, const void *base) { struct cpu_seg_desc *desc; desc = cpu_gdt_get_desc(gdt, selector); cpu_seg_desc_init_data(desc, (uintptr_t)base); } static void __init cpu_gdt_set_tss(struct cpu_gdt *gdt, unsigned int selector, const struct cpu_tss *tss) { struct cpu_sysseg_desc *desc; desc = cpu_gdt_get_desc(gdt, selector); cpu_sysseg_desc_init_tss(desc, tss); } static void __init cpu_gdt_init(struct cpu_gdt *gdt, const struct cpu_tss *tss, const struct cpu_tss *df_tss, void *pcpu_area) { cpu_gdt_set_null(gdt, CPU_GDT_SEL_NULL); cpu_gdt_set_code(gdt, CPU_GDT_SEL_CODE); cpu_gdt_set_data(gdt, CPU_GDT_SEL_DATA, 0); cpu_gdt_set_tss(gdt, CPU_GDT_SEL_TSS, tss); #ifdef __LP64__ (void)df_tss; (void)pcpu_area; #else /* __LP64__ */ cpu_gdt_set_tss(gdt, CPU_GDT_SEL_DF_TSS, df_tss); cpu_gdt_set_data(gdt, CPU_GDT_SEL_PERCPU, pcpu_area); cpu_gdt_set_data(gdt, CPU_GDT_SEL_TLS, &cpu_tls_seg); #endif /* __LP64__ */ } static void __init cpu_gdt_load(const struct cpu_gdt *gdt) { struct cpu_pseudo_desc gdtr; gdtr.address = (uintptr_t)gdt->descs; gdtr.limit = sizeof(gdt->descs) - 1; cpu_load_gdt(&gdtr); } static void __init cpu_tss_init(struct cpu_tss *tss, const void *intr_stack_top, const void *df_stack_top) { memset(tss, 0, sizeof(*tss)); #ifdef __LP64__ tss->ist[CPU_TSS_IST_INTR] = (uintptr_t)intr_stack_top; tss->ist[CPU_TSS_IST_DF] = (uintptr_t)df_stack_top; #else /* __LP64__ */ (void)intr_stack_top; (void)df_stack_top; #endif /* __LP64__ */ } #ifndef __LP64__ static void __init cpu_tss_init_i386_double_fault(struct cpu_tss *tss, const void *df_stack_top) { memset(tss, 0, sizeof(*tss)); tss->cr3 = cpu_get_cr3(); tss->eip = (uintptr_t)cpu_get_ll_exc_handler(CPU_EXC_DF); tss->eflags = CPU_EFL_ONE; tss->ebp = (uintptr_t)df_stack_top; tss->esp = tss->ebp; tss->es = CPU_GDT_SEL_DATA; tss->cs = CPU_GDT_SEL_CODE; tss->ss = CPU_GDT_SEL_DATA; tss->ds = CPU_GDT_SEL_DATA; tss->fs = CPU_GDT_SEL_PERCPU; } #endif /* __LP64__ */ static void __init cpu_feature_map_init(struct cpu_feature_map *map) { bitmap_zero(map->flags, CPU_NR_FEATURES); } static void __init cpu_feature_map_cset(struct cpu_feature_map *map, unsigned int word, unsigned int mask, enum cpu_feature feature) { if (word & mask) { bitmap_set(map->flags, feature); } } static void __init cpu_feature_map_basic1_edx(struct cpu_feature_map *map, unsigned int edx) { cpu_feature_map_cset(map, edx, CPU_CPUID_BASIC1_EDX_FPU, CPU_FEATURE_FPU); cpu_feature_map_cset(map, edx, CPU_CPUID_BASIC1_EDX_PSE, CPU_FEATURE_PSE); cpu_feature_map_cset(map, edx, CPU_CPUID_BASIC1_EDX_PAE, CPU_FEATURE_PAE); cpu_feature_map_cset(map, edx, CPU_CPUID_BASIC1_EDX_MSR, CPU_FEATURE_MSR); cpu_feature_map_cset(map, edx, CPU_CPUID_BASIC1_EDX_CX8, CPU_FEATURE_CX8); cpu_feature_map_cset(map, edx, CPU_CPUID_BASIC1_EDX_APIC, CPU_FEATURE_APIC); cpu_feature_map_cset(map, edx, CPU_CPUID_BASIC1_EDX_PGE, CPU_FEATURE_PGE); } static void __init cpu_feature_map_ext1_edx(struct cpu_feature_map *map, unsigned int edx) { cpu_feature_map_cset(map, edx, CPU_CPUID_EXT1_EDX_1GP, CPU_FEATURE_1GP); cpu_feature_map_cset(map, edx, CPU_CPUID_EXT1_EDX_LM, CPU_FEATURE_LM); } static struct cpu_tss * __init cpu_get_tss(struct cpu *cpu) { return &cpu->tss; } static void * __init cpu_get_intr_stack_top(struct cpu 
*cpu) { return &cpu->intr_stack[sizeof(cpu->intr_stack)]; } static struct cpu_tss * __init cpu_get_df_tss(struct cpu *cpu) { #ifdef __LP64__ (void)cpu; return NULL; #else /* __LP64__ */ return &cpu->df_tss; #endif /* __LP64__ */ } static void * __init cpu_get_df_stack_top(struct cpu *cpu) { return &cpu->df_stack[sizeof(cpu->df_stack)]; } static void __init cpu_init(struct cpu *cpu, unsigned int id, unsigned int apic_id) { memset(cpu, 0, sizeof(*cpu)); cpu->id = id; cpu->apic_id = apic_id; } static void __init cpu_load_ldt(void) { asm volatile("lldt %w0" : : "q" (CPU_GDT_SEL_NULL)); } static void __init cpu_load_tss(void) { asm volatile("ltr %w0" : : "q" (CPU_GDT_SEL_TSS)); } static void __init cpu_set_percpu_area(const struct cpu *cpu, void *area) { #ifdef __LP64__ unsigned long va; va = (unsigned long)area; cpu_set_msr(CPU_MSR_FSBASE, (uint32_t)(va >> 32), (uint32_t)va); #else /* __LP64__ */ asm volatile("mov %0, %%fs" : : "r" (CPU_GDT_SEL_PERCPU)); #endif /* __LP64__ */ percpu_var(cpu_local_area, cpu->id) = area; } static void __init cpu_set_tls_area(void) { #ifdef __LP64__ uintptr_t va; va = (uintptr_t)&cpu_tls_seg; cpu_set_msr(CPU_MSR_GSBASE, (uint32_t)(va >> 32), (uint32_t)va); #else /* __LP64__ */ asm volatile("mov %0, %%gs" : : "r" (CPU_GDT_SEL_TLS)); #endif /* __LP64__ */ } static const struct cpu_vendor * __init cpu_vendor_lookup(const char *str) { for (size_t i = 0; i < ARRAY_SIZE(cpu_vendors); i++) { if (strcmp(str, cpu_vendors[i].str) == 0) { return &cpu_vendors[i]; } } return NULL; } static void __init cpu_init_vendor_id(struct cpu *cpu) { const struct cpu_vendor *vendor; vendor = cpu_vendor_lookup(cpu->vendor_str); if (vendor == NULL) { return; } cpu->vendor_id = vendor->id; } static void __init cpu_build(struct cpu *cpu) { unsigned int eax, ebx, ecx, edx, max_basic, max_extended; void *pcpu_area; pcpu_area = percpu_area(cpu->id); /* * Assume at least an i586 processor. */ cpu_intr_restore(CPU_EFL_ONE); cpu_set_cr0(CPU_CR0_PG | CPU_CR0_AM | CPU_CR0_WP | CPU_CR0_NE | CPU_CR0_ET | CPU_CR0_TS | CPU_CR0_MP | CPU_CR0_PE); cpu_gdt_init(&cpu->gdt, cpu_get_tss(cpu), cpu_get_df_tss(cpu), pcpu_area); cpu_gdt_load(&cpu->gdt); cpu_load_ldt(); cpu_tss_init(&cpu->tss, cpu_get_intr_stack_top(cpu), cpu_get_df_stack_top(cpu)); #ifndef __LP64__ cpu_tss_init_i386_double_fault(&cpu->df_tss, cpu_get_df_stack_top(cpu)); #endif /* __LP64__ */ cpu_load_tss(); cpu_idt_load(&cpu_idt); cpu_set_percpu_area(cpu, pcpu_area); cpu_set_tls_area(); /* * Perform the check after initializing the GDT and the per-CPU area * since cpu_id() relies on them to correctly work. 
*/ assert(cpu->id == cpu_id()); eax = 0; cpu_cpuid(&eax, &ebx, &ecx, &edx); max_basic = eax; cpu->cpuid_max_basic = max_basic; memcpy(cpu->vendor_str, &ebx, sizeof(ebx)); memcpy(cpu->vendor_str + 4, &edx, sizeof(edx)); memcpy(cpu->vendor_str + 8, &ecx, sizeof(ecx)); cpu->vendor_str[sizeof(cpu->vendor_str) - 1] = '\0'; cpu_init_vendor_id(cpu); /* Some fields are only initialized if supported by the processor */ cpu->model_name[0] = '\0'; cpu->phys_addr_width = 0; cpu->virt_addr_width = 0; assert(max_basic >= 1); eax = 1; cpu_cpuid(&eax, &ebx, &ecx, &edx); cpu->type = (eax & CPU_CPUID_TYPE_MASK) >> CPU_CPUID_TYPE_SHIFT; cpu->family = (eax & CPU_CPUID_FAMILY_MASK) >> CPU_CPUID_FAMILY_SHIFT; if (cpu->family == 0xf) { cpu->family += (eax & CPU_CPUID_EXTFAMILY_MASK) >> CPU_CPUID_EXTFAMILY_SHIFT; } cpu->model = (eax & CPU_CPUID_MODEL_MASK) >> CPU_CPUID_MODEL_SHIFT; if ((cpu->model == 6) || (cpu->model == 0xf)) { cpu->model += (eax & CPU_CPUID_EXTMODEL_MASK) >> CPU_CPUID_EXTMODEL_SHIFT; } cpu->stepping = (eax & CPU_CPUID_STEPPING_MASK) >> CPU_CPUID_STEPPING_SHIFT; cpu->clflush_size = ((ebx & CPU_CPUID_CLFLUSH_MASK) >> CPU_CPUID_CLFLUSH_SHIFT) * 8; cpu->initial_apic_id = (ebx & CPU_CPUID_APIC_ID_MASK) >> CPU_CPUID_APIC_ID_SHIFT; cpu_feature_map_init(&cpu->feature_map); cpu_feature_map_basic1_edx(&cpu->feature_map, edx); eax = CPU_CPUID_EXT_BIT; cpu_cpuid(&eax, &ebx, &ecx, &edx); if (eax <= CPU_CPUID_EXT_BIT) { max_extended = 0; } else { max_extended = eax; } cpu->cpuid_max_extended = max_extended; if (max_extended >= (CPU_CPUID_EXT_BIT | 1)) { eax = CPU_CPUID_EXT_BIT | 1; cpu_cpuid(&eax, &ebx, &ecx, &edx); cpu_feature_map_ext1_edx(&cpu->feature_map, edx); } if (max_extended >= (CPU_CPUID_EXT_BIT | 4)) { eax = CPU_CPUID_EXT_BIT | 2; cpu_cpuid(&eax, &ebx, &ecx, &edx); memcpy(cpu->model_name, &eax, sizeof(eax)); memcpy(cpu->model_name + 4, &ebx, sizeof(ebx)); memcpy(cpu->model_name + 8, &ecx, sizeof(ecx)); memcpy(cpu->model_name + 12, &edx, sizeof(edx)); eax = CPU_CPUID_EXT_BIT | 3; cpu_cpuid(&eax, &ebx, &ecx, &edx); memcpy(cpu->model_name + 16, &eax, sizeof(eax)); memcpy(cpu->model_name + 20, &ebx, sizeof(ebx)); memcpy(cpu->model_name + 24, &ecx, sizeof(ecx)); memcpy(cpu->model_name + 28, &edx, sizeof(edx)); eax = CPU_CPUID_EXT_BIT | 4; cpu_cpuid(&eax, &ebx, &ecx, &edx); memcpy(cpu->model_name + 32, &eax, sizeof(eax)); memcpy(cpu->model_name + 36, &ebx, sizeof(ebx)); memcpy(cpu->model_name + 40, &ecx, sizeof(ecx)); memcpy(cpu->model_name + 44, &edx, sizeof(edx)); cpu->model_name[sizeof(cpu->model_name) - 1] = '\0'; } if (max_extended >= (CPU_CPUID_EXT_BIT | 8)) { eax = CPU_CPUID_EXT_BIT | 8; cpu_cpuid(&eax, &ebx, &ecx, &edx); cpu->phys_addr_width = (unsigned short)eax & 0xff; cpu->virt_addr_width = ((unsigned short)eax >> 8) & 0xff; } atomic_store(&cpu->started, 1, ATOMIC_RELEASE); } static void __init cpu_measure_freq(void) { uint64_t start, end; pit_setup_free_running(); start = cpu_get_tsc(); pit_delay(CPU_FREQ_CAL_DELAY); end = cpu_get_tsc(); cpu_freq = (end - start) / (1000000 / CPU_FREQ_CAL_DELAY); } static int __init cpu_setup(void) { struct cpu *cpu; cpu_setup_intr(); cpu = percpu_ptr(cpu_desc, 0); cpu_init(cpu, 0, CPU_INVALID_APIC_ID); cpu_build(cpu); cpu_measure_freq(); return 0; } INIT_OP_DEFINE(cpu_setup, INIT_OP_DEP(percpu_bootstrap, true)); static void __init cpu_panic_on_missing_feature(const char *feature) { panic("cpu: %s feature missing", feature); } static void __init cpu_check(const struct cpu *cpu) { if (!cpu_has_feature(cpu, CPU_FEATURE_FPU)) { 
        cpu_panic_on_missing_feature("fpu");
    }

    /*
     * The compiler is expected to produce cmpxchg8b instructions to
     * perform 64-bit atomic operations on a 32-bit processor. Clang
     * currently has trouble doing that, so 64-bit atomic support is
     * just disabled when building with it.
     */
#if !defined(__LP64__) && !defined(__clang__)
    if (!cpu_has_feature(cpu, CPU_FEATURE_CX8)) {
        cpu_panic_on_missing_feature("cx8");
    }
#endif
}

static int __init
cpu_check_bsp(void)
{
    cpu_check(cpu_current());
    return 0;
}

INIT_OP_DEFINE(cpu_check_bsp,
               INIT_OP_DEP(cpu_setup, true));

void *
cpu_get_intr_stack_ptr(void)
{
    struct cpu *cpu;

    assert(!thread_interrupted());
    cpu = cpu_local_ptr(cpu_desc);
    return cpu_get_intr_stack_top(cpu);
}

void __init
cpu_log_info(const struct cpu *cpu)
{
    char features[60], *ptr;
    size_t size, bytes;

    log_info("cpu%u: %s, type %u, family %u, model %u, stepping %u",
             cpu->id, cpu->vendor_str, cpu->type, cpu->family, cpu->model,
             cpu->stepping);

    if (strlen(cpu->model_name) > 0) {
        log_info("cpu%u: %s", cpu->id, cpu->model_name);
    }

    if ((cpu->phys_addr_width != 0) && (cpu->virt_addr_width != 0)) {
        log_info("cpu%u: address widths: physical: %hu, virtual: %hu",
                 cpu->id, cpu->phys_addr_width, cpu->virt_addr_width);
    }

    log_info("cpu%u: frequency: %llu.%02llu MHz", cpu->id,
             (unsigned long long)cpu_freq / 1000000,
             (unsigned long long)cpu_freq % 1000000);

    ptr = features;
    size = sizeof(features);

    for (size_t i = 0; i < ARRAY_SIZE(cpu_feature_names); i++) {
        if (!cpu_has_feature(cpu, i)) {
            continue;
        }

        assert(strlen(cpu_feature_names[i]) < sizeof(features));
        bytes = snprintf(ptr, size, " %s", cpu_feature_names[i]);

        if (bytes >= size) {
            *ptr = '\0';
            log_info("cpu%u:%s", cpu->id, features);
            ptr = features;
            size = sizeof(features);
            i--;
            continue;
        }

        ptr += bytes;
        size -= bytes;
    }

    log_info("cpu%u:%s", cpu->id, features);
}

void __init
cpu_mp_register_lapic(unsigned int apic_id, bool is_bsp)
{
    struct cpu *cpu;
    int error;

    if (is_bsp) {
        cpu = percpu_ptr(cpu_desc, 0);

        if (cpu->apic_id != CPU_INVALID_APIC_ID) {
            panic("cpu: another processor pretends to be the BSP");
        }

        cpu->apic_id = apic_id;
        return;
    }

    error = percpu_add(cpu_nr_active);

    if (error) {
        return;
    }

    cpu = percpu_ptr(cpu_desc, cpu_nr_active);
    cpu_init(cpu, cpu_nr_active, apic_id);
    cpu_nr_active++;
}

static void
cpu_trigger_double_fault(void)
{
    asm volatile("movl $0xdead, %esp; push $0");
}

static void
cpu_shutdown_reset(void)
{
    /* Generate a triple fault */
    cpu_idt_load(NULL);
    cpu_trigger_double_fault();
}

static struct shutdown_ops cpu_shutdown_ops = {
    .reset = cpu_shutdown_reset,
};

static int __init
cpu_mp_probe(void)
{
    log_info("cpu: %u processor(s) configured", cpu_count());
    return 0;
}

INIT_OP_DEFINE(cpu_mp_probe,
               INIT_OP_DEP(acpi_setup, true),
               INIT_OP_DEP(cpu_setup, true),
               INIT_OP_DEP(log_setup, true));

static int __init
cpu_setup_shutdown(void)
{
    if (cpu_count() == 1) {
        shutdown_register(&cpu_shutdown_ops, CPU_SHUTDOWN_PRIORITY);
    }

    return 0;
}

INIT_OP_DEFINE(cpu_setup_shutdown,
               INIT_OP_DEP(cpu_mp_probe, true),
               INIT_OP_DEP(shutdown_bootstrap, true));

void __init
cpu_mp_setup(void)
{
    uint16_t reset_vector[2];
    unsigned int started;
    struct cpu *cpu;
    void *ptr;

    if (cpu_count() == 1) {
        pmap_mp_setup();
        return;
    }

    assert(BOOT_MP_TRAMPOLINE_ADDR < BIOSMEM_BASE);
    assert(vm_page_aligned(BOOT_MP_TRAMPOLINE_ADDR));
    assert(boot_mp_trampoline_size <= PAGE_SIZE);

    /* Set up the AP trampoline code */
    ptr = (void *)vm_page_direct_va(BOOT_MP_TRAMPOLINE_ADDR);
    memcpy(ptr, boot_mp_trampoline, boot_mp_trampoline_size);

    /* Set up the warm reset vector */
    reset_vector[0] = 0;
reset_vector[1] = BOOT_MP_TRAMPOLINE_ADDR >> 4; ptr = (void *)vm_page_direct_va(CPU_MP_CMOS_RESET_VECTOR); memcpy(ptr, reset_vector, sizeof(reset_vector)); io_write_byte(CPU_MP_CMOS_PORT_REG, CPU_MP_CMOS_REG_RESET); io_write_byte(CPU_MP_CMOS_PORT_DATA, CPU_MP_CMOS_DATA_RESET_WARM); boot_alloc_ap_stacks(); /* * This function creates per-CPU copies of the page tables. Just in case, * call it last to make sure all processors get the same mappings. */ pmap_mp_setup(); for (unsigned int i = 1; i < cpu_count(); i++) { cpu = percpu_ptr(cpu_desc, i); boot_set_ap_id(i); /* Perform the "Universal Start-up Algorithm" */ lapic_ipi_init_assert(cpu->apic_id); cpu_delay(200); lapic_ipi_init_deassert(cpu->apic_id); cpu_delay(10000); lapic_ipi_startup(cpu->apic_id, BOOT_MP_TRAMPOLINE_ADDR >> 12); cpu_delay(200); lapic_ipi_startup(cpu->apic_id, BOOT_MP_TRAMPOLINE_ADDR >> 12); cpu_delay(200); for (;;) { started = atomic_load(&cpu->started, ATOMIC_ACQUIRE); if (started) { break; } } } } void __init cpu_ap_setup(unsigned int ap_id) { struct cpu *cpu; cpu = percpu_ptr(cpu_desc, ap_id); cpu_build(cpu); cpu_check(cpu_current()); lapic_ap_setup(); } void cpu_halt_broadcast(void) { unsigned int nr_cpus; assert(!cpu_intr_enabled()); nr_cpus = cpu_count(); if (nr_cpus == 1) { return; } lapic_ipi_broadcast(CPU_EXC_HALT); }
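/*
 * Example: the BSP/AP startup handshake used above, in isolation.
 *
 * cpu_mp_setup() (BSP side) spins on cpu->started with an acquire load,
 * while cpu_build() (AP side) publishes its fully initialized state with a
 * release store. The release/acquire pair guarantees that every write made
 * by the AP during cpu_build() is visible to the BSP once the loop exits.
 * This is a simplified sketch of the pairing, not additional code:
 *
 *     AP, end of cpu_build():
 *         atomic_store(&cpu->started, 1, ATOMIC_RELEASE);
 *
 *     BSP, in cpu_mp_setup():
 *         for (;;) {
 *             started = atomic_load(&cpu->started, ATOMIC_ACQUIRE);
 *
 *             if (started) {
 *                 break;
 *             }
 *         }
 */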