/* idt.c - idt code for x86_64 Copyright (C) 2007 Tom Bachmann This file is part of the GNU Hird. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #if HAVE_CONFIG_H #include #endif #include #include #include "idt.h" #include "gdt.h" #include "pic.h" #include "tss.h" #include "util.h" #include "scheduler.h" #include "server.h" #include "debug.h" /* human readable interrupt names */ const char* interrupt_names[] = { "divide-by-zero", "debug", "non-maskable-interrupt", "breakpoint", "overflow", "bound-range", "invalid-opcode", "device-not-available", "double-fault", "coprocessor-segment-overrun (reserved)", "invalid-tss", "segment-not-present", "stack", "general-protection", "page-fault", "reserved", "x87 floating-point exception-pending", "alignment-check", "machine-check", "simd floating-point", "reserved", "reserved", "reserved", "reserved", "reserved", "reserved", "reserved", "reserved", "reserved", "reserved", "security exception", "reserved", }; static const char* get_int_name (unsigned num) { if (num >= sizeof (interrupt_names) / sizeof (interrupt_names[0])) return ""; return interrupt_names[num]; } /* The x86_64 abi demands a so-called "red zone", a 128b area area below the rsp that must not be disturbed by interrupt or signal handlers. This must be disable passing -mno-red-zone, as the hardware will always overwrite that area on interrupt. (We could use the ist to switch the stack unconditionally on interrupt and switch to a new stack before enabling interrupts, but that seems to be overly complicated.) */ /* interrupt gate descriptor (mostly equivalent to sysdesc_t) */ typedef struct __attribute__((__packed__)) { uintptr_t offset_l : 16; uintptr_t selector : 16; uintptr_t ist : 3; uintptr_t rsv_ing1 : 5; uintptr_t type : 4; uintptr_t mbz : 1; uintptr_t dpl : 2; uintptr_t p : 1; uintptr_t offset_h : 48; uintptr_t rsv_ign2 : 32; } igd_t; /* interrupt_gate_descriptor_t or int_gate_desc_t are unspellable */ igd_t idt[NUM_INTERRUPT_VECTORS]; static void igd_init (igd_t* s, uintptr_t a, unsigned ist, int dpl) { s->offset_l = a & ((1 << 16) - 1); s->selector = SEGSEL_CODE_SYS; s->ist = ist; s->type = X86_64_SYSDESC_TYPE_INT_GATE; // XXX ? s->mbz = 0; s->dpl = dpl; s->p = 1; s->offset_h = a >> 16; } /* On entry, hardware writes some state to the stack. There are two variants: one with an error code and one without. For those cases that don't, we write an error code of 0. This creates a consistent stack layout. We also write the interrupt number to the stack. The cold end of the stack has the following format: Relative to RSP at Relative to RSP handler_common at start of entry register save +48 SS (2) +56 +40 RSP (8) +48 +32 RFLAGS (8) +40 +24 CS (2) +32 +16 RIP (8) +24 +8 Error code (4) +16 +0 Interrupt number (8) +8 saved RDI +0 We then stuff the context in CURRENT_THREAD's register file. Note: this assumes that CURRENT_THREAD was running at the time of interrupt. If the kernel can, e.g., fault, we need to save the state somewhere else as function of the CPL. Also, we need to use a different stack. */ /* handler for unimplemented interrupts and code common to all handler stubs */ static void __attribute__ ((noreturn, used)) dummy (void) { /* Generate the lowlevel stubs for interrupt handlers. */ #define HANDLER_IMPL(n) \ ".global handler" #n "\n\t" \ "handler" #n ":\n\t" \ " pushq $" #n "\n\t" \ " jmp handler_common\n\t" #define HANDLER_IMPL_NOERR(n) \ ".global handler" #n "\n\t" \ "handler" #n ":\n\t" \ " pushq $0\n\t" \ " pushq $" #n "\n\t" \ " jmp handler_common\n\t" #define I HANDLER_IMPL #define IN HANDLER_IMPL_NOERR /* Magic number passed by not-implemented interrupts. */ #define NOTIMP_MAGIC 1202458324 #define S_(x) #x #define S(x) S_(x) asm (".text\n" /* interrupts */ IN(0) IN(1) IN(2) IN(3) IN(4) IN(5) IN(6) IN(7) I(8) IN(9) /* reserved */ I(10) I(11) I(12) I(13) I(14) IN(15) /* reserved */ IN(16) IN(17) IN(18) IN(19) I(30) /* XXX 30? */ /* reserved */ I(20) I(21) I(22) I(23) I(24) I(25) I(26) I(27) I(28) I(29) I(31) /* irqs */ IN(32) IN(33) IN(34) IN(35) IN(36) IN(37) IN(38) IN(39) IN(40) IN(41) IN(42) IN(43) IN(44) IN(45) IN(46) IN(47) /* User interrupts. */ IN(48) ".global handler_notimp\n\t" "handler_notimp:\n\t" " pushq $" STRINGIFY (NOTIMP_MAGIC) "\n\t" " jmp handler_common\n\t" "handler_common:\n\t" /* If we come from user land, we want to save the register file in CURRENT_THREAD. If not, we want to save it on the stack. These are sufficiently different that we don't try to reuse the common code. */ /* Save RDI so we can use it as a scratch register. */ " pushq %%rdi\n\t" /* Check if we came from the kernel or user space by looking at the RPL of the saved CS segment descriptor. */ " movl 32(%%rsp), %%edi\n\t" " bt $0, %%edi\n\t" " jc from_user_land\n\t" /* We came from the kernel. */ /* Get a pointer to the saved interrupt context (which is currently RSP+8). This will be passed to HANDLER as the first C argument, which is RDI. */ " mov %%rsp, %%rdi\n\t" " add $8, %%rdi\n\t" /* Save the remaining caller saved registers. */ " pushq %%r11\n\t" " pushq %%r10\n\t" " pushq %%r9\n\t" " pushq %%r8\n\t" " pushq %%rsi\n\t" " pushq %%rdx\n\t" " pushq %%rcx\n\t" " pushq %%rax\n\t" " pushq %%rbp\n\t" /* Call the high-level interrupt handler. The first argument (%RDI) already points to the interrupt context. The second argument indicates if we came from user-space (0) or kernel (1). Clear RBP so that the frame pointer looks good. Clear the direction flag according to the ABI. */ " mov $1, %%esi\n\t" " xorq %%rbp, %%rbp\n\t" " cld\n\t" " call handler\n\t" " popq %%rbp\n\t" " popq %%rax\n\t" " popq %%rcx\n\t" " popq %%rdx\n\t" " popq %%rsi\n\t" " popq %%r8\n\t" " popq %%r9\n\t" " popq %%r10\n\t" " popq %%r11\n\t" " popq %%rdi\n\t" /* Skip the error and the interrupt number. */ " add $16, %%rsp\n\t" " iretq\n\t" /* We came from user land. Save all registers in CURRENT_THREAD. */ "from_user_land:\n\t" /* Save a copy of RSP in RDI and set RSP to the top of the save area in current thread. */ " movq %%rsp, %%rdi\n\t" /* Set up RSP to save the CPU state to CURRENT_THREAD. */ " movq current_thread, %%rsp\n\t" " addq %[save_area_restore_all_end], %%rsp\n\t" /* Save all registers. */ " pushq $1\n\t" /* Skip GS. */ " sub $8, %%rsp\n\t" " pushq %%r15\n\t" " pushq %%r14\n\t" " pushq %%r13\n\t" " pushq %%r12\n\t" " pushq %%r11\n\t" " pushq %%r10\n\t" " pushq %%r9\n\t" " pushq %%r8\n\t" " pushq %%rsi\n\t" " pushq %%rbp\n\t" " pushq %%rdx\n\t" " pushq %%rcx\n\t" " pushq %%rbx\n\t" " pushq %%rax\n\t" /* The saved RDI. */ " pushq 0(%%rdi)\n\t" /* The saved RFLAGS. */ " pushq 40(%%rdi)\n\t" /* The saved RIP. */ " pushq 24(%%rdi)\n\t" /* The saved RSP. */ " pushq 48(%%rdi)\n\t" /* Restore the stack. */ " mov %%rdi, %%rsp\n\t" /* Throw away the saved RDI. */ " popq %%rbx\n\t" /* Call the high-level interrupt handler. The first argument (%RDI) must point to the interrupt context (which RSP now points to). Clear RBP so that the frame pointer looks good. Clear the direction flag according to the ABI. */ " mov %%rsp, %%rdi\n\t" /* The second argument indicates if we came from user-space (0) or kernel (1). */ " xorl %%esi, %%esi\n\t" " xorq %%rbp, %%rbp\n\t" " cld\n\t" " call handler\n\t" /* Resume the current thread. */ /* Load current thread as the argument. */ " movq current_thread, %%rdi\n\t" /* No return address. */ " pushq $0\n\t" " cld\n\t" " jmp return_to_user_land\n\t" :: [save_area_restore_all_end] "n"(((uintptr_t) &((struct thread *)0)->regs_restore_all)+8), [save_area_rax_offset]"n"(offsetof (struct thread, regs[RAX]))); } void handler_notimp (); #include "page-tables.h" #include "sys.h" #ifndef NDEBUG int in_interrupt_handler; #endif /* We need to disable interrupts before triggering some asserts... */ #ifndef NDEBUG # define ASSERTX(x, fmt, ...) \ do { \ if (!(x)) \ { \ asm ("cli"); \ status (0); \ panic ("Assert " #x " failed: " fmt, ##__VA_ARGS__); \ } \ } while (0) #else # define ASSERTX(x, fmt, ...) do { } while (0) #endif #define ASSERT(x) ASSERTX(x, "") /* The high-level interrupt handler. If FROM_KERNEL is true, this function must return to resume the interrupted kernel context. Otherwise, it is permissible to not return (e.g., by calling thread_resume). */ void handler (struct interrupt_context *t, bool from_kernel) { static uintptr_t interrupts; interrupts ++; #ifndef NDEBUG __sync_fetch_and_add (&in_interrupt_handler, 1); #endif void status (int d) { uintptr_t cr2; asm ("movq %%cr2, %0" : "=r" (cr2)); uintptr_t db6; asm ("movq %%db6, %0" : "=r" (db6)); uintptr_t last_branch_from_ip = x86_64_rdmsr (0x1db); uintptr_t last_branch_to_ip = x86_64_rdmsr (0x1dc); uintptr_t last_exception_from_ip = x86_64_rdmsr (0x1dd); uintptr_t last_exception_to_ip = x86_64_rdmsr (0x1de); printf ("Interrupt %"PRIdPTR": #%ld (%s), from %s!\n" " err: 0x%lx\n" " rip: 0x%lx\n" " cs: 0x%lx\n" " rfl: 0x%lx "RFLAGS_FMT"\n" " rsp: 0x%lx\n" " ss: 0x%lx\n" " cr2 (last fault): %lx\n" " db6: %lx\n" " last_branch_from_ip: %lx\n" " last_branch_to_ip: %lx\n" " last_exception_from_ip: %lx\n" " last_exception_to_ip: %lx\n", interrupts, t->num, get_int_name (t->num), from_kernel ? "kernel" : "user space", t->err, t->rip, t->cs, t->rflags.raw, RFLAGS_PRINTF (t->rflags), t->rsp, t->ss, cr2, db6, last_branch_from_ip, last_branch_to_ip, last_exception_from_ip, last_exception_to_ip); if (current_thread) printf ("Current thread: " OBJECT_NAME_FMT":\n"REGS_FMT"\n", OBJECT_NAME_PRINTF ((struct vg_object *) current_thread), REGS_PRINTF (current_thread)); else printf ("current_thread is NULL.\n"); } if (! ((void *) &stack <= (void *) &t && (void *) &t < (void *) &stack_end)) { status (0); debug (0, "Not on kernel stack (should have: %p <= %p < %p)\n", (void *) &stack, (void *) &t, (void *) &stack_end); halt (); } assertx ((void *) &stack <= (void *) &t && (void *) &t < (void *) &stack_end, "Not on kernel stack (should have: %p <= %p < %p)\n", (status (0), (void *) &stack), (void *) &t, (void *) &stack_end); if (! from_kernel) { ASSERT (current_thread); ASSERT (t->rip == thread_ip (current_thread)); ASSERT (t->rsp == thread_sp (current_thread)); ASSERT (((struct segment_selector) { { .raw = t->ss } } ).rpl == SEG_PL_USER); ASSERT (((struct segment_selector) { { .raw = t->cs } } ).rpl == SEG_PL_USER); ASSERTX (t->rip < KERNEL_OFFSET, "%lx", t->rip); #ifndef NDEBUG /* Assert that interrupts are not enabled. */ struct rflags rflags; rflags.raw = 0; asm ("pushfq\n\t" "popq %0" : "=r" (rflags)); assertx (! rflags.if_, RFLAGS_FMT " (%lx)", RFLAGS_PRINTF (rflags), rflags.raw); #endif } else { ASSERT (((struct segment_selector) { { .raw = t->ss } } ).rpl == SEG_PL_KERNEL); ASSERT (((struct segment_selector) { { .raw = t->cs } } ).rpl == SEG_PL_KERNEL); ASSERT (t->rip >= KERNEL_OFFSET); } /* XXX Currently, we root all interrupts in this handler, just to demangle manually, to mangle them again calling pic_handle_irq, and to demangle them there again. */ if (t->num == NOTIMP_MAGIC) panic ("Not implemented/expected interrupt!"); if (t->num >= IRQ_BASE && t->num < IRQ_BASE + 16) { assert (t->err == 0); pic_handle_irq (t); /* Reenable interrupts. */ asm ("sti"); if (! from_kernel) schedule (NULL); goto out; } else /* Reenable interrupts. */ asm ("sti"); //if (object_to_object_desc (current_thread)->oid == 0x44b) //status (0); if (t->num == 14) /* It's a page fault. cr2 holds the fault address and cr3 the pml4. */ { #if 0 static int page_faults; page_faults ++; if (page_faults > 5) panic ("..."); #endif uintptr_t cr2; asm ("movq %%cr2, %0" : "=r" (cr2)); static struct thread *last_thread; static uintptr_t last_fault; static int refault; if (last_thread == current_thread && last_fault == cr2) { refault ++; if (refault > 2) { status (0); panic ("Looping."); } } else { last_thread = current_thread; last_fault = cr2; refault = 0; } int write_fault = (t->err & (1 << 1)); int execute_fault = (t->err & (1 << 4)); int user_fault = (t->err & (1 << 2)); void fault_info (int d) { debug (d, "Fault at %"PRIxPTR, cr2); if ((t->err & (1 << 0)) == 0) debug (d, "Not present page"); debug (d, "%s fault.", write_fault ? "Write" : "Read"); debug (d, "%s fault.", user_fault ? "User" : "Supervisor"); debug (d, "%s fetch.", execute_fault ? "Instruction" : "Data"); } int d = 5; fault_info (d); if (! user_fault) { status (0); fault_info (0); panic ("Kernel faulted!"); } if ((t->err & (1 << 3)) == 1) { status (0); fault_info (0); panic ("Processor read reserved bit as one."); } if (cr2 >= KERNEL_OFFSET) { status (0); fault_info (0); panic ("User touched kernel area."); } uintptr_t cr3; asm ("movq %%cr3, %0" : "=r" (cr3)); debug (d, "pml4: %"PRIxPTR, cr3); if (current_thread->pml4) assert (phys_to_kv (cr3) == current_thread->pml4); struct page_table *pt = phys_to_kv (cr3); int idx = ADDR_PML4_INDEX (cr2); struct pte *pte = &pt->entry[idx]; debug (d, "pml4[%d]: " PTE_FMT, idx, PTE_PRINTF (*pte)); if (pte->present) { pt = phys_to_kv (PTE_ADDRESS (*pte)); idx = ADDR_PDP_INDEX (cr2); pte = &pt->entry[idx]; debug (5, "pdp[%d]: " PTE_FMT, idx, PTE_PRINTF (*pte)); } if (pte->present) { pt = phys_to_kv (PTE_ADDRESS (*pte)); idx = ADDR_PD_INDEX (cr2); pte = &pt->entry[idx]; debug (5, "pd[%d]: " PTE_FMT, idx, PTE_PRINTF (*pte)); } if (pte->present) { pt = phys_to_kv (PTE_ADDRESS (*pte)); idx = ADDR_PT_INDEX (cr2); pte = &pt->entry[idx]; debug (5, "pt[%d]: " PTE_FMT, idx, PTE_PRINTF (*pte)); } /* Compute the required access. Execute and write imply read. Executing never writes. */ int access = MEM_READABLE; if ((t->err & (1 << 4))) access |= MEM_EXECUTABLE; else if ((t->err & (1 << 1))) access |= MEM_WRITABLE; page_fault_handler (current_thread, cr2, access); goto out; } else if (t->num == 48) /* Debug output. %rax identifies the buffer and %rbx the number of bytes. */ { int len = current_thread->regs[RAX]; if (len > sizeof (uintptr_t) * 8) len = sizeof (uintptr_t) * 8; union { uintptr_t chunk[4]; char data[4 * 8]; } buffer; buffer.chunk[0] = current_thread->regs[RCX]; buffer.chunk[1] = current_thread->regs[RDX]; buffer.chunk[2] = current_thread->regs[RDI]; buffer.chunk[3] = current_thread->regs[RSI]; int i; for (i = 0; i < len; i ++) putchar (buffer.data[i]); goto out; } else if (t->num == 3 || t->num == 1) { status (0); debugger (); goto out; } status (0); panic ("Unhandled exception %"PRIxPTR, t->num); out:; #ifndef NDEBUG int i = __sync_fetch_and_add (&in_interrupt_handler, -1); ASSERTX (i > 0, "%d", in_interrupt_handler); #endif return; } /* initialize the interrupt descriptor table */ void idt_init () { #define REGISTER(n) \ extern void handler##n(void); \ igd_init (&idt[n], (uintptr_t) handler##n, 0, 0); #define R REGISTER R (0) /* R (1) */ R (2) /* R (3) */ R (4) R (4) R (5) R (6) R (7) R (8) R (9) R (10) R (11) R (12) R (13) R (14) R (15) R (16) R (17) R (18) R (19) R (20) R (21) R (22) R (23) R (24) R (25) R (26) R (27) R (28) R (29) R (30) R (31) R (32) R (33) R (34) R (35) R (36) R (37) R (38) R (39) R (40) R (41) R (42) R (43) R (44) R (45) R (46) R (47) /* The debugging interrupt may be invoked from user space. */ extern void handler1 (void); igd_init (&idt[1], (uintptr_t) handler1, 0, 3); extern void handler3 (void); igd_init (&idt[3], (uintptr_t) handler3, 0, 3); extern void handler48 (void); igd_init (&idt[48], (uintptr_t) handler48, 0, 3); /* initialize all other vectors to the "not implemented" handler */ #define NUM_SPECIAL 49 /* XXX move somewhere better? */ for (unsigned i = NUM_SPECIAL;i < NUM_INTERRUPT_VECTORS;++i) igd_init (&idt[i], (uintptr_t) handler_notimp, 0, 0); /* load the idt address */ x86_64_pseudosel_t p = {sizeof (idt), (uintptr_t) &idt}; asm volatile ("lidt %0" :: "m" (p)); /* make sure #nm is triggered */ x86_64_load_cr0 (X86_64_CR0_EM, false); x86_64_load_cr0 (X86_64_CR0_MP, true); }