summaryrefslogtreecommitdiff
path: root/i386/i386
diff options
context:
space:
mode:
author Damien Zammit <damien@zamaudio.com> 2023-09-24 10:35:10 +0000
committer Samuel Thibault <samuel.thibault@ens-lyon.org> 2023-09-24 14:38:03 +0200
commit b11e10e2c81c2b608176021364a36d84173358e3 (patch)
tree 8bde3f47151865143341e864040af3bde68385a5 /i386/i386
parent 31d45d0d8ee1d8eee96fc2a283a388b6b6aca669 (diff)
percpu area using gs segment
This speeds up SMP again by storing the struct processor in a percpu area, which avoids an expensive cpu_number() lookup on every call of current_processor(), and by getting the cpu number from an offset into the percpu area.

Untested on 64 bit, and work remains to convert the other per-cpu arrays.

TESTED: (NCPUS=8) -smp 1 boots to login shell ~2x slower than uniprocessor
TESTED: (NCPUS=8) -smp 2 boots to INIT but hangs there
TESTED: (NCPUS=8) -smp 4 gets stuck seemingly within rumpdisk and hangs
TESTED: (NCPUS=1) uniprocessor is a bit faster than normal

Message-Id: <20230924103428.455966-3-damien@zamaudio.com>
Diffstat (limited to 'i386/i386')
-rw-r--r-- i386/i386/cpu_number.h | 17
-rw-r--r-- i386/i386/fpu.c | 2
-rw-r--r-- i386/i386/gdt.c | 21
-rw-r--r-- i386/i386/gdt.h | 8
-rw-r--r-- i386/i386/i386asm.sym | 2
-rw-r--r-- i386/i386/locore.S | 20
-rw-r--r-- i386/i386/mp_desc.c | 3
-rw-r--r-- i386/i386/percpu.c | 31
-rw-r--r-- i386/i386/percpu.h | 83
-rw-r--r-- i386/i386/pit.c | 2
-rw-r--r-- i386/i386/spl.S | 16
11 files changed, 174 insertions, 31 deletions
diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h
index 8357be84..6ba46e4b 100644
--- a/i386/i386/cpu_number.h
+++ b/i386/i386/cpu_number.h
@@ -30,6 +30,8 @@
#ifndef _I386_CPU_NUMBER_H_
#define _I386_CPU_NUMBER_H_
+#define MY(stm) %gs:PERCPU_##stm
+
#if NCPUS > 1
#ifdef __i386__
@@ -45,8 +47,8 @@
shrl $24, reg ;\
movl %cs:CX(cpu_id_lut, reg), reg ;\
-/* Never call CPU_NUMBER(%esi) */
-#define CPU_NUMBER(reg) \
+/* Never call CPU_NUMBER_NO_GS(%esi) */
+#define CPU_NUMBER_NO_GS(reg) \
pushl %esi ;\
pushl %eax ;\
pushl %ebx ;\
@@ -63,20 +65,29 @@
movl %esi, reg ;\
popl %esi ;\
+#define CPU_NUMBER(reg) /* load this CPU's number into reg from %gs:PERCPU_CPU_ID; only meaningful once %gs holds PERCPU_DS */ \
+ movl MY(CPU_ID), reg;
+
#ifndef __ASSEMBLER__
#include <kern/cpu_number.h>
#include <i386/apic.h>
+#include <i386/percpu.h>
-static inline int cpu_number(void)
+static inline int cpu_number_slow(void) /* CPU number derived from the local APIC id through cpu_id_lut, not from the percpu area */
{
return cpu_id_lut[apic_get_current_cpu()];
}
+static inline int cpu_number(void) /* CPU number read from the cpu_id member of the percpu area */
+{
+ return percpu_get(int, cpu_id); /* %gs-relative load; the value is the one stored by init_percpu() */
+}
#endif
#else /* NCPUS == 1 */
#define CPU_NUMBER_NO_STACK(reg)
+#define CPU_NUMBER_NO_GS(reg)
#define CPU_NUMBER(reg)
#define CX(addr,reg) addr
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index fefe5e49..e1818683 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -119,7 +119,7 @@ init_fpu(void)
#else /* MACH_RING1 */
unsigned int native = 0;
- if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486)
+ if (machine_slot[cpu_number_slow()].cpu_type >= CPU_TYPE_I486)
native = CR0_NE;
/*
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index ddda603b..4edd3ec5 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -35,6 +35,8 @@
#include <kern/assert.h>
#include <intel/pmap.h>
+#include <kern/cpu_number.h>
+#include <machine/percpu.h>
#include "vm_param.h"
#include "seg.h"
@@ -48,7 +50,7 @@ extern
struct real_descriptor gdt[GDTSZ];
static void
-gdt_fill(struct real_descriptor *mygdt)
+gdt_fill(int cpu, struct real_descriptor *mygdt)
{
/* Initialize the kernel code and data segment descriptors. */
#ifdef __x86_64__
@@ -73,6 +75,16 @@ gdt_fill(struct real_descriptor *mygdt)
0xffffffff,
ACC_PL_K|ACC_DATA_W, SZ_32);
#endif /* MACH_PV_DESCRIPTORS */
+ vm_offset_t thiscpu = kvtolin(&percpu_array[cpu]);
+ _fill_gdt_descriptor(mygdt, PERCPU_DS,
+ thiscpu,
+ thiscpu + sizeof(struct percpu) - 1,
+#ifdef __x86_64__
+ ACC_PL_K|ACC_DATA_W, SZ_64
+#else
+ ACC_PL_K|ACC_DATA_W, SZ_32
+#endif
+ );
#endif
#ifdef MACH_PV_DESCRIPTORS
@@ -119,15 +131,16 @@ reload_segs(void)
"movw %w1,%%ds\n"
"movw %w1,%%es\n"
+ "movw %w3,%%gs\n"
"movw %w1,%%ss\n"
- : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0));
+ : : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0), "r" (PERCPU_DS));
#endif
}
void
gdt_init(void)
{
- gdt_fill(gdt);
+ gdt_fill(0, gdt);
reload_segs();
@@ -146,7 +159,7 @@ gdt_init(void)
void
ap_gdt_init(int cpu)
{
- gdt_fill(mp_gdt[cpu]);
+ gdt_fill(cpu, mp_gdt[cpu]);
reload_segs();
}
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index 80ca8ada..c7da012a 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -77,11 +77,9 @@
/* 0x58 used by user TSS in 64bit mode */
-#ifdef __x86_64__
-#define GDTSZ sel_idx(0x60)
-#else
-#define GDTSZ sel_idx(0x58)
-#endif
+#define PERCPU_DS 0x68 /* per-cpu data mapping */
+
+#define GDTSZ sel_idx(0x70)
#ifndef __ASSEMBLER__
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index 5d546c08..d96b8be8 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -53,6 +53,8 @@ expr CALL_PMAP_UPDATE
offset ApicLocalUnit lu apic_id APIC_ID
+offset percpu pc cpu_id PERCPU_CPU_ID
+
offset pcb pcb iss
offset thread th pcb
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index 0cac8df4..870db785 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -244,7 +244,7 @@ timer_normalize:
* Switch to a new timer.
*/
ENTRY(timer_switch)
- CPU_NUMBER(%edx) /* get this CPU */
+ CPU_NUMBER_NO_GS(%edx) /* get this CPU */
movl VA_ETC,%ecx /* get timer */
movl CX(EXT(current_tstamp),%edx),%eax /* get old time stamp */
movl %ecx,CX(EXT(current_tstamp),%edx) /* set new time stamp */
@@ -262,7 +262,7 @@ ENTRY(timer_switch)
* Initialize the first timer for a CPU.
*/
ENTRY(start_timer)
- CPU_NUMBER(%edx) /* get this CPU */
+ CPU_NUMBER_NO_GS(%edx) /* get this CPU */
movl VA_ETC,%ecx /* get timer */
movl %ecx,CX(EXT(current_tstamp),%edx) /* set initial time stamp */
movl S_ARG0,%ecx /* get timer */
@@ -469,7 +469,8 @@ trap_push_segs:
mov %ax,%ds /* (same as kernel stack segment) */
mov %ax,%es
mov %ax,%fs
- mov %ax,%gs
+ mov $(PERCPU_DS),%ax
+ movw %ax,%gs
trap_set_segs:
cld /* clear direction flag */
@@ -673,7 +674,7 @@ ENTRY(all_intrs)
pushl %edx
cld /* clear direction flag */
- CPU_NUMBER(%ecx)
+ CPU_NUMBER_NO_GS(%ecx)
movl %esp,%edx /* on an interrupt stack? */
and $(~(INTSTACK_SIZE-1)),%edx
cmpl %ss:CX(EXT(int_stack_base),%ecx),%edx
@@ -687,7 +688,8 @@ ENTRY(all_intrs)
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
- mov %dx,%gs
+ mov $(PERCPU_DS),%dx
+ movw %dx,%gs
CPU_NUMBER(%edx)
@@ -745,7 +747,7 @@ LEXT(return_to_iret) /* to find the return from calling interrupt) */
iret /* return to caller */
int_from_intstack:
- CPU_NUMBER(%edx)
+ CPU_NUMBER_NO_GS(%edx)
cmpl CX(EXT(int_stack_base),%edx),%esp /* seemingly looping? */
jb stack_overflowed /* if not: */
call EXT(interrupt) /* call interrupt routine */
@@ -793,7 +795,8 @@ ast_from_interrupt:
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
- mov %dx,%gs
+ mov $(PERCPU_DS),%dx
+ movw %dx,%gs
CPU_NUMBER(%edx)
TIME_TRAP_UENTRY
@@ -1052,7 +1055,8 @@ syscall_entry_2:
mov %dx,%ds
mov %dx,%es
mov %dx,%fs
- mov %dx,%gs
+ mov $(PERCPU_DS),%dx
+ movw %dx,%gs
/*
* Shuffle eflags,eip,cs into proper places
diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
index f1a1f989..f4ccc381 100644
--- a/i386/i386/mp_desc.c
+++ b/i386/i386/mp_desc.c
@@ -238,6 +238,7 @@ cpu_setup(int cpu)
flush_instr_queue();
printf("AP=(%u) paging done\n", cpu);
+ init_percpu(cpu);
mp_desc_init(cpu);
printf("AP=(%u) mpdesc done\n", cpu);
@@ -275,7 +276,7 @@ cpu_setup(int cpu)
void
cpu_ap_main()
{
- int cpu = cpu_number();
+ int cpu = cpu_number_slow();
do {
cpu_pause();
diff --git a/i386/i386/percpu.c b/i386/i386/percpu.c
new file mode 100644
index 00000000..a4db7b68
--- /dev/null
+++ b/i386/i386/percpu.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <i386/smp.h>
+#include <i386/apic.h>
+#include <kern/cpu_number.h>
+#include <i386/percpu.h>
+
+struct percpu percpu_array[NCPUS] = {0};
+
+/*
+ * Fill in the bookkeeping members of the percpu element for `cpu`:
+ * its own address, the APIC id reported by apic_get_current_cpu() for
+ * the CPU executing this call, and the CPU index itself.
+ */
+void init_percpu(int cpu)
+{
+ struct percpu *pc = &percpu_array[cpu];
+
+ pc->self = pc;
+ pc->apic_id = apic_get_current_cpu();
+ pc->cpu_id = cpu;
+}
diff --git a/i386/i386/percpu.h b/i386/i386/percpu.h
new file mode 100644
index 00000000..202504da
--- /dev/null
+++ b/i386/i386/percpu.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Free Software Foundation, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _PERCPU_H_
+#define _PERCPU_H_
+
+struct percpu;
+
+/*
+ * percpu_assign(stm, val): store `val` into member `stm` of the
+ * struct percpu addressed through %gs.
+ *
+ * stm - member name of struct percpu; its offset is emitted as an
+ *       immediate displacement.
+ * val - value to store; it is passed to the instruction in a register.
+ *
+ * The store targets memory that is not listed as an operand, so the asm
+ * carries a "memory" clobber: without it the compiler may keep values of
+ * the same storage cached in registers or reorder ordinary accesses to it
+ * around the store.
+ * The expansion ends in ';' (kept as is for existing callers), so the
+ * macro cannot be used as the sole body of an if that has an else.
+ */
+#define percpu_assign(stm, val) \
+ asm volatile("mov %[src], %%gs:%c[offs]" \
+ : /* No outputs */ \
+ : [src] "r" (val), [offs] "e" (__builtin_offsetof(struct percpu, stm)) \
+ : "memory");
+
+/*
+ * percpu_get(typ, stm): evaluate to the current value of member `stm`
+ * (of type `typ`) of the struct percpu addressed through %gs.
+ *
+ * The asm is volatile: its only input is a constant offset, so a plain
+ * asm could be merged with an earlier identical read, hoisted out of a
+ * loop or dropped, returning a stale value after the member was changed
+ * (e.g. by percpu_assign()) or after %gs started to designate another
+ * CPU's area.
+ */
+#define percpu_get(typ, stm) \
+MACRO_BEGIN \
+ typ val_; \
+ \
+ asm volatile("mov %%gs:%c[offs], %[dst]" \
+ : [dst] "=r" (val_) \
+ : [offs] "e" (__builtin_offsetof(struct percpu, stm)) \
+ : ); \
+ \
+ val_; \
+MACRO_END
+
+/*
+ * percpu_ptr(typ, stm): evaluate to a `typ *` pointing at member `stm`
+ * of the struct percpu addressed through %gs.
+ *
+ * The member offset is added to the word stored at %gs:0, i.e. the
+ * `self` member, which therefore has to remain the first member of
+ * struct percpu.
+ * The asm is volatile so that the self pointer is re-read at every use
+ * instead of being merged with, or hoisted above, an earlier read.
+ */
+#define percpu_ptr(typ, stm) \
+MACRO_BEGIN \
+ typ *ptr_ = (typ *)__builtin_offsetof(struct percpu, stm); \
+ \
+ asm volatile("add %%gs:0, %[pointer]" \
+ : [pointer] "+r" (ptr_) \
+ : /* No inputs */ \
+ : ); \
+ \
+ ptr_; \
+MACRO_END
+
+#include <kern/processor.h>
+#include <kern/thread.h>
+
+struct percpu { /* per-CPU data; percpu_array holds one element per CPU and the PERCPU_DS segment is based at the element of its CPU */
+ struct percpu *self; /* address of this element; must stay the first member because percpu_ptr() reads it at %gs:0 */
+ int apic_id; /* value of apic_get_current_cpu() when init_percpu() ran for this element */
+ int cpu_id; /* index of this element in percpu_array; exported to assembly as PERCPU_CPU_ID and returned by cpu_number() */
+ struct processor processor; /* per the commit message, kept here so current_processor() does not need a cpu_number() lookup */
+/*
+ * Not members: the declarations below are commented out (the commit
+ * message notes that work remains to use other percpu arrays).
+ *
+ struct machine_slot machine_slot;
+ struct mp_desc_table mp_desc_table;
+ thread_t active_thread;
+ vm_offset_t active_stack;
+ vm_offset_t int_stack_top;
+ vm_offset_t int_stack_base;
+ ast_t need_ast;
+ ipc_kmsg_t ipc_kmsg_cache;
+ pmap_update_list cpu_update_list;
+ spl_t saved_ipl;
+ spl_t curr_ipl;
+ timer_data_t kernel_timer;
+ timer_t current_timer;
+ unsigned long in_interrupt;
+*/
+};
+
+extern struct percpu percpu_array[NCPUS];
+
+void init_percpu(int cpu);
+
+#endif /* _PERCPU_H_ */
diff --git a/i386/i386/pit.c b/i386/i386/pit.c
index 6c006a98..9e527fca 100644
--- a/i386/i386/pit.c
+++ b/i386/i386/pit.c
@@ -118,7 +118,7 @@ pit_mdelay(int msec)
void
clkstart(void)
{
- if (cpu_number() != 0)
+ if (cpu_number_slow() != 0)
/* Only one PIT initialization is needed */
return;
unsigned char byte;
diff --git a/i386/i386/spl.S b/i386/i386/spl.S
index 2f2c8e3a..9ce780f4 100644
--- a/i386/i386/spl.S
+++ b/i386/i386/spl.S
@@ -48,7 +48,7 @@ lock orl $1,hyp_shared_info+CPU_PENDING_SEL; /* Yes, activate it */ \
ENTRY(spl0)
mb;
- CPU_NUMBER(%edx)
+ CPU_NUMBER_NO_GS(%edx)
movl CX(EXT(curr_ipl),%edx),%eax /* save current ipl */
pushl %eax
cli /* disable interrupts */
@@ -77,7 +77,7 @@ ENTRY(spl0)
#endif
cli /* disable interrupts */
1:
- CPU_NUMBER(%edx)
+ CPU_NUMBER_NO_GS(%edx)
cmpl $(SPL0),CX(EXT(curr_ipl),%edx) /* are we at spl0? */
je 1f /* yes, all done */
movl $(SPL0),CX(EXT(curr_ipl),%edx) /* set ipl */
@@ -123,14 +123,14 @@ ENTRY(spl7)
mb;
/* just clear IF */
cli
- CPU_NUMBER(%edx)
+ CPU_NUMBER_NO_GS(%edx)
movl $SPL7,%eax
xchgl CX(EXT(curr_ipl),%edx),%eax
ret
ENTRY(splx)
movl S_ARG0,%edx /* get ipl */
- CPU_NUMBER(%eax)
+ CPU_NUMBER_NO_GS(%eax)
#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN)
/* First make sure that if we're exitting from ipl7, IF is still cleared */
cmpl $SPL7,CX(EXT(curr_ipl),%eax) /* from ipl7? */
@@ -145,7 +145,7 @@ ENTRY(splx)
#endif /* (MACH_KDB || MACH_TTD) && !MACH_XEN */
testl %edx,%edx /* spl0? */
jz EXT(spl0) /* yes, handle specially */
- CPU_NUMBER(%eax)
+ CPU_NUMBER_NO_GS(%eax)
cmpl CX(EXT(curr_ipl),%eax),%edx /* same ipl as current? */
jne spl /* no */
cmpl $SPL7,%edx /* spl7? */
@@ -194,7 +194,7 @@ splx_cli:
1:
xorl %edx,%edx /* edx = ipl 0 */
2:
- CPU_NUMBER(%eax)
+ CPU_NUMBER_NO_GS(%eax)
cmpl CX(EXT(curr_ipl),%eax),%edx /* same ipl as current? */
je 1f /* yes, all done */
movl %edx,CX(EXT(curr_ipl),%eax) /* set ipl */
@@ -213,7 +213,7 @@ splx_cli:
.align TEXT_ALIGN
.globl spl
spl:
- CPU_NUMBER(%eax)
+ CPU_NUMBER_NO_GS(%eax)
#if (MACH_KDB || MACH_TTD) && !defined(MACH_XEN)
/* First make sure that if we're exitting from ipl7, IF is still cleared */
cmpl $SPL7,CX(EXT(curr_ipl),%eax) /* from ipl7? */
@@ -233,7 +233,7 @@ spl:
/* get int mask */
#endif
cli /* disable interrupts */
- CPU_NUMBER(%eax)
+ CPU_NUMBER_NO_GS(%eax)
xchgl CX(EXT(curr_ipl),%eax),%edx /* set ipl */
#ifdef MACH_XEN
XEN_SETMASK() /* program PICs with new mask */