-rw-r--r--  ChangeLog | 9
-rw-r--r--  Makefile.am | 26
-rw-r--r--  Makefile.correct_output_files_for_.S_files.patch | 18
-rw-r--r--  Makefrag.am | 21
-rw-r--r--  chips/busses.h | 4
-rw-r--r--  config.status.dep.patch | 34
-rw-r--r--  configure.ac | 150
-rw-r--r--  ddb/db_aout.c | 3
-rw-r--r--  ddb/db_break.c | 10
-rw-r--r--  ddb/db_break.h | 5
-rw-r--r--  ddb/db_sym.c | 31
-rw-r--r--  ddb/db_sym.h | 49
-rw-r--r--  device/chario.c | 6
-rw-r--r--  device/cons.c | 11
-rw-r--r--  device/cons.h | 10
-rw-r--r--  device/dev_hdr.h | 16
-rw-r--r--  device/dev_pager.c | 7
-rw-r--r--  device/device_init.c | 3
-rw-r--r--  device/ds_routines.c | 28
-rw-r--r--  device/ds_routines.h | 5
-rw-r--r--  device/io_req.h | 2
-rw-r--r--  device/net_io.c | 45
-rw-r--r--  device/net_io.h | 3
-rw-r--r--  device/subrs.c | 5
-rw-r--r--  device/subrs.h | 34
-rw-r--r--  device/tty.h | 48
-rw-r--r--  doc/Makefrag.am | 10
-rw-r--r--  doc/mach.texi | 10
-rw-r--r--  i386/Makefrag.am | 61
-rw-r--r--  i386/configfrag.ac | 29
-rw-r--r--  i386/i386/db_disasm.c | 11
-rw-r--r--  i386/i386/db_interface.c | 108
-rw-r--r--  i386/i386/db_machdep.h | 1
-rw-r--r--  i386/i386/db_trace.c | 10
-rw-r--r--  i386/i386/debug_i386.c | 2
-rw-r--r--  i386/i386/debug_trace.S | 1
-rw-r--r--  i386/i386/fpu.c | 331
-rw-r--r--  i386/i386/fpu.h | 29
-rw-r--r--  i386/i386/gdt.c | 43
-rw-r--r--  i386/i386/gdt.h | 12
-rw-r--r--  i386/i386/i386asm.sym | 23
-rw-r--r--  i386/i386/idt.c | 5
-rw-r--r--  i386/i386/idt_inittab.S | 20
-rw-r--r--  i386/i386/io_map.c | 1
-rw-r--r--  i386/i386/ipl.h | 2
-rw-r--r--  i386/i386/ktss.c | 7
-rw-r--r--  i386/i386/ktss.h | 2
-rw-r--r--  i386/i386/kttd_interface.c | 8
-rw-r--r--  i386/i386/ldt.c | 15
-rw-r--r--  i386/i386/ldt.h | 2
-rw-r--r--  i386/i386/lock.h | 6
-rw-r--r--  i386/i386/locore.S | 76
-rw-r--r--  i386/i386/locore.h | 4
-rw-r--r--  i386/i386/loose_ends.c | 3
-rw-r--r--  i386/i386/loose_ends.h | 33
-rw-r--r--  i386/i386/model_dep.h | 60
-rw-r--r--  i386/i386/mp_desc.c | 6
-rw-r--r--  i386/i386/pcb.c | 40
-rw-r--r--  i386/i386/pcb.h | 12
-rw-r--r--  i386/i386/phys.c | 7
-rw-r--r--  i386/i386/pic.h | 5
-rw-r--r--  i386/i386/pit.c | 1
-rw-r--r--  i386/i386/pit.h | 2
-rw-r--r--  i386/i386/proc_reg.h | 65
-rw-r--r--  i386/i386/seg.h | 26
-rw-r--r--  i386/i386/spl.S | 18
-rw-r--r--  i386/i386/spl.h | 19
-rw-r--r--  i386/i386/thread.h | 9
-rw-r--r--  i386/i386/trap.c | 43
-rw-r--r--  i386/i386/trap.h | 3
-rw-r--r--  i386/i386/tss.h | 2
-rw-r--r--  i386/i386/user_ldt.c | 30
-rw-r--r--  i386/i386/user_ldt.h | 3
-rw-r--r--  i386/i386/vm_param.h | 42
-rw-r--r--  i386/i386/xen.h | 363
-rw-r--r--  i386/i386at/autoconf.c | 10
-rw-r--r--  i386/i386at/autoconf.h | 43
-rw-r--r--  i386/i386at/boothdr.S | 17
-rw-r--r--  i386/i386at/com.c | 37
-rw-r--r--  i386/i386at/com.h | 45
-rw-r--r--  i386/i386at/conf.c | 33
-rw-r--r--  i386/i386at/cons_conf.c | 8
-rw-r--r--  i386/i386at/idt.h | 4
-rw-r--r--  i386/i386at/int_init.h | 34
-rw-r--r--  i386/i386at/kd.c | 105
-rw-r--r--  i386/i386at/kd.h | 50
-rw-r--r--  i386/i386at/kd_event.c | 1
-rw-r--r--  i386/i386at/kd_mouse.c | 14
-rw-r--r--  i386/i386at/lpr.c | 29
-rw-r--r--  i386/i386at/mem.c | 47
-rw-r--r--  i386/i386at/model_dep.c | 301
-rw-r--r--  i386/i386at/pic_isa.c | 6
-rw-r--r--  i386/i386at/rtc.c | 2
-rw-r--r--  i386/i386at/rtc.h | 3
-rw-r--r--  i386/include/mach/i386/cthreads.h | 4
-rw-r--r--  i386/include/mach/i386/fp_reg.h | 23
-rw-r--r--  i386/include/mach/i386/thread_status.h | 2
-rw-r--r--  i386/include/mach/i386/vm_param.h | 11
-rw-r--r--  i386/include/mach/i386/vm_types.h | 1
-rw-r--r--  i386/include/mach/sa/stdarg.h | 12
-rw-r--r--  i386/intel/pmap.c | 569
-rw-r--r--  i386/intel/pmap.h | 94
-rw-r--r--  i386/intel/read_fault.h | 35
-rw-r--r--  i386/ldscript | 104
-rw-r--r--  i386/linux/dev/include/linux/autoconf.h | 4
-rw-r--r--  i386/xen/Makefrag.am | 33
-rw-r--r--  i386/xen/xen.c | 77
-rw-r--r--  i386/xen/xen_boothdr.S | 184
-rw-r--r--  i386/xen/xen_locore.S | 110
-rw-r--r--  include/device/net_status.h | 1
-rw-r--r--  include/mach/mach4.defs | 30
-rw-r--r--  include/mach/memory_object.h | 3
-rw-r--r--  include/mach/xen.h | 85
-rw-r--r--  include/string.h | 2
-rw-r--r--  include/sys/types.h | 18
-rw-r--r--  ipc/ipc_kmsg.c | 9
-rw-r--r--  ipc/ipc_mqueue.h | 3
-rw-r--r--  ipc/ipc_notify.c | 12
-rw-r--r--  ipc/mach_debug.c | 6
-rw-r--r--  ipc/mach_msg.c | 12
-rw-r--r--  ipc/mach_port.c | 17
-rw-r--r--  ipc/mach_port.h | 57
-rw-r--r--  kern/ast.c | 1
-rw-r--r--  kern/ast.h | 4
-rw-r--r--  kern/bootstrap.c | 32
-rw-r--r--  kern/debug.c | 60
-rw-r--r--  kern/debug.h | 3
-rw-r--r--  kern/eventcount.c | 3
-rw-r--r--  kern/eventcount.h | 2
-rw-r--r--  kern/exception.c | 27
-rw-r--r--  kern/ipc_kobject.c | 7
-rw-r--r--  kern/ipc_kobject.h | 10
-rw-r--r--  kern/ipc_mig.c | 7
-rw-r--r--  kern/ipc_mig.h | 65
-rw-r--r--  kern/ipc_tt.c | 2
-rw-r--r--  kern/ipc_tt.h | 3
-rw-r--r--  kern/lock.c | 4
-rw-r--r--  kern/lock_mon.c | 6
-rw-r--r--  kern/mach_clock.c | 13
-rw-r--r--  kern/mach_clock.h | 8
-rw-r--r--  kern/machine.c | 6
-rw-r--r--  kern/machine.h | 58
-rw-r--r--  kern/pc_sample.c | 1
-rw-r--r--  kern/printf.c | 6
-rw-r--r--  kern/printf.h | 2
-rw-r--r--  kern/priority.c | 1
-rw-r--r--  kern/processor.c | 1
-rw-r--r--  kern/processor.h | 2
-rw-r--r--  kern/queue.h | 1
-rw-r--r--  kern/sched_prim.c | 25
-rw-r--r--  kern/sched_prim.h | 12
-rw-r--r--  kern/startup.c | 9
-rw-r--r--  kern/syscall_subr.c | 4
-rw-r--r--  kern/syscall_subr.h | 3
-rw-r--r--  kern/syscall_sw.c | 6
-rw-r--r--  kern/task.c | 2
-rw-r--r--  kern/task.h | 1
-rw-r--r--  kern/thread.c | 5
-rw-r--r--  kern/thread.h | 26
-rw-r--r--  kern/time_stamp.c | 1
-rw-r--r--  kern/time_stamp.h | 3
-rw-r--r--  kern/timer.h | 4
-rw-r--r--  kern/zalloc.c | 16
-rw-r--r--  linux/Makefrag.am | 6
-rw-r--r--  linux/configfrag.ac | 4
-rw-r--r--  linux/dev/arch/i386/kernel/irq.c | 14
-rw-r--r--  linux/dev/drivers/net/Space.c | 4
-rw-r--r--  linux/dev/glue/block.c | 25
-rw-r--r--  linux/dev/glue/kmem.c | 5
-rw-r--r--  linux/dev/glue/net.c | 26
-rw-r--r--  linux/dev/include/asm-i386/segment.h | 375
-rw-r--r--  linux/dev/include/linux/netdevice.h | 1
-rw-r--r--  linux/dev/kernel/printk.c | 2
-rw-r--r--  linux/dev/net/core/dev.c | 28
-rw-r--r--  linux/pcmcia-cs/clients/smc91c92_cs.c | 2
-rw-r--r--  linux/pcmcia-cs/glue/ds.c | 8
-rw-r--r--  linux/pcmcia-cs/glue/pcmcia_glue.h | 6
-rw-r--r--  linux/pcmcia-cs/glue/wireless_glue.h | 2
-rw-r--r--  linux/src/arch/i386/kernel/bios32.c | 20
-rw-r--r--  linux/src/drivers/block/ide.c | 28
-rw-r--r--  linux/src/drivers/block/ide.h | 2
-rw-r--r--  linux/src/drivers/block/triton.c | 19
-rw-r--r--  linux/src/drivers/net/apricot.c | 16
-rw-r--r--  linux/src/drivers/net/rtl8139.c | 9
-rw-r--r--  linux/src/drivers/net/sis900.c | 1803
-rw-r--r--  linux/src/drivers/net/sis900.h | 284
-rw-r--r--  linux/src/drivers/net/sundance.c | 2
-rw-r--r--  linux/src/drivers/pci/pci.c | 4
-rw-r--r--  linux/src/drivers/scsi/aha1542.c | 8
-rw-r--r--  linux/src/drivers/scsi/eata.c | 2
-rw-r--r--  linux/src/include/asm-i386/segment.h | 109
-rw-r--r--  linux/src/include/linux/tqueue.h | 8
-rw-r--r--  util/atoi.c | 108
-rw-r--r--  util/atoi.h | 67
-rw-r--r--  util/putchar.c | 2
-rw-r--r--  util/putchar.h | 32
-rw-r--r--  util/puts.c | 3
-rw-r--r--  vm/memory_object.c | 36
-rw-r--r--  vm/memory_object_proxy.c | 200
-rw-r--r--  vm/pmap.h | 31
-rw-r--r--  vm/vm_fault.c | 2
-rw-r--r--  vm/vm_init.c | 2
-rw-r--r--  vm/vm_kern.c | 6
-rw-r--r--  vm/vm_kern.h | 5
-rw-r--r--  vm/vm_map.c | 26
-rw-r--r--  vm/vm_map.h | 63
-rw-r--r--  vm/vm_object.c | 6
-rw-r--r--  vm/vm_object.h | 12
-rw-r--r--  vm/vm_pageout.c | 4
-rw-r--r--  vm/vm_resident.c | 11
-rw-r--r--  vm/vm_resident.h | 45
-rw-r--r--  vm/vm_user.c | 24
-rw-r--r--  xen/Makefrag.am | 83
-rw-r--r--  xen/block.c | 689
-rw-r--r--  xen/block.h | 24
-rw-r--r--  xen/configfrag.ac | 44
-rw-r--r--  xen/console.c | 237
-rw-r--r--  xen/console.h | 33
-rw-r--r--  xen/evt.c | 109
-rw-r--r--  xen/evt.h | 29
-rw-r--r--  xen/grant.c | 142
-rw-r--r--  xen/grant.h | 33
-rw-r--r--  xen/net.c | 665
-rw-r--r--  xen/net.h | 24
-rw-r--r--  xen/public/COPYING | 38
-rw-r--r--  xen/public/arch-x86/xen-mca.h | 279
-rw-r--r--  xen/public/arch-x86/xen-x86_32.h | 180
-rw-r--r--  xen/public/arch-x86/xen-x86_64.h | 212
-rw-r--r--  xen/public/arch-x86/xen.h | 204
-rw-r--r--  xen/public/arch-x86_32.h | 27
-rw-r--r--  xen/public/arch-x86_64.h | 27
-rw-r--r--  xen/public/callback.h | 121
-rw-r--r--  xen/public/dom0_ops.h | 120
-rw-r--r--  xen/public/domctl.h | 680
-rw-r--r--  xen/public/elfnote.h | 233
-rw-r--r--  xen/public/elfstructs.h | 527
-rw-r--r--  xen/public/event_channel.h | 264
-rw-r--r--  xen/public/features.h | 83
-rw-r--r--  xen/public/grant_table.h | 438
-rw-r--r--  xen/public/io/blkif.h | 141
-rw-r--r--  xen/public/io/console.h | 51
-rw-r--r--  xen/public/io/fbif.h | 176
-rw-r--r--  xen/public/io/fsif.h | 191
-rw-r--r--  xen/public/io/kbdif.h | 132
-rw-r--r--  xen/public/io/netif.h | 205
-rw-r--r--  xen/public/io/pciif.h | 101
-rw-r--r--  xen/public/io/protocols.h | 40
-rw-r--r--  xen/public/io/ring.h | 307
-rw-r--r--  xen/public/io/tpmif.h | 77
-rw-r--r--  xen/public/io/xenbus.h | 80
-rw-r--r--  xen/public/io/xs_wire.h | 132
-rw-r--r--  xen/public/kexec.h | 189
-rw-r--r--  xen/public/libelf.h | 265
-rw-r--r--  xen/public/memory.h | 312
-rw-r--r--  xen/public/nmi.h | 78
-rw-r--r--  xen/public/physdev.h | 219
-rw-r--r--  xen/public/platform.h | 346
-rw-r--r--  xen/public/sched.h | 121
-rw-r--r--  xen/public/sysctl.h | 308
-rw-r--r--  xen/public/trace.h | 206
-rw-r--r--  xen/public/vcpu.h | 213
-rw-r--r--  xen/public/version.h | 91
-rw-r--r--  xen/public/xen-compat.h | 44
-rw-r--r--  xen/public/xen.h | 657
-rw-r--r--  xen/public/xencomm.h | 41
-rw-r--r--  xen/public/xenoprof.h | 138
-rw-r--r--  xen/ring.c | 61
-rw-r--r--  xen/ring.h | 34
-rw-r--r--  xen/store.c | 334
-rw-r--r--  xen/store.h | 54
-rw-r--r--  xen/time.c | 148
-rw-r--r--  xen/time.h | 25
-rw-r--r--  xen/xen.c | 87
-rw-r--r--  xen/xen.h | 27
274 files changed, 18295 insertions, 1351 deletions
diff --git a/ChangeLog b/ChangeLog
index fba7531e..9ed3b474 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,8 @@
-b09661f614823530846bc6c7c7ac0f503411ab6d is the last commit imported from CVS.
+e227045b06d62ee7d2fbab9d5ade9030ff43170b is the last commit imported from CVS.
All commits after that one have valid author and committer information.
Use this to examine the change log for earlier changes:
- $ git show b09661f614823530846bc6c7c7ac0f503411ab6d:ChangeLog.gdb
- $ git show b09661f614823530846bc6c7c7ac0f503411ab6d:ChangeLog
- $ git show b09661f614823530846bc6c7c7ac0f503411ab6d:ChangeLog.0
- $ git show b09661f614823530846bc6c7c7ac0f503411ab6d:ChangeLog.00
+ $ git show e227045b06d62ee7d2fbab9d5ade9030ff43170b:ChangeLog
+ $ git show e227045b06d62ee7d2fbab9d5ade9030ff43170b:ChangeLog.0
+ $ git show e227045b06d62ee7d2fbab9d5ade9030ff43170b:ChangeLog.00
diff --git a/Makefile.am b/Makefile.am
index a2e7c9db..04885ddb 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,6 +1,6 @@
# Makefile for GNU Mach.
-# Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
@@ -66,10 +66,6 @@ if disable_smashing_stack_protector
AM_CFLAGS += \
-fno-stack-protector
endif
-
-# For pre Automake 1.10.
-AM_CCASFLAGS += \
- $(AM_CPPFLAGS)
#
# MIG Setup.
@@ -82,6 +78,19 @@ MIGCOM = $(MIG) -n -cc cat - /dev/null
CPP = @CPP@ -x c
#
+# Other Tools' Configuration.
+#
+
+# Don't needlessly overwrite files that whose contents haven't changed. This
+# helps for avoinding unneccessary recompilation cycles when keeping
+# cross-compilation toolchains up-to-date. Thus, unconditionally use the
+# `install-sh' that is supplied by GNU Automake 1.10.1, as the GNU Coreutils
+# one doesn't provide this functionality yet (TODO: change that). TODO:
+# `build-aux' is hardcoded.
+install_sh = $(SHELL) $(abs_srcdir)/build-aux/install-sh -C
+INSTALL = $(install_sh)
+
+#
# The main kernel functionality.
#
@@ -124,11 +133,12 @@ noinst_PROGRAMS += \
gnumach.o
# This is the list of routines we decide is OK to steal from the C library.
-clib_routines := memcpy memmove memset bcopy bzero \
+clib_routines := memcmp memcpy memmove memset bcopy bzero \
strchr strstr strsep strpbrk strtok \
htonl htons ntohl ntohs \
udivdi3 __udivdi3 \
- etext _edata end _end # actually ld magic, not libc.
+ __rel_iplt_start __rel_iplt_end \
+ _START _start etext _edata end _end # actually ld magic, not libc.
gnumach-undef: gnumach.$(OBJEXT)
$(NM) -u $< | sed 's/ *U *//' | sort -u > $@
MOSTLYCLEANFILES += gnumach-undef
@@ -158,11 +168,9 @@ exec_boot_PROGRAMS = \
#
EXTRA_DIST += \
- Makefile.correct_output_files_for_.S_files.patch \
config.status.dep.patch
EXTRA_DIST += \
- ChangeLog.0 ChangeLog.00 \
DEVELOPMENT
dist-hook:
diff --git a/Makefile.correct_output_files_for_.S_files.patch b/Makefile.correct_output_files_for_.S_files.patch
deleted file mode 100644
index eb0c365b..00000000
--- a/Makefile.correct_output_files_for_.S_files.patch
+++ /dev/null
@@ -1,18 +0,0 @@
---- Makefile.save 2006-10-09 23:43:54.000000000 +0200
-+++ Makefile 2006-10-09 23:57:00.000000000 +0200
-@@ -1864,6 +1864,6 @@
- $(SHELL) ./config.status;; \
- *) \
-- echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
-- cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
-+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ frob_Makefile_for_correct_output_files_for_.S_files $(am__depfiles_maybe)'; \
-+ cd $(top_builddir) && $(SHELL) ./config.status $@ frob_Makefile_for_correct_output_files_for_.S_files $(am__depfiles_maybe);; \
- esac;
-
-@@ -3520,5 +3520,5 @@
-
- .S.o:
-- $(CCASCOMPILE) -c $<
-+ $(CCASCOMPILE) -c -o $@ $<
-
- .S.obj:
diff --git a/Makefrag.am b/Makefrag.am
index 6a520373..176d4459 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -110,6 +110,7 @@ libkernel_a_SOURCES += \
ipc/mach_msg.c \
ipc/mach_msg.h \
ipc/mach_port.c \
+ ipc/mach_port.h \
ipc/mach_rpc.c \
ipc/mach_debug.c \
ipc/port.h
@@ -145,6 +146,7 @@ libkernel_a_SOURCES += \
kern/ipc_kobject.c \
kern/ipc_kobject.h \
kern/ipc_mig.c \
+ kern/ipc_mig.h \
kern/ipc_sched.c \
kern/ipc_sched.h \
kern/ipc_tt.c \
@@ -161,6 +163,7 @@ libkernel_a_SOURCES += \
kern/mach_factor.h \
kern/mach_param.h \
kern/machine.c \
+ kern/machine.h \
kern/macro_help.h \
kern/pc_sample.c \
kern/pc_sample.h \
@@ -191,7 +194,6 @@ libkernel_a_SOURCES += \
kern/thread.h \
kern/thread_swap.c \
kern/thread_swap.h \
- kern/time_out.h \
kern/time_stamp.c \
kern/time_stamp.h \
kern/timer.c \
@@ -215,13 +217,17 @@ EXTRA_DIST += \
libkernel_a_SOURCES += \
util/putchar.c \
- util/puts.c
+ util/putchar.h \
+ util/puts.c \
+ util/atoi.c \
+ util/atoi.h
#
# Virtual memory implementation.
#
libkernel_a_SOURCES += \
+ vm/memory_object_proxy.c \
vm/memory_object.c \
vm/memory_object.h \
vm/pmap.h \
@@ -241,6 +247,8 @@ libkernel_a_SOURCES += \
vm/vm_pageout.c \
vm/vm_pageout.h \
vm/vm_resident.c \
+ vm/vm_resident.h \
+ vm/vm_types.h \
vm/vm_user.c \
vm/vm_user.h
EXTRA_DIST += \
@@ -282,6 +290,7 @@ libkernel_a_SOURCES += \
device/net_io.h \
device/param.h \
device/subrs.c \
+ device/subrs.h \
device/tty.h
EXTRA_DIST += \
device/device.srv \
@@ -380,7 +389,8 @@ include_mach_HEADERS = \
include/mach/vm_param.h \
include/mach/vm_prot.h \
include/mach/vm_statistics.h \
- include/mach/inline.h
+ include/mach/inline.h \
+ include/mach/xen.h
# If we name this `*_execdir', Automake won't add it to `install-data'...
include_mach_eXecdir = $(includedir)/mach/exec
@@ -511,6 +521,11 @@ include linux/Makefrag.am
#
# Platform specific parts.
#
+
+# Xen.
+if PLATFORM_xen
+include xen/Makefrag.am
+endif
#
# Architecture specific parts.
diff --git a/chips/busses.h b/chips/busses.h
index 1ff89f0e..49c0e44f 100644
--- a/chips/busses.h
+++ b/chips/busses.h
@@ -73,7 +73,7 @@ struct bus_ctlr {
struct bus_driver *driver; /* myself, as a device */
char *name; /* readability */
int unit; /* index in driver */
- int (*intr)(); /* interrupt handler(s) */
+ void (*intr)(); /* interrupt handler(s) */
vm_offset_t address; /* device virtual address */
int am; /* address modifier */
vm_offset_t phys_address;/* device phys address */
@@ -93,7 +93,7 @@ struct bus_device {
struct bus_driver *driver; /* autoconf info */
char *name; /* my name */
int unit;
- int (*intr)();
+ void (*intr)();
vm_offset_t address; /* device address */
int am; /* address modifier */
vm_offset_t phys_address;/* device phys address */
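
The two hunks above change the `intr' members of struct bus_ctlr and struct bus_device from int (*)() to void (*)(). A minimal sketch of the driver side under the new type (the driver and handler names are hypothetical, not taken from this patch):

    #include <chips/busses.h>

    /* Hypothetical driver: the interrupt handler is now declared as
     * returning void, matching the new (*intr)() field type. */
    static void foo_intr(int unit)
    {
        /* acknowledge the controller and service unit `unit' here */
    }

    static void foo_attach(struct bus_device *dev)
    {
        dev->intr = foo_intr;   /* ok with void (*intr)(); an int-returning
                                 * handler would now draw a type mismatch */
    }
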
diff --git a/config.status.dep.patch b/config.status.dep.patch
index 871fd773..868737bd 100644
--- a/config.status.dep.patch
+++ b/config.status.dep.patch
@@ -1,18 +1,18 @@
---- config.status.save 2006-10-09 22:28:47.000000000 +0200
-+++ config.status 2006-10-09 22:29:34.000000000 +0200
-@@ -1610,7 +1610,14 @@
- { (exit 1); exit 1; }; }; }
-
- # echo "creating $dirpart/$file"
-- echo '# dummy' > "$dirpart/$file"
-+ # Try to guess what file this dependency file is from...
-+ f=$srcdir/`dirname "$fdir"`/`basename "$file" .Po | sed s/lib[^-]\*-//`
-+ for f in "$f"*; do
-+ case $f in
-+ *.c | *.S) echo "$f"': $(filter-out $(DIST_SOURCES),$(SOURCES))';;
-+ *) echo '# dummy';;
-+ esac
-+ done > "$dirpart/$file"
+--- config.status 2009-10-26 23:57:14.000000000 +0100
++++ config.status.new 2009-10-27 00:04:26.000000000 +0100
+@@ -1553,7 +1553,14 @@
+ s/.*/./; q'`
+ as_dir=$dirpart/$fdir; as_fn_mkdir_p
+ # echo "creating $dirpart/$file"
+- echo '# dummy' > "$dirpart/$file"
++ # Try to guess what file this dependency file is from...
++ f=$srcdir/`dirname "$fdir"`/`basename "$file" .Po | sed s/lib[^-]\*-//`
++ for f in "$f"*; do
++ case $f in
++ *.c | *.S) echo "$f"': $(filter-out $(DIST_SOURCES),$(SOURCES))';;
++ *) echo '# dummy';;
++ esac
++ done > "$dirpart/$file"
+ done
done
- done
- ;;
+ }
diff --git a/configure.ac b/configure.ac
index c387349f..50ec6b40 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
dnl Configure script for GNU Mach.
-dnl Copyright (C) 1997, 1998, 1999, 2004, 2006, 2007
-dnl Free Software Foundation, Inc.
+dnl Copyright (C) 1997, 1998, 1999, 2004, 2006, 2007, 2008, 2010 Free Software
+dnl Foundation, Inc.
dnl Permission to use, copy, modify and distribute this software and its
dnl documentation is hereby granted, provided that both the copyright
@@ -24,13 +24,11 @@ AC_CONFIG_SRCDIR([kern/ipc_kobject.c])
AC_CONFIG_AUX_DIR([build-aux])
AM_INIT_AUTOMAKE(
+ [1.10.2]
dnl Don't define `PACKAGE' and `VERSION'.
[no-define]
dnl Do not clutter the main build directory.
[subdir-objects]
-dnl This is only a very rough estimation of the version we actually need. Feel
-dnl free to refine that.
- [1.9]
dnl We require GNU make.
[-Wall -Wno-portability]
)
@@ -38,19 +36,22 @@ dnl We require GNU make.
#
# Deduce the architecture we're building for.
#
+# TODO: Should we also support constructs like `i686_xen-pc-gnu' or
+# `i686-pc_xen-gnu'?
AC_CANONICAL_HOST
AC_ARG_ENABLE([platform],
AS_HELP_STRING([--enable-platform=PLATFORM], [specify the platform to build a
- kernel for. Defaults to `at' for `i?86'. No other possibilities.]),
+ kernel for. Defaults to `at' for `i?86'. The other possibility is
+ `xen'.]),
[host_platform=$enable_platform],
[host_platform=default])
[# Supported configurations.
case $host_platform:$host_cpu in
default:i?86)
host_platform=at;;
- at:i?86)
+ at:i?86 | xen:i?86)
:;;
*)]
AC_MSG_ERROR([unsupported combination of cpu type `$host_cpu' and platform
@@ -126,6 +127,9 @@ esac]
# PC AT.
# TODO. Currently handled in `i386/configfrag.ac'.
+# Xen.
+m4_include([xen/configfrag.ac])
+
# Machine-specific configuration.
# ix86.
@@ -214,138 +218,6 @@ AC_CONFIG_COMMANDS_POST([
[You have a serious problem. Please contact <$PACKAGE_BUGREPORT>.])
[fi
])
-
-
-#
-# Makefile.correct_output_files_for_.S_files.patch
-#
-# The used version of Automake might emit rules that don't put compiled (pre
-# processed) Assembler files where they belong. We're bitten by this bug
-# because we use the Automake option `subdir-objects', to not clutter the main
-# build directory with all the object files. The problem is that the emitted
-# rules will place those in the main build directory, but later, when creating
-# the libraries, look for them in the subdirectories.
-#
-# See <http://sourceware.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=automake&pr=492>.
-#
-# Another note: even with versions of Automake that don't have this issue
-# fixed, a correct rule is emitted for linux/src/arch/i386/lib/semaphore.S, see
-# the comment by Thomas Schwinge on 2006-10-10 on the above bug report.
-#
-# Again, a real patch is being used here and not some `sed' magic, so that
-# it'll fail loudly in case some internals we're relying on are changed.
-#
-# TODO. Remove this block as soon as we don't want to support such legacy
-# versions of Automake anymore.
-#
-
-AC_CONFIG_COMMANDS([frob_Makefile_for_correct_output_files_for_.S_files],[
- # Do we have a version of Automake that doesn't have this bug?
- if grep -q CPPASCOMPILE.\*-c.\*-o < "$srcdir"/Makefile.in
- then] AC_MSG_NOTICE([... not a problem with the used version of Automake.])
- [# We don't.
- elif "$PATCH" -f \
- < "$srcdir"/Makefile.correct_output_files_for_.S_files.patch
- then]
- AC_MSG_NOTICE([Applied a patch to fix a bug in your version of Automake.]
- [The applied fix should work in most cases. You could also consider]
- [updating to Automake 1.10 or better.])
- [else]
- AC_MSG_ERROR([failed to patch using ]
- [`Makefile.correct_output_files_for_.S_files.patch'.]
- [You have a serious problem. Please contact <$PACKAGE_BUGREPORT>.])
- [fi
-],[
- PACKAGE_BUGREPORT=$PACKAGE_BUGREPORT
- PATCH=$PATCH
-])
-
-
-#
-# Dependency tracking for `.S' files
-#
-# The used version of Automake might not support automatic dependency tracking
-# for (preprocessed) Assembler source files. We might add such support here.
-# But since adding such support is nontrivial and the nonexistent dependency
-# tracking (missing only for .S files) is not considered a fundamental problem,
-# we save the costs of adding such support here. You should instead consider
-# updating to Automake 1.10 or better.
-#
-# TODO. Remove this block as soon as we don't want to support such legacy
-# versions of Automake anymore.
-#
-
-AC_CONFIG_COMMANDS([frob_Makefile_for_dependency_tracking_for_.S_files],[
- # Do we have a version of Automake that doesn't have this bug?
- if grep -q CPPASCOMPILE.\*depbase < "$srcdir"/Makefile.in
- then] AC_MSG_NOTICE([... not a problem with the used version of Automake.])
- [# We don't.
- else]
- AC_MSG_NOTICE([Your build of GNU Mach won't have dependency tracking for]
- [(preprocessed) Assembler source files. Consider updating to Automake]
- [1.10 or better if you need such support.])
- [# Make ``make -j'' parallel builds work at least.
- cat >> Makefile <<"EOT"
-
-i386/i386at/boothdr.o: i386/i386/i386asm.h
-i386/i386at/interrupt.o: i386/i386/i386asm.h
-i386/i386/cswitch.o: i386/i386/i386asm.h
-i386/i386/locore.o: i386/i386/i386asm.h
-EOT]
-dnl In case someone feels like completing this instead of simply using a fixed
-dnl version of Automake...
-dnl if "$PATCH" -f; then :; else]
-dnl AC_MSG_ERROR([failed to patch using]
-dnl [`Makefile.dependency_tracking_for_.S_files.patch'.]
-dnl [You have a serious problem. Please contact <$PACKAGE_BUGREPORT>.])
-dnl [fi < "$srcdir"/Makefile.dependency_tracking_for_.S_files.patch
-dnl
-dnl # Change the Makefile to care for dependencies of .S files.
-dnl ( cd "$srcdir"/ &&
-dnl find ./ -name \*.S ) | \
-dnl sed -e 's%^\./%%' -e 's%\.S$%%' -e 's%^\(.*\)/%\1 %' | \
-dnl while read d f; do
-dnl echo &&
-dnl echo "$d"/"$f"'.$(OBJEXT): '"$d"'/$(am__dirstamp) ' \
-dnl "$d"'/$(DEPDIR)/$(am__dirstamp)' &&
-dnl echo 'include '"$d"'/$(DEPDIR)/'"$f"'.Po' &&
-dnl
-dnl # Create the dependency files if they don't exist so far.
-dnl if test -f "$d"/"$DEPDIR"/"$f"'.Po'; then :; else
-dnl echo '# dummy' > "$d"/"$DEPDIR"/"$f"'.Po'
-dnl fi
-dnl done >> Makefile
-dnl
-dnl TODO. linux/src/arch/i386/lib/.deps/liblinux_a-semaphore.Po vs.
-dnl linux/src/arch/i386/lib/.deps/semaphore.Po. Because of the above
-dnl mentioned special rule for `semaphore.S' the .S.o rule is not used.
-dnl
-dnl gcc -nostdinc -imacros config.h -Ii386 -I. -I../i386 -I../include -I../i386/include/mach/sa -DASSEMBLER -g -O2 -c ../i386/i386at/boothdr.S
-dnl
-dnl Makefile
-dnl
-dnl COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-dnl .c.o:
-dnl depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`; \
-dnl if $(COMPILE) -MT $@ -MD -MP -MF "$$depbase.Tpo" -c -o $@ $<; \
-dnl then mv -f "$$depbase.Tpo" "$$depbase.Po"; else rm -f "$$depbase.Tpo"; exit 1; fi
-dnl # source='$<' object='$@' libtool=no \
-dnl # DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) \
-dnl # $(COMPILE) -c -o $@ $<
-dnl
-dnl CCAS = gcc
-dnl CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
-dnl .S.o:
-dnl $(CCASCOMPILE) -c $<
-dnl
-dnl source=../i386/i386at/boothdr.S object=i386/i386at/boothdr.o libtool=no DEPDIR=.deps depmode=gcc3 ../depcomp gcc -nostdinc -imacros config.h -Ii386 -I. -I../i386 -I../include -I../i386/include/mach/sa -DASSEMBLER -g -O2 -c -o i386/i386at/boothdr.o ../i386/i386at/boothdr.S
-dnl ... creates a correct i386/i386at/.deps/boothdr.Po.
- [fi
-dnl ],[
-dnl DEPDIR=$DEPDIR
-dnl PACKAGE_BUGREPORT=$PACKAGE_BUGREPORT
-dnl PATCH=$PATCH
-])
#
# Fire.
diff --git a/ddb/db_aout.c b/ddb/db_aout.c
index b55a4a14..42fa6f75 100644
--- a/ddb/db_aout.c
+++ b/ddb/db_aout.c
@@ -376,7 +376,8 @@ aout_db_search_symbol(symtab, off, strategy, diffp)
* Return the name and value for a symbol.
*/
void
-aout_db_symbol_values(sym, namep, valuep)
+aout_db_symbol_values(stab, sym, namep, valuep)
+ db_symtab_t *stab;
db_sym_t sym;
char **namep;
db_expr_t *valuep;
diff --git a/ddb/db_break.c b/ddb/db_break.c
index c8e12f56..9b1d6049 100644
--- a/ddb/db_break.c
+++ b/ddb/db_break.c
@@ -264,7 +264,7 @@ db_check_breakpoint_valid()
}
}
-void
+db_breakpoint_t
db_set_breakpoint(task, addr, count, thread, task_bpt)
task_t task;
db_addr_t addr;
@@ -281,17 +281,17 @@ db_set_breakpoint(task, addr, count, thread, task_bpt)
if (thread == THREAD_NULL
|| db_find_thread_breakpoint(bkpt, thread)) {
db_printf("Already set.\n");
- return;
+ return NULL;
}
} else {
if (!DB_CHECK_ACCESS(addr, BKPT_SIZE, task)) {
db_printf("Cannot set break point at %X\n", addr);
- return;
+ return NULL;
}
alloc_bkpt = bkpt = db_breakpoint_alloc();
if (bkpt == 0) {
db_printf("Too many breakpoints.\n");
- return;
+ return NULL;
}
bkpt->task = task;
bkpt->flags = (task && thread == THREAD_NULL)?
@@ -306,12 +306,14 @@ db_set_breakpoint(task, addr, count, thread, task_bpt)
if (alloc_bkpt)
db_breakpoint_free(alloc_bkpt);
db_printf("Too many thread_breakpoints.\n");
+ return NULL;
} else {
db_printf("set breakpoint #%d\n", db_breakpoint_number);
if (alloc_bkpt) {
bkpt->link = db_breakpoint_list;
db_breakpoint_list = bkpt;
}
+ return bkpt;
}
}
diff --git a/ddb/db_break.h b/ddb/db_break.h
index ec7d8806..20d74d26 100644
--- a/ddb/db_break.h
+++ b/ddb/db_break.h
@@ -64,6 +64,7 @@ struct db_breakpoint {
#define BKPT_USR_GLOBAL 0x8 /* global user space break point */
#define BKPT_SET_IN_MEM 0x10 /* break point is set in memory */
#define BKPT_1ST_SET 0x20 /* 1st time set of user global bkpt */
+#define BKPT_EXTERNAL 0x40 /* break point is not from ddb */
vm_size_t bkpt_inst; /* saved instruction at bkpt */
struct db_breakpoint *link; /* link in in-use or free chain */
};
@@ -83,4 +84,8 @@ extern db_breakpoint_t db_set_temp_breakpoint( task_t task, db_addr_t addr);
extern void db_delete_temp_breakpoint
( task_t task, db_breakpoint_t bkpt);
+extern db_breakpoint_t db_set_breakpoint(task_t task, db_addr_t addr,
+ int count, thread_t thread,
+ boolean_t task_bpt);
+
#endif /* _DDB_DB_BREAK_H_ */
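
With db_set_breakpoint now returning the breakpoint (or NULL on failure), and with the new BKPT_EXTERNAL flag marking breakpoints that do not belong to ddb itself, callers outside the debugger can keep the handle. A hedged sketch of such a caller (hypothetical, not part of this patch):

    #include <ddb/db_break.h>

    /* Hypothetical external user of the debugger's breakpoint table. */
    static db_breakpoint_t set_external_breakpoint(task_t task, db_addr_t addr)
    {
        db_breakpoint_t bkpt;

        bkpt = db_set_breakpoint(task, addr, 1 /* count */,
                                 THREAD_NULL, FALSE /* task_bpt */);
        if (bkpt == NULL)
            return NULL;                /* already set, inaccessible, or table full */
        bkpt->flags |= BKPT_EXTERNAL;   /* mark it as not owned by ddb */
        return bkpt;
    }
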
diff --git a/ddb/db_sym.c b/ddb/db_sym.c
index 6bf8ec2e..dd721384 100644
--- a/ddb/db_sym.c
+++ b/ddb/db_sym.c
@@ -129,6 +129,8 @@ db_value_of_name(name, valuep)
if (sym == DB_SYM_NULL)
return (FALSE);
db_symbol_values(0, sym, &name, valuep);
+
+ db_free_symbol(sym);
return (TRUE);
}
@@ -177,6 +179,7 @@ db_lookup(symstr)
db_last_symtab = &db_symtabs[i];
return sp;
}
+ db_free_symbol(sp);
}
return 0;
}
@@ -273,11 +276,16 @@ db_name_is_ambiguous(sym_name)
return FALSE;
for (i = 0; i < db_nsymtab; i++) {
- if (X_db_lookup(&db_symtabs[i], sym_name)) {
+ db_sym_t sp;
+ if (sp = X_db_lookup(&db_symtabs[i], sym_name)) {
if (found_once)
+ {
+ db_free_symbol(sp);
return TRUE;
+ }
found_once = TRUE;
}
+ db_free_symbol(sp);
}
return FALSE;
}
@@ -316,6 +324,7 @@ db_search_task_symbol(val, strategy, offp, task)
*/
if (ret == DB_SYM_NULL || (*offp) > 0x1000000)
{
+ db_free_symbol(ret);
task = db_current_task();
ret = db_search_in_task_symbol(val, strategy, offp, task);
}
@@ -354,6 +363,7 @@ db_search_in_task_symbol(val, strategy, offp, task)
{ /* first hit */
db_last_symtab = sp;
diff = newdiff;
+ db_free_symbol(ret);
ret = sym;
continue;
}
@@ -363,6 +373,7 @@ db_search_in_task_symbol(val, strategy, offp, task)
{ /* closer null map match */
db_last_symtab = sp;
diff = newdiff;
+ db_free_symbol(ret);
ret = sym;
continue;
}
@@ -376,6 +387,7 @@ db_search_in_task_symbol(val, strategy, offp, task)
*/
db_last_symtab = sp;
diff = newdiff;
+ db_free_symbol(ret);
ret = sym;
continue;
}
@@ -450,8 +462,9 @@ db_task_printsym(off, strategy, task)
cursym = db_search_task_symbol(off, strategy, &d, task);
db_symbol_values(0, cursym, &name, &value);
- if (name == 0 || d >= db_maxoff || value == 0) {
+ if (name == 0 || d >= db_maxoff || value == 0 || *name == 0) {
db_printf("%#n", off);
+ db_free_symbol(cursym);
return;
}
db_printf("%s", name);
@@ -465,6 +478,7 @@ db_task_printsym(off, strategy, task)
db_printf("]");
}
}
+ db_free_symbol(cursym);
}
void
@@ -487,6 +501,13 @@ db_line_at_pc( sym, filename, linenum, pc)
FALSE;
}
+void db_free_symbol(db_sym_t s)
+{
+ return (db_last_symtab) ?
+ X_db_free_symbol( db_last_symtab, s) :
+ FALSE;
+}
+
/*
* Switch into symbol-table specific routines
*/
@@ -499,6 +520,8 @@ extern boolean_t coff_db_sym_init(), coff_db_line_at_pc();
extern db_sym_t coff_db_lookup(), coff_db_search_symbol();
extern void coff_db_symbol_values();
+void dummy_db_free_symbol(sym_t) { }
+
struct db_sym_switch x_db[] = {
/* BSD a.out format (really, sdb/dbx(1) symtabs) */
@@ -506,14 +529,14 @@ struct db_sym_switch x_db[] = {
{ 0,},
#else /* DB_NO_AOUT */
{ aout_db_sym_init, aout_db_lookup, aout_db_search_symbol,
- aout_db_line_at_pc, aout_db_symbol_values },
+ aout_db_line_at_pc, aout_db_symbol_values, dummy_db_free_symbol },
#endif /* DB_NO_AOUT */
#ifdef DB_NO_COFF
{ 0,},
#else /* DB_NO_COFF */
{ coff_db_sym_init, coff_db_lookup, coff_db_search_symbol,
- coff_db_line_at_pc, coff_db_symbol_values },
+ coff_db_line_at_pc, coff_db_symbol_values, dummy_db_free_symbol },
#endif /* DB_NO_COFF */
/* Machdep, not inited here */
diff --git a/ddb/db_sym.h b/ddb/db_sym.h
index 216e327d..e40264ab 100644
--- a/ddb/db_sym.h
+++ b/ddb/db_sym.h
@@ -28,6 +28,9 @@
* Date: 8/90
*/
+#ifndef _DDB_DB_SYM_H_
+#define _DDB_DB_SYM_H_
+
#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
#include <machine/db_machdep.h>
@@ -115,22 +118,43 @@ extern void db_symbol_values( db_symtab_t *stab,
/* find name&value given approx val */
#define db_find_sym_and_offset(val,namep,offp) \
- db_symbol_values(0, db_search_symbol(val,DB_STGY_ANY,offp),namep,0)
+ do { \
+ db_sym_t s; \
+ db_symbol_values(0, s = db_search_symbol(val,DB_STGY_ANY,offp) \
+ ,namep,0); \
+ db_free_symbol(s); \
+ } while(0);
+
/* ditto, but no locals */
#define db_find_xtrn_sym_and_offset(val,namep,offp) \
- db_symbol_values(0, db_search_symbol(val,DB_STGY_XTRN,offp),namep,0)
+ do { \
+ db_sym_t s; \
+ db_symbol_values(0, s = db_search_symbol(val,DB_STGY_XTRN,offp) \
+ ,namep,0); \
+ db_free_symbol(s); \
+ } while(0);
/* find name&value given approx val */
#define db_find_task_sym_and_offset(val,namep,offp,task) \
- db_symbol_values(0, db_search_task_symbol(val,DB_STGY_ANY,offp,task), \
- namep, 0)
+ do { \
+ db_sym_t s; \
+ db_symbol_values(0, s = db_search_task_symbol(val,DB_STGY_ANY \
+ ,offp,task), \
+ namep, 0); \
+ db_free_symbol(s); \
+ } while(0);
/* ditto, but no locals */
#define db_find_xtrn_task_sym_and_offset(val,namep,offp,task) \
- db_symbol_values(0, db_search_task_symbol(val,DB_STGY_XTRN,offp,task), \
- namep,0)
+ do { \
+ db_sym_t s; \
+ db_symbol_values(0, s = db_search_task_symbol(val,DB_STGY_XTRN \
+ ,offp,task), \
+ namep,0); \
+ db_free_symbol(s); \
+ } while(0);
/* find symbol in current task */
#define db_search_symbol(val,strgy,offp) \
@@ -147,6 +171,10 @@ extern void db_task_printsym( db_expr_t off,
/* print closest symbol to a value */
extern void db_printsym( db_expr_t off, db_strategy_t strategy);
+/* free a symbol */
+extern void db_free_symbol(db_sym_t s);
+
+
/*
* Symbol table switch, defines the interface
* to symbol-table specific routines.
@@ -181,11 +209,15 @@ extern struct db_sym_switch {
);
void (*symbol_values)(
+ db_symtab_t *stab,
db_sym_t sym,
char **namep,
db_expr_t *valuep
);
+ void (*free_symbol)(
+ db_sym_t sym
+ );
} x_db[];
#ifndef symtab_type
@@ -196,10 +228,13 @@ extern struct db_sym_switch {
#define X_db_lookup(s,n) x_db[(s)->type].lookup(s,n)
#define X_db_search_symbol(s,o,t,d) x_db[(s)->type].search_symbol(s,o,t,d)
#define X_db_line_at_pc(s,p,f,l,a) x_db[(s)->type].line_at_pc(s,p,f,l,a)
-#define X_db_symbol_values(s,p,n,v) x_db[(s)->type].symbol_values(p,n,v)
+#define X_db_symbol_values(s,p,n,v) x_db[(s)->type].symbol_values(s,p,n,v)
+#define X_db_free_symbol(s,m) x_db[(s)->type].free_symbol(m)
extern boolean_t db_line_at_pc(
db_sym_t sym,
char **filename,
int *linenum,
db_expr_t pc);
+
+#endif
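
The lookup macros above now expand to several statements (search, extract the name, then db_free_symbol the temporary), which is why each body is wrapped in do { ... } while(0). A short illustration of what that wrapper buys; the LOOKUP_BAD macro is made up for the example and deliberately shows the broken form:

    /* A bare brace block breaks under if/else: */
    #define LOOKUP_BAD(val, namep, offp)                            \
        {                                                           \
            db_sym_t s = db_search_symbol(val, DB_STGY_ANY, offp);  \
            db_symbol_values(0, s, namep, 0);                       \
            db_free_symbol(s);                                      \
        }

    if (want_name)
        LOOKUP_BAD(val, &name, &off);   /* expands to "{ ... } ;", so the   */
    else                                /* semicolon ends the if and this   */
        name = "?";                     /* `else' dangles -- a syntax error */

    /* Wrapping the same body in do { ... } while(0) keeps the expansion a
     * single statement, so the surrounding if/else parses as intended. */
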
diff --git a/device/chario.c b/device/chario.c
index 8108d210..d7c092e9 100644
--- a/device/chario.c
+++ b/device/chario.c
@@ -43,6 +43,7 @@
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
+#include <vm/vm_user.h>
#include <device/device_types.h>
#include <device/io_req.h>
@@ -894,8 +895,9 @@ void tty_output(
* Send any buffered recvd chars up to user
*/
void ttypush(
- register struct tty *tp)
+ void * _tp)
{
+ register struct tty *tp = _tp;
spl_t s = spltty();
register int state;
@@ -1006,7 +1008,7 @@ void ttyinput(
*/
void ttyinput_many(
struct tty *tp,
- unsigned char *chars,
+ char *chars,
int count)
{
/*
diff --git a/device/cons.c b/device/cons.c
index 7fdb959f..e3e95ffb 100644
--- a/device/cons.c
+++ b/device/cons.c
@@ -250,7 +250,7 @@ cnmaygetc()
void
cnputc(c)
- int c;
+ char c;
{
if (c == 0)
return;
@@ -260,6 +260,15 @@ cnputc(c)
kmsg_putchar (c);
#endif
+#if defined(MACH_HYP) && 0
+ {
+ /* Also output on hypervisor's emergency console, for
+ * debugging */
+ unsigned char d = c;
+ hyp_console_write(&d, 1);
+ }
+#endif /* MACH_HYP */
+
if (cn_tab) {
(*cn_tab->cn_putc)(cn_tab->cn_dev, c);
if (c == '\n')
diff --git a/device/cons.h b/device/cons.h
index dbe29934..a6b04fff 100644
--- a/device/cons.h
+++ b/device/cons.h
@@ -21,6 +21,8 @@
* Utah $Hdr: cons.h 1.10 94/12/14$
*/
+#include <sys/types.h>
+
struct consdev {
#ifdef MACH_KERNEL
char *cn_name; /* name of device in dev_name_list */
@@ -47,3 +49,11 @@ struct consdev {
#ifdef KERNEL
extern struct consdev constab[];
#endif
+
+extern void cninit(void);
+
+extern int cngetc(void);
+
+extern int cnmaygetc(void);
+
+extern void cnputc(char);
diff --git a/device/dev_hdr.h b/device/dev_hdr.h
index d5b87705..340a2db1 100644
--- a/device/dev_hdr.h
+++ b/device/dev_hdr.h
@@ -130,4 +130,20 @@ boolean_t dev_map(boolean_t (*)(), mach_port_t);
#define device_lock(device) simple_lock(&(device)->lock)
#define device_unlock(device) simple_unlock(&(device)->lock)
+/*
+ * device name lookup
+ */
+extern boolean_t dev_name_lookup(
+ char * name,
+ dev_ops_t *ops, /* out */
+ int *unit); /* out */
+
+/*
+ * Change an entry in the indirection list.
+ */
+extern void dev_set_indirection(
+ char *name,
+ dev_ops_t ops,
+ int unit);
+
#endif /* _DEVICE_DEV_HDR_H_ */
diff --git a/device/dev_pager.c b/device/dev_pager.c
index 6779d8e7..224be850 100644
--- a/device/dev_pager.c
+++ b/device/dev_pager.c
@@ -49,6 +49,7 @@
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
+#include <vm/vm_user.h>
#include <device/device_types.h>
#include <device/ds_routines.h>
@@ -340,7 +341,7 @@ kern_return_t device_pager_data_request(
#endif /* lint */
if (device_pager_debug)
- printf("(device_pager)data_request: pager=%d, offset=0x%x, length=0x%x\n",
+ printf("(device_pager)data_request: pager=%p, offset=0x%x, length=0x%x\n",
pager, offset, length);
ds = dev_pager_hash_lookup((ipc_port_t)pager);
@@ -428,7 +429,7 @@ boolean_t device_pager_data_request_done(register io_req_t ior)
size_read = ior->io_count;
if (ior->io_residual) {
if (device_pager_debug)
- printf("(device_pager)data_request_done: r: 0x%x\n",ior->io_residual);
+ printf("(device_pager)data_request_done: r: 0x%lx\n",ior->io_residual);
memset((&ior->io_data[ior->io_count - ior->io_residual]), 0,
(unsigned) ior->io_residual);
}
@@ -606,7 +607,7 @@ kern_return_t device_pager_init_pager(
register dev_pager_t ds;
if (device_pager_debug)
- printf("(device_pager)init: pager=%d, request=%d, name=%d\n",
+ printf("(device_pager)init: pager=%p, request=%p, name=%p\n",
pager, pager_request, pager_name);
assert(pager_page_size == PAGE_SIZE);
diff --git a/device/device_init.c b/device/device_init.c
index 4f57fc2a..1da2a6fe 100644
--- a/device/device_init.c
+++ b/device/device_init.c
@@ -36,14 +36,13 @@
#include <device/device_types.h>
#include <device/device_port.h>
-
+#include <device/tty.h>
extern void mach_device_init();
extern void dev_lookup_init();
extern void net_io_init();
extern void device_pager_init();
-extern void chario_init(void);
extern void io_done_thread();
extern void net_thread();
diff --git a/device/ds_routines.c b/device/ds_routines.c
index 8b5e1e54..d0f0879d 100644
--- a/device/ds_routines.c
+++ b/device/ds_routines.c
@@ -62,6 +62,7 @@
#include <mach/port.h>
#include <mach/vm_param.h>
#include <mach/notify.h>
+#include <machine/locore.h>
#include <machine/machspl.h> /* spl definitions */
#include <ipc/ipc_port.h>
@@ -80,6 +81,7 @@
#include <vm/memory_object.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
+#include <vm/vm_user.h>
#include <device/device_types.h>
#include <device/dev_hdr.h>
@@ -102,6 +104,10 @@ extern struct device_emulation_ops linux_pcmcia_emulation_ops;
#endif
#endif
#endif
+#ifdef MACH_HYP
+extern struct device_emulation_ops hyp_block_emulation_ops;
+extern struct device_emulation_ops hyp_net_emulation_ops;
+#endif
extern struct device_emulation_ops mach_device_emulation_ops;
/* List of emulations. */
@@ -116,6 +122,10 @@ static struct device_emulation_ops *emulation_list[] =
#endif
#endif
#endif
+#ifdef MACH_HYP
+ &hyp_block_emulation_ops,
+ &hyp_net_emulation_ops,
+#endif
&mach_device_emulation_ops,
};
@@ -137,7 +147,7 @@ ds_device_open (ipc_port_t open_port, ipc_port_t reply_port,
if (! IP_VALID (reply_port))
{
printf ("ds_* invalid reply port\n");
- Debugger ("ds_* reply_port");
+ SoftDebugger ("ds_* reply_port");
return MIG_NO_REPLY;
}
@@ -939,7 +949,7 @@ device_write_dealloc(ior)
kmem_io_map_deallocate(device_io_map,
trunc_page(ior->io_data),
- (vm_size_t) ior->io_alloc_size);
+ ior->io_alloc_size);
if (vm_map_copy_has_cont(io_copy)) {
@@ -1082,7 +1092,7 @@ device_read(device, reply_port, reply_port_type, mode, recnum,
*/
if (!IP_VALID(reply_port)) {
printf("ds_* invalid reply port\n");
- Debugger("ds_* reply_port");
+ SoftDebugger("ds_* reply_port");
return (MIG_NO_REPLY); /* no sense in doing anything */
}
@@ -1164,7 +1174,7 @@ device_read_inband(device, reply_port, reply_port_type, mode, recnum,
*/
if (!IP_VALID(reply_port)) {
printf("ds_* invalid reply port\n");
- Debugger("ds_* reply_port");
+ SoftDebugger("ds_* reply_port");
return (MIG_NO_REPLY); /* no sense in doing anything */
}
@@ -1816,9 +1826,9 @@ device_writev_trap (mach_device_t device, dev_mode_t mode,
struct device_emulation_ops mach_device_emulation_ops =
{
- mach_device_reference,
- mach_device_deallocate,
- mach_convert_device_to_port,
+ (void*) mach_device_reference,
+ (void*) mach_device_deallocate,
+ (void*) mach_convert_device_to_port,
device_open,
device_close,
device_write,
@@ -1830,6 +1840,6 @@ struct device_emulation_ops mach_device_emulation_ops =
device_set_filter,
device_map,
ds_no_senders,
- device_write_trap,
- device_writev_trap
+ (void*) device_write_trap,
+ (void*) device_writev_trap
};
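
The (void*) casts in the initializer above are presumably needed because the Mach-side routines are declared with their own types (mach_device_t parameters, an ipc_port_t return for mach_convert_device_to_port) while the device_emulation_ops slots use the generic emulation signatures, so the bare names would not match the field types. A reduced sketch of the pattern (types simplified, not the real declarations):

    /* Generic dispatch table vs. implementation-specific prototype. */
    typedef struct mach_device *mach_device_t;

    struct emulation_ops {
        void (*reference)(void *device);        /* generic slot type */
    };

    extern void mach_device_reference(mach_device_t device);

    struct emulation_ops mach_ops = {
        (void *) mach_device_reference,         /* cast papers over the
                                                 * parameter-type mismatch
                                                 * (GNU C allows the void *
                                                 * round trip) */
    };
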
diff --git a/device/ds_routines.h b/device/ds_routines.h
index 3281c4c7..e1f6aadb 100644
--- a/device/ds_routines.h
+++ b/device/ds_routines.h
@@ -46,9 +46,14 @@ vm_map_t device_io_map;
kern_return_t device_read_alloc(io_req_t, vm_size_t);
kern_return_t device_write_get(io_req_t, boolean_t *);
boolean_t device_write_dealloc(io_req_t);
+void device_reference(device_t);
+void device_deallocate(device_t);
+boolean_t ds_notify(mach_msg_header_t *msg);
boolean_t ds_open_done(io_req_t);
boolean_t ds_read_done(io_req_t);
boolean_t ds_write_done(io_req_t);
+void iowait (io_req_t ior);
+
#endif /* DS_ROUTINES_H */
diff --git a/device/io_req.h b/device/io_req.h
index 5988f55c..162524d7 100644
--- a/device/io_req.h
+++ b/device/io_req.h
@@ -66,7 +66,7 @@ struct io_req {
#define io_data io_un.data
long io_count; /* amount requested */
- long io_alloc_size; /* amount allocated */
+ vm_size_t io_alloc_size; /* amount allocated */
long io_residual; /* amount NOT done */
io_return_t io_error; /* error code */
/* call when done - returns TRUE if IO really finished */
diff --git a/device/net_io.c b/device/net_io.c
index b565aa3f..1db9bca9 100644
--- a/device/net_io.c
+++ b/device/net_io.c
@@ -367,6 +367,36 @@ decl_simple_lock_data(,net_hash_header_lock)
extern boolean_t net_do_filter(); /* CSPF */
extern int bpf_do_filter(); /* BPF */
+int hash_ent_remove (
+ struct ifnet *ifp,
+ net_hash_header_t hp,
+ int used,
+ net_hash_entry_t *head,
+ net_hash_entry_t entp,
+ queue_entry_t *dead_p);
+
+void net_free_dead_infp (queue_entry_t dead_infp);
+void net_free_dead_entp (queue_entry_t dead_entp);
+
+int bpf_validate(
+ bpf_insn_t f,
+ int bytes,
+ bpf_insn_t *match);
+
+int bpf_eq (
+ bpf_insn_t f1,
+ bpf_insn_t f2,
+ register int bytes);
+
+int net_add_q_info (ipc_port_t rcv_port);
+
+int bpf_match (
+ net_hash_header_t hash,
+ int n_keys,
+ unsigned int *keys,
+ net_hash_entry_t **hash_headpp,
+ net_hash_entry_t *entpp);
+
/*
* ethernet_priority:
@@ -478,8 +508,8 @@ boolean_t net_deliver(nonblocking)
MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0);
/* remember message sizes must be rounded up */
kmsg->ikm_header.msgh_size =
- ((mach_msg_size_t) (sizeof(struct net_rcv_msg)
- - NET_RCV_MAX + count))+3 &~ 3;
+ (((mach_msg_size_t) (sizeof(struct net_rcv_msg)
+ - NET_RCV_MAX + count)) + 3) &~ 3;
kmsg->ikm_header.msgh_local_port = MACH_PORT_NULL;
kmsg->ikm_header.msgh_kind = MACH_MSGH_KIND_NORMAL;
kmsg->ikm_header.msgh_id = NET_RCV_MSG_ID;
@@ -741,13 +771,16 @@ net_filter(kmsg, send_list)
if ((infp->filter[0] & NETF_TYPE_MASK) == NETF_BPF) {
ret_count = bpf_do_filter(infp, net_kmsg(kmsg)->packet
+ sizeof(struct packet_header),
- count, net_kmsg(kmsg)->header,
+ count - sizeof(struct packet_header),
+ net_kmsg(kmsg)->header,
ifp->if_header_size, &hash_headp,
&entp);
if (entp == (net_hash_entry_t) 0)
dest = infp->rcv_port;
else
dest = entp->rcv_port;
+ if (ret_count)
+ ret_count += sizeof(struct packet_header);
} else {
ret_count = net_do_filter(infp, net_kmsg(kmsg)->packet, count,
net_kmsg(kmsg)->header);
@@ -1193,7 +1226,7 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count)
&& my_infp == 0
&& (infp->filter_end - infp->filter) == filter_count
&& bpf_eq((bpf_insn_t)infp->filter,
- filter, filter_bytes))
+ (bpf_insn_t)filter, filter_bytes))
my_infp = infp;
for (i = 0; i < NET_HASH_SIZE; i++) {
@@ -1341,10 +1374,10 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count)
/* Not checking for the same key values */
if (*p == 0) {
- queue_init ((queue_t) hash_entp);
+ queue_init (&hash_entp->chain);
*p = hash_entp;
} else {
- enqueue_tail((queue_t)*p, hash_entp);
+ enqueue_tail(&(*p)->chain, &hash_entp->chain);
}
((net_hash_header_t)my_infp)->ref_count++;
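
Two notes on the hunks above. The extra parentheses around the message-size computation do not change the value (`+' already binds more tightly than `&'); they only make the round-up-to-4 intent explicit. And the BPF path now filters on count - sizeof(struct packet_header) and adds the header size back to ret_count afterwards. The rounding idiom in isolation, as a tiny self-contained check:

    #include <assert.h>

    /* Round a byte count up to a multiple of 4: add 3, clear the low two bits. */
    #define ROUND4(x) (((x) + 3) & ~3)

    int main(void)
    {
        assert(ROUND4(5) == 8);
        assert(ROUND4(8) == 8);     /* already-aligned sizes are unchanged */
        assert(ROUND4(0) == 0);
        return 0;
    }
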
diff --git a/device/net_io.h b/device/net_io.h
index 6842dec0..073bdca7 100644
--- a/device/net_io.h
+++ b/device/net_io.h
@@ -65,6 +65,7 @@ extern void net_kmsg_put(ipc_kmsg_t);
* Network utility routines.
*/
+extern void net_ast();
extern void net_packet(struct ifnet *, ipc_kmsg_t, unsigned int, boolean_t);
extern void net_filter(ipc_kmsg_t, ipc_kmsg_queue_t);
extern io_return_t net_getstat(struct ifnet *, dev_flavor_t, dev_status_t,
@@ -77,6 +78,8 @@ extern io_return_t net_write(struct ifnet *, int (*)(), io_req_t);
extern vm_size_t net_kmsg_size;
+extern void net_kmsg_collect (void);
+
#define net_kmsg_alloc() ((ipc_kmsg_t) kalloc(net_kmsg_size))
#define net_kmsg_free(kmsg) kfree((vm_offset_t) (kmsg), net_kmsg_size)
diff --git a/device/subrs.c b/device/subrs.c
index ea63a5c6..a82bae38 100644
--- a/device/subrs.c
+++ b/device/subrs.c
@@ -30,6 +30,7 @@
#include <kern/debug.h>
#include <kern/printf.h>
#include <vm/vm_kern.h>
+#include <vm/vm_user.h>
#include <device/buf.h>
#include <device/if_hdr.h>
#include <device/if_ether.h>
@@ -62,7 +63,7 @@ char *
ether_sprintf(ap)
register u_char *ap;
{
- register i;
+ register int i;
static char etherbuf[18];
register char *cp = etherbuf;
static char digits[] = "0123456789abcdef";
@@ -139,6 +140,6 @@ void brelse(bp)
(void) vm_deallocate(kernel_map,
(vm_offset_t) ior->io_data,
- (vm_size_t) ior->io_alloc_size);
+ ior->io_alloc_size);
io_req_free(ior);
}
diff --git a/device/subrs.h b/device/subrs.h
new file mode 100644
index 00000000..680aaa6f
--- /dev/null
+++ b/device/subrs.h
@@ -0,0 +1,34 @@
+/*
+ * Random device functions
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Random device functions.
+ *
+ */
+
+#ifndef _SUBRS_H_
+#define _SUBRS_H_
+
+#include <mach/std_types.h>
+
+extern void sleep (vm_offset_t channel, int priority);
+extern void wakeup (vm_offset_t channel);
+
+#endif /* _SUBRS_H_ */
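
The new device/subrs.h only declares the BSD-style sleep/wakeup pair used by the device code. A hedged usage sketch (hypothetical driver, not from this patch); the "channel" is simply an address both sides agree on, here the address of the driver's state:

    #include <device/subrs.h>

    static struct { int busy; } foo;    /* hypothetical driver state */

    static void foo_acquire(void)
    {
        /* A real driver would hold the appropriate spl/lock around this test. */
        while (foo.busy)
            sleep((vm_offset_t) &foo, 0);       /* wait on the channel &foo */
        foo.busy = 1;
    }

    static void foo_release(void)
    {
        foo.busy = 0;
        wakeup((vm_offset_t) &foo);             /* wake any sleeper on &foo */
    }
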
diff --git a/device/tty.h b/device/tty.h
index 94229962..be287083 100644
--- a/device/tty.h
+++ b/device/tty.h
@@ -52,16 +52,16 @@ struct tty {
struct cirbuf t_outq; /* output buffer */
char * t_addr; /* device pointer */
int t_dev; /* device number */
- int (*t_start)(struct tty *);
+ void (*t_start)(struct tty *);
/* routine to start output */
#define t_oproc t_start
- int (*t_stop)(struct tty *, int);
+ void (*t_stop)(struct tty *, int);
/* routine to stop output */
int (*t_mctl)(struct tty *, int, int);
/* (optional) routine to control
modem signals */
- char t_ispeed; /* input speed */
- char t_ospeed; /* output speed */
+ unsigned char t_ispeed; /* input speed */
+ unsigned char t_ospeed; /* output speed */
char t_breakc; /* character to deliver when 'break'
condition received */
int t_flags; /* mode flags */
@@ -104,10 +104,19 @@ extern void ttyinput(
unsigned int c,
struct tty * tp);
+extern void ttyinput_many(
+ struct tty * tp,
+ char * chars,
+ int count);
+
extern boolean_t ttymodem(
struct tty * tp,
boolean_t carrier_up);
+extern void tty_cts(
+ struct tty * tp,
+ boolean_t cts_up);
+
extern void tty_queue_completion(
queue_t queue);
#define tt_open_wakeup(tp) \
@@ -124,6 +133,35 @@ short tthiwat[NSPEEDS], ttlowat[NSPEEDS];
#define TTHIWAT(tp) tthiwat[(tp)->t_ospeed]
#define TTLOWAT(tp) ttlowat[(tp)->t_ospeed]
+extern io_return_t tty_get_status(
+ struct tty * tp,
+ dev_flavor_t flavor,
+ int * data,
+ natural_t * count);
+
+extern io_return_t tty_set_status(
+ struct tty * tp,
+ dev_flavor_t flavor,
+ int * data,
+ natural_t count);
+
+extern void tty_flush(
+ struct tty * tp,
+ int rw);
+
+extern void ttrstrt(
+ struct tty * tp);
+
+extern void ttstart(
+ struct tty * tp);
+
+extern void ttyclose(
+ struct tty * tp);
+
+extern boolean_t tty_portdeath(
+ struct tty * tp,
+ ipc_port_t port);
+
/* internal state bits */
#define TS_INIT 0x00000001 /* tty structure initialized */
#define TS_TIMEOUT 0x00000002 /* delay timeout in progress */
@@ -200,4 +238,6 @@ struct ldisc_switch {
extern struct ldisc_switch linesw[];
+extern void chario_init(void);
+
#endif /* _DEVICE_TTY_H_ */
diff --git a/doc/Makefrag.am b/doc/Makefrag.am
index 654611e5..490ebf5c 100644
--- a/doc/Makefrag.am
+++ b/doc/Makefrag.am
@@ -1,6 +1,6 @@
# Makefile fragment for the documentation.
-# Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
@@ -105,9 +105,13 @@ pdf-local:
# TODO. There doesn't seem to be a hook or `-local' target suitable for this.
$(srcdir)/doc/version.texi: $(srcdir)/doc/stamp-vti
@if test -d $(web); then :; \
- elif grep -q :ext: $(top_srcdir)/CVS/Root 2> /dev/null; then \
+ elif grep 2> /dev/null \
+ -q :ext: $(top_srcdir)/CVS/Root 2> /dev/null && \
+ grep 2> /dev/null \
+ -q ^Tgnumach-1-branch$$ $(top_srcdir)/CVS/Tag; \
+ then \
echo "*** As it seems that you'd be allowed to check in the" \
- "possible resulting fixes, you may cosider running" \
+ "possible resulting fixes, you may consider running" \
" \`make $(web)' to get a checkout of the web pages of the" \
"GNU Mach manual and have possible changes installed into" \
"\`$(web)/', ready for checking them in in there." && \
diff --git a/doc/mach.texi b/doc/mach.texi
index 0e17128c..858880ad 100644
--- a/doc/mach.texi
+++ b/doc/mach.texi
@@ -26,7 +26,8 @@ This file documents the GNU Mach microkernel.
This is edition @value{EDITION}, last updated on @value{UPDATED}, of @cite{The
GNU Mach Reference Manual}, for version @value{VERSION}.
-Copyright @copyright{} 2001, 2002, 2006, 2007 Free Software Foundation, Inc.
+Copyright @copyright{} 2001, 2002, 2006, 2007, 2008 Free Software
+Foundation, Inc.
@c @sp 2
@c Published by the Free Software Foundation @*
@@ -523,6 +524,13 @@ the kernel. It is not enabled by default because it adds considerably to the
unpageable memory footprint of the kernel. @xref{Kernel Debugger}.
@end table
+@table @code
+@item --enable-pae
+@acronym{PAE, Physical Address Extension} feature (@samp{ix86}-only),
+which is available on modern @samp{ix86} processors; on @samp{ix86-at} disabled
+by default, on @samp{ix86-xen} enabled by default.
+@end table
+
@subsection Turning device drivers on or off
Each device driver has an associated configure switch. The following table
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index 4e6731dd..755b4be2 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -19,33 +19,41 @@
libkernel_a_SOURCES += \
i386/i386at/autoconf.c \
+ i386/i386at/autoconf.h \
+ i386/i386at/conf.c \
+ i386/i386at/cons_conf.c \
+ i386/i386at/idt.h \
+ i386/i386at/kd_event.c \
+ i386/i386at/kd_event.h \
+ i386/i386at/kd_queue.c \
+ i386/i386at/kd_queue.h \
+ i386/i386at/model_dep.c \
+ i386/include/mach/sa/stdarg.h
+
+if PLATFORM_at
+libkernel_a_SOURCES += \
i386/i386at/boothdr.S \
i386/i386at/com.c \
+ i386/i386at/com.h \
i386/i386at/comreg.h \
- i386/i386at/conf.c \
- i386/i386at/cons_conf.c \
i386/i386at/cram.h \
i386/i386at/disk.h \
i386/i386at/i8250.h \
- i386/i386at/idt.h \
i386/i386at/immc.c \
i386/i386at/int_init.c \
+ i386/i386at/int_init.h \
i386/i386at/interrupt.S \
i386/i386at/kd.c \
i386/i386at/kd.h \
- i386/i386at/kd_event.c \
- i386/i386at/kd_event.h \
i386/i386at/kd_mouse.c \
i386/i386at/kd_mouse.h \
- i386/i386at/kd_queue.c \
- i386/i386at/kd_queue.h \
i386/i386at/kdasm.S \
i386/i386at/kdsoft.h \
- i386/i386at/model_dep.c \
+ i386/i386at/mem.c \
i386/i386at/pic_isa.c \
i386/i386at/rtc.c \
- i386/i386at/rtc.h \
- i386/include/mach/sa/stdarg.h
+ i386/i386at/rtc.h
+endif
#
# `lpr' device support.
@@ -80,11 +88,9 @@ libkernel_a_SOURCES += \
i386/i386/fpu.h \
i386/i386/gdt.c \
i386/i386/gdt.h \
- i386/i386/hardclock.c \
i386/i386/idt-gen.h \
i386/i386/idt.c \
i386/i386/idt_inittab.S \
- i386/i386/io_map.c \
i386/i386/io_perm.c \
i386/i386/io_perm.h \
i386/i386/ipl.h \
@@ -98,20 +104,18 @@ libkernel_a_SOURCES += \
i386/i386/locore.S \
i386/i386/locore.h \
i386/i386/loose_ends.c \
+ i386/i386/loose_ends.h \
i386/i386/mach_param.h \
i386/i386/machine_routines.h \
i386/i386/machine_task.c \
i386/i386/machspl.h \
+ i386/i386/model_dep.h \
i386/i386/mp_desc.c \
i386/i386/mp_desc.h \
i386/i386/pcb.c \
i386/i386/pcb.h \
i386/i386/phys.c \
- i386/i386/pic.c \
- i386/i386/pic.h \
i386/i386/pio.h \
- i386/i386/pit.c \
- i386/i386/pit.h \
i386/i386/pmap.h \
i386/i386/proc_reg.h \
i386/i386/sched_param.h \
@@ -123,7 +127,6 @@ libkernel_a_SOURCES += \
i386/i386/task.h \
i386/i386/thread.h \
i386/i386/time_stamp.h \
- i386/i386/timer.h \
i386/i386/trap.c \
i386/i386/trap.h \
i386/i386/tss.h \
@@ -135,10 +138,20 @@ libkernel_a_SOURCES += \
i386/i386/zalloc.h \
i386/intel/pmap.c \
i386/intel/pmap.h \
- i386/intel/read_fault.c
+ i386/intel/read_fault.c \
+ i386/intel/read_fault.h
EXTRA_DIST += \
i386/i386/mach_i386.srv
+if PLATFORM_at
+libkernel_a_SOURCES += \
+ i386/i386/hardclock.c \
+ i386/i386/io_map.c \
+ i386/i386/pic.c \
+ i386/i386/pic.h \
+ i386/i386/pit.c \
+ i386/i386/pit.h
+endif
#
# KDB support.
@@ -231,3 +244,15 @@ EXTRA_DIST += \
# Instead of listing each file individually...
EXTRA_DIST += \
i386/include
+
+#
+# Platform specific parts.
+#
+
+if PLATFORM_xen
+include i386/xen/Makefrag.am
+
+libkernel_a_SOURCES += \
+ i386/i386/xen.h
+
+endif
diff --git a/i386/configfrag.ac b/i386/configfrag.ac
index bada87df..1132b695 100644
--- a/i386/configfrag.ac
+++ b/i386/configfrag.ac
@@ -1,6 +1,6 @@
dnl Configure fragment for i386.
-dnl Copyright (C) 1999, 2004, 2006, 2007 Free Software Foundation, Inc.
+dnl Copyright (C) 1999, 2004, 2006, 2007, 2008 Free Software Foundation, Inc.
dnl Permission to use, copy, modify and distribute this software and its
dnl documentation is hereby granted, provided that both the copyright
@@ -51,6 +51,12 @@ case $host_platform:$host_cpu in
# i386/bogus/platforms.h]
AC_DEFINE([AT386], [1], [AT386])[;;
+ xen:i?86)
+ # TODO. That should probably not be needed.
+ ncom=1
+ # TODO. That should probably not be needed.
+ # i386/bogus/platforms.h]
+ AC_DEFINE([AT386], [1], [AT386])[;;
*)
:;;
esac]
@@ -102,6 +108,27 @@ if [ x"$enable_lpr" = xyes ]; then]
[else]
AM_CONDITIONAL([enable_lpr], [false])
[fi]
+
+
+AC_ARG_ENABLE([pae],
+ AS_HELP_STRING([--enable-pae], [PAE support (ix86-only); on ix86-at disabled
+ by default, on ix86-xen enabled by default]))
+[case $host_platform:$host_cpu in
+ xen:i?86)
+ enable_pae=${enable_pae-yes};;
+ *:i?86)
+ :;;
+ *)
+ if [ x"$enable_pae" = xyes ]; then]
+ AC_MSG_ERROR([can only enable the `PAE' feature on ix86.])
+ [fi;;
+esac
+if [ x"$enable_pae" = xyes ]; then]
+ AC_DEFINE([PAE], [1], [PAE support])
+ AM_CONDITIONAL([enable_pae], [true])
+[else]
+ AM_CONDITIONAL([enable_pae], [false])
+[fi]
dnl Local Variables:
dnl mode: autoconf
diff --git a/i386/i386/db_disasm.c b/i386/i386/db_disasm.c
index 00327218..e15293b0 100644
--- a/i386/i386/db_disasm.c
+++ b/i386/i386/db_disasm.c
@@ -94,6 +94,7 @@ boolean_t db_disasm_16 = FALSE;
#define STI 33 /* FP stack */
#define X 34 /* extended FP op */
#define XA 35 /* for 'fstcw %ax' */
+#define Iba 36 /* byte immediate, don't print if 0xa */
struct inst {
char * i_name; /* name */
@@ -749,8 +750,8 @@ struct inst db_inst_table[256] = {
/*d1*/ { "", TRUE, LONG, op2(o1, E), (char *)db_Grp2 },
/*d2*/ { "", TRUE, BYTE, op2(CL, E), (char *)db_Grp2 },
/*d3*/ { "", TRUE, LONG, op2(CL, E), (char *)db_Grp2 },
-/*d4*/ { "aam", TRUE, NONE, 0, 0 },
-/*d5*/ { "aad", TRUE, NONE, 0, 0 },
+/*d4*/ { "aam", FALSE, NONE, op1(Iba), 0 },
+/*d5*/ { "aad", FALSE, NONE, op1(Iba), 0 },
/*d6*/ { "", FALSE, NONE, 0, 0 },
/*d7*/ { "xlat", FALSE, BYTE, op1(BX), 0 },
@@ -1333,6 +1334,12 @@ db_disasm(
db_printf("$%#n", imm);
break;
+ case Iba:
+ get_value_inc(imm, loc, 1, FALSE, task);
+ if (imm != 0x0a)
+ db_printf("$%#r", imm);
+ break;
+
case Ibs:
get_value_inc(imm, loc, 1, TRUE, task); /* signed */
db_printf("$%#r", imm);
diff --git a/i386/i386/db_interface.c b/i386/i386/db_interface.c
index b8968de3..d149adc5 100644
--- a/i386/i386/db_interface.c
+++ b/i386/i386/db_interface.c
@@ -134,7 +134,7 @@ kdb_trap(
/* XXX Should switch to ddb`s own stack here. */
ddb_regs = *regs;
- if ((regs->cs & 0x3) == 0) {
+ if ((regs->cs & 0x3) == KERNEL_RING) {
/*
* Kernel mode - esp and ss not saved
*/
@@ -152,7 +152,7 @@ kdb_trap(
regs->ecx = ddb_regs.ecx;
regs->edx = ddb_regs.edx;
regs->ebx = ddb_regs.ebx;
- if (regs->cs & 0x3) {
+ if ((regs->cs & 0x3) != KERNEL_RING) {
/*
* user mode - saved esp and ss valid
*/
@@ -205,7 +205,7 @@ kdb_kentry(
if (db_enter())
#endif /* NCPUS > 1 */
{
- if (is->cs & 0x3) {
+ if ((is->cs & 0x3) != KERNEL_RING) {
ddb_regs.uesp = ((int *)(is+1))[0];
ddb_regs.ss = ((int *)(is+1))[1];
}
@@ -232,7 +232,7 @@ kdb_kentry(
db_task_trap(-1, 0, (ddb_regs.cs & 0x3) != 0);
cnpollc(FALSE);
- if (ddb_regs.cs & 0x3) {
+ if ((ddb_regs.cs & 0x3) != KERNEL_RING) {
((int *)(is+1))[0] = ddb_regs.uesp;
((int *)(is+1))[1] = ddb_regs.ss & 0xffff;
}
@@ -311,15 +311,16 @@ db_read_bytes(
unsigned kern_addr;
src = (char *)addr;
- if (addr >= VM_MIN_KERNEL_ADDRESS || task == TASK_NULL) {
+ if ((addr >= VM_MIN_KERNEL_ADDRESS && addr < VM_MAX_KERNEL_ADDRESS) || task == TASK_NULL) {
if (task == TASK_NULL)
task = db_current_task();
while (--size >= 0) {
- if (addr++ < VM_MIN_KERNEL_ADDRESS && task == TASK_NULL) {
+ if (addr < VM_MIN_KERNEL_ADDRESS && task == TASK_NULL) {
db_printf("\nbad address %x\n", addr);
db_error(0);
/* NOTREACHED */
}
+ addr++;
*data++ = *src++;
}
return;
@@ -500,7 +501,7 @@ db_search_null(
register unsigned *kaddr;
kaddr = (unsigned *)*skaddr;
- for (vaddr = *svaddr; vaddr > evaddr; vaddr -= sizeof(unsigned)) {
+ for (vaddr = *svaddr; vaddr > evaddr; ) {
if (vaddr % INTEL_PGBYTES == 0) {
vaddr -= sizeof(unsigned);
if (db_user_to_kernel_address(task, vaddr, skaddr, 0) < 0)
@@ -519,6 +520,53 @@ db_search_null(
return FALSE;
}
+#define GNU
+
+#ifdef GNU
+static boolean_t
+looks_like_command(
+ task_t task,
+ char* kaddr)
+{
+ char *c;
+
+ assert(!((vm_offset_t) kaddr & (INTEL_PGBYTES-1)));
+
+ /*
+ * Must be the environment.
+ */
+ if (!memcmp(kaddr, "PATH=", 5) || !memcmp(kaddr, "TERM=", 5) || !memcmp(kaddr, "SHELL=", 6) || !memcmp(kaddr, "LOCAL_PART=", 11) || !memcmp(kaddr, "LC_ALL=", 7))
+ return FALSE;
+
+ /*
+ * This is purely heuristic, but it works quite nicely:
+ * the page should look like words separated by \0, followed
+ * only by \0s up to the end of the page.
+ */
+ c = kaddr;
+ while (c < kaddr + INTEL_PGBYTES) {
+ if (!*c) {
+ if (c == kaddr)
+ /* Starts with \0. */
+ return FALSE;
+ break;
+ }
+ while (c < kaddr + INTEL_PGBYTES && *c)
+ c++;
+ if (c < kaddr + INTEL_PGBYTES)
+ c++; /* Skip \0 */
+ }
+ /*
+ * Check that the remainder is just \0s.
+ */
+ while (c < kaddr + INTEL_PGBYTES)
+ if (*c++)
+ return FALSE;
+
+ return TRUE;
+}
+#endif
+
void
db_task_name(
task_t task)
@@ -526,7 +574,46 @@ db_task_name(
register char *p;
register int n;
unsigned vaddr, kaddr;
+ unsigned sp;
+ if (task->map->pmap == kernel_pmap) {
+ db_printf(DB_GNUMACH_TASK_NAME);
+ return;
+ }
+
+#ifdef GNU
+ /*
+ * GNU Hurd-specific heuristics.
+ */
+
+ /* Heuristic address first. */
+ vaddr = 0x1026000;
+ if (db_user_to_kernel_address(task, vaddr, &kaddr, 0) >= 0 &&
+ looks_like_command(task, (char*) kaddr))
+ goto ok;
+
+ /* Try to catch SP of the main thread. */
+ thread_t thread;
+
+ task_lock(task);
+ thread = (thread_t) queue_first(&task->thread_list);
+ if (!thread) {
+ task_unlock(task);
+ db_printf(DB_NULL_TASK_NAME);
+ return;
+ }
+ sp = thread->pcb->iss.uesp;
+ task_unlock(task);
+
+ vaddr = (sp & ~(INTEL_PGBYTES - 1)) + INTEL_PGBYTES;
+ while (1) {
+ if (db_user_to_kernel_address(task, vaddr, &kaddr, 0) < 0)
+ return;
+ if (looks_like_command(task, (char*) kaddr))
+ break;
+ vaddr += INTEL_PGBYTES;
+ }
+#else
vaddr = DB_USER_STACK_ADDR;
kaddr = 0;
@@ -544,11 +631,18 @@ db_task_name(
db_printf(DB_NULL_TASK_NAME);
return;
}
+#endif
+ok:
n = DB_TASK_NAME_LEN-1;
+#ifdef GNU
+ p = (char *)kaddr;
+ for (; n > 0; vaddr++, p++, n--) {
+#else
p = (char *)kaddr + sizeof(unsigned);
for (vaddr += sizeof(int); vaddr < DB_USER_STACK_ADDR && n > 0;
vaddr++, p++, n--) {
+#endif
if (vaddr % INTEL_PGBYTES == 0) {
(void)db_user_to_kernel_address(task, vaddr, &kaddr, 0);
p = (char*)kaddr;
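
To make the looks_like_command() heuristic above concrete, here is a sketch of two candidate pages; the string contents are invented for the example, the real page is wherever the exec server left the argument strings:

/* Accepted: NUL-separated words, then nothing but NULs up to the page end. */
static const char arg_page[4096] = "/hurd/ext2fs.static\0--readonly\0device:hd0s1\0";

/* Rejected: begins with one of the well-known environment variable prefixes. */
static const char env_page[4096] = "PATH=/bin:/usr/bin\0TERM=mach\0";
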
diff --git a/i386/i386/db_machdep.h b/i386/i386/db_machdep.h
index cba259c0..95e37971 100644
--- a/i386/i386/db_machdep.h
+++ b/i386/i386/db_machdep.h
@@ -94,6 +94,7 @@ db_regs_t ddb_regs; /* register state */
#define DB_TASK_NAME_TITLE "COMMAND "
#define DB_TASK_NAME_LEN 23
#define DB_NULL_TASK_NAME "? "
+#define DB_GNUMACH_TASK_NAME "gnumach "
/* macro for checking if a thread has used floating-point */
diff --git a/i386/i386/db_trace.c b/i386/i386/db_trace.c
index aad305c3..c4019b59 100644
--- a/i386/i386/db_trace.c
+++ b/i386/i386/db_trace.c
@@ -452,10 +452,14 @@ db_i386_stack_trace(
if (INKERNEL((unsigned)callpc) && user_frame == 0) {
db_addr_t call_func = 0;
- db_symbol_values(0, db_search_task_symbol(callpc,
- DB_STGY_XTRN, (db_addr_t *)&offset,
- TASK_NULL),
+ db_sym_t sym_tmp;
+ db_symbol_values(0,
+ sym_tmp = db_search_task_symbol(callpc,
+ DB_STGY_XTRN,
+ (db_addr_t *)&offset,
+ TASK_NULL),
&name, (db_expr_t *)&call_func);
+ db_free_symbol(sym_tmp);
if ((db_user_trap_symbol_value && call_func == db_user_trap_symbol_value) ||
(db_kernel_trap_symbol_value && call_func == db_kernel_trap_symbol_value)) {
frame_type = TRAP;
diff --git a/i386/i386/debug_i386.c b/i386/i386/debug_i386.c
index b897e631..f3e18835 100644
--- a/i386/i386/debug_i386.c
+++ b/i386/i386/debug_i386.c
@@ -29,7 +29,7 @@
void dump_ss(struct i386_saved_state *st)
{
- printf("Dump of i386_saved_state %08x:\n", st);
+ printf("Dump of i386_saved_state %p:\n", st);
printf("EAX %08x EBX %08x ECX %08x EDX %08x\n",
st->eax, st->ebx, st->ecx, st->edx);
printf("ESI %08x EDI %08x EBP %08x ESP %08x\n",
diff --git a/i386/i386/debug_trace.S b/i386/i386/debug_trace.S
index e741516f..f275e1bc 100644
--- a/i386/i386/debug_trace.S
+++ b/i386/i386/debug_trace.S
@@ -24,6 +24,7 @@
#ifdef DEBUG
#include <mach/machine/asm.h>
+#include <i386/xen.h>
#include "debug.h"
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index ad2173c5..2a4b9c09 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -23,6 +23,15 @@
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
+
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
/*
* Support for 80387 floating point or FP emulator.
*/
@@ -43,6 +52,7 @@
#include <i386/thread.h>
#include <i386/fpu.h>
#include <i386/pio.h>
+#include <i386/locore.h>
#include "cpu_number.h"
#if 0
@@ -63,6 +73,10 @@ extern void i386_exception();
int fp_kind = FP_387; /* 80387 present */
zone_t ifps_zone; /* zone for FPU save area */
+static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */
+
+void fp_save(thread_t thread);
+void fp_load(thread_t thread);
#if NCPUS == 1
volatile thread_t fp_thread = THREAD_NULL;
@@ -96,12 +110,21 @@ init_fpu()
{
unsigned short status, control;
+#ifdef MACH_HYP
+ clear_ts();
+#else /* MACH_HYP */
+ unsigned int native = 0;
+
+ if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486)
+ native = CR0_NE;
+
/*
* Check for FPU by initializing it,
* then trying to read the correct bit patterns from
* the control and status registers.
*/
- set_cr0(get_cr0() & ~(CR0_EM|CR0_TS)); /* allow use of FPU */
+ set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | native); /* allow use of FPU */
+#endif /* MACH_HYP */
fninit();
status = fnstsw();
@@ -130,19 +153,37 @@ init_fpu()
/*
* We have a 387.
*/
- fp_kind = FP_387;
+ if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
+ static /* because we _need_ alignment */
+ struct i386_xfp_save save;
+ unsigned long mask;
+ fp_kind = FP_387X;
+#ifndef MACH_HYP
+ printf("Enabling FXSR\n");
+ set_cr4(get_cr4() | CR4_OSFXSR);
+#endif /* MACH_HYP */
+ fxsave(&save);
+ mask = save.fp_mxcsr_mask;
+ if (!mask)
+ mask = 0x0000ffbf;
+ mxcsr_feature_mask &= mask;
+ } else
+ fp_kind = FP_387;
}
+#ifdef MACH_HYP
+ set_ts();
+#else /* MACH_HYP */
/*
* Trap wait instructions. Turn off FPU for now.
*/
set_cr0(get_cr0() | CR0_TS | CR0_MP);
+#endif /* MACH_HYP */
}
else {
/*
* NO FPU.
*/
- fp_kind = FP_NO;
- set_cr0(get_cr0() | CR0_EM);
+ panic("No FPU!");
}
}
@@ -152,7 +193,7 @@ init_fpu()
void
fpu_module_init()
{
- ifps_zone = zinit(sizeof(struct i386_fpsave_state), 0,
+ ifps_zone = zinit(sizeof(struct i386_fpsave_state), 16,
THREAD_MAX * sizeof(struct i386_fpsave_state),
THREAD_CHUNK * sizeof(struct i386_fpsave_state),
0, "i386 fpsave state");
@@ -183,6 +224,73 @@ ASSERT_IPL(SPL0);
zfree(ifps_zone, (vm_offset_t) fps);
}
+/* The two following functions were stolen from Linux's i387.c */
+static inline unsigned short
+twd_i387_to_fxsr (unsigned short twd)
+{
+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+ tmp = ~twd;
+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+ /* and move the valid bits to the lower byte. */
+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+ return tmp;
+}
+
+static inline unsigned long
+twd_fxsr_to_i387 (struct i386_xfp_save *fxsave)
+{
+ struct {
+ unsigned short significand[4];
+ unsigned short exponent;
+ unsigned short padding[3];
+ } *st = NULL;
+ unsigned long tos = (fxsave->fp_status >> 11) & 7;
+ unsigned long twd = (unsigned long) fxsave->fp_tag;
+ unsigned long tag;
+ unsigned long ret = 0xffff0000u;
+ int i;
+
+#define FPREG_ADDR(f, n) ((void *)&(f)->fp_reg_word + (n) * 16)
+
+ for (i = 0 ; i < 8 ; i++) {
+ if (twd & 0x1) {
+ st = FPREG_ADDR (fxsave, (i - tos) & 7);
+
+ switch (st->exponent & 0x7fff) {
+ case 0x7fff:
+ tag = 2; /* Special */
+ break;
+ case 0x0000:
+ if (!st->significand[0] &&
+ !st->significand[1] &&
+ !st->significand[2] &&
+ !st->significand[3] ) {
+ tag = 1; /* Zero */
+ } else {
+ tag = 2; /* Special */
+ }
+ break;
+ default:
+ if (st->significand[3] & 0x8000) {
+ tag = 0; /* Valid */
+ } else {
+ tag = 2; /* Special */
+ }
+ break;
+ }
+ } else {
+ tag = 3; /* Empty */
+ }
+ ret |= (tag << (2 * i));
+ twd = twd >> 1;
+ }
+ return ret;
+}
+
/*
* Set the floating-point state for a thread.
* If the thread is not the current thread, it is
@@ -261,16 +369,30 @@ ASSERT_IPL(SPL0);
*/
memset(&ifps->fp_save_state, 0, sizeof(struct i386_fp_save));
- ifps->fp_save_state.fp_control = user_fp_state->fp_control;
- ifps->fp_save_state.fp_status = user_fp_state->fp_status;
- ifps->fp_save_state.fp_tag = user_fp_state->fp_tag;
- ifps->fp_save_state.fp_eip = user_fp_state->fp_eip;
- ifps->fp_save_state.fp_cs = user_fp_state->fp_cs;
- ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode;
- ifps->fp_save_state.fp_dp = user_fp_state->fp_dp;
- ifps->fp_save_state.fp_ds = user_fp_state->fp_ds;
- ifps->fp_regs = *user_fp_regs;
- ifps->fp_valid = TRUE;
+ if (fp_kind == FP_387X) {
+ int i;
+
+ ifps->xfp_save_state.fp_control = user_fp_state->fp_control;
+ ifps->xfp_save_state.fp_status = user_fp_state->fp_status;
+ ifps->xfp_save_state.fp_tag = twd_i387_to_fxsr(user_fp_state->fp_tag);
+ ifps->xfp_save_state.fp_eip = user_fp_state->fp_eip;
+ ifps->xfp_save_state.fp_cs = user_fp_state->fp_cs;
+ ifps->xfp_save_state.fp_opcode = user_fp_state->fp_opcode;
+ ifps->xfp_save_state.fp_dp = user_fp_state->fp_dp;
+ ifps->xfp_save_state.fp_ds = user_fp_state->fp_ds;
+ for (i=0; i<8; i++)
+ memcpy(&ifps->xfp_save_state.fp_reg_word[i], &user_fp_regs[i], sizeof(user_fp_regs[i]));
+ } else {
+ ifps->fp_save_state.fp_control = user_fp_state->fp_control;
+ ifps->fp_save_state.fp_status = user_fp_state->fp_status;
+ ifps->fp_save_state.fp_tag = user_fp_state->fp_tag;
+ ifps->fp_save_state.fp_eip = user_fp_state->fp_eip;
+ ifps->fp_save_state.fp_cs = user_fp_state->fp_cs;
+ ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode;
+ ifps->fp_save_state.fp_dp = user_fp_state->fp_dp;
+ ifps->fp_save_state.fp_ds = user_fp_state->fp_ds;
+ ifps->fp_regs = *user_fp_regs;
+ }
simple_unlock(&pcb->lock);
if (new_ifps != 0)
@@ -340,15 +462,30 @@ ASSERT_IPL(SPL0);
*/
memset(user_fp_state, 0, sizeof(struct i386_fp_save));
- user_fp_state->fp_control = ifps->fp_save_state.fp_control;
- user_fp_state->fp_status = ifps->fp_save_state.fp_status;
- user_fp_state->fp_tag = ifps->fp_save_state.fp_tag;
- user_fp_state->fp_eip = ifps->fp_save_state.fp_eip;
- user_fp_state->fp_cs = ifps->fp_save_state.fp_cs;
- user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode;
- user_fp_state->fp_dp = ifps->fp_save_state.fp_dp;
- user_fp_state->fp_ds = ifps->fp_save_state.fp_ds;
- *user_fp_regs = ifps->fp_regs;
+ if (fp_kind == FP_387X) {
+ int i;
+
+ user_fp_state->fp_control = ifps->xfp_save_state.fp_control;
+ user_fp_state->fp_status = ifps->xfp_save_state.fp_status;
+ user_fp_state->fp_tag = twd_fxsr_to_i387(&ifps->xfp_save_state);
+ user_fp_state->fp_eip = ifps->xfp_save_state.fp_eip;
+ user_fp_state->fp_cs = ifps->xfp_save_state.fp_cs;
+ user_fp_state->fp_opcode = ifps->xfp_save_state.fp_opcode;
+ user_fp_state->fp_dp = ifps->xfp_save_state.fp_dp;
+ user_fp_state->fp_ds = ifps->xfp_save_state.fp_ds;
+ for (i=0; i<8; i++)
+ memcpy(&user_fp_regs[i], &ifps->xfp_save_state.fp_reg_word[i], sizeof(user_fp_regs[i]));
+ } else {
+ user_fp_state->fp_control = ifps->fp_save_state.fp_control;
+ user_fp_state->fp_status = ifps->fp_save_state.fp_status;
+ user_fp_state->fp_tag = ifps->fp_save_state.fp_tag;
+ user_fp_state->fp_eip = ifps->fp_save_state.fp_eip;
+ user_fp_state->fp_cs = ifps->fp_save_state.fp_cs;
+ user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode;
+ user_fp_state->fp_dp = ifps->fp_save_state.fp_dp;
+ user_fp_state->fp_ds = ifps->fp_save_state.fp_ds;
+ *user_fp_regs = ifps->fp_regs;
+ }
}
simple_unlock(&pcb->lock);
@@ -481,11 +618,80 @@ fpextovrflt()
/*NOTREACHED*/
}
+static int
+fphandleerr()
+{
+ register thread_t thread = current_thread();
+
+ /*
+ * Save the FPU context to the thread using it.
+ */
+#if NCPUS == 1
+ if (fp_thread == THREAD_NULL) {
+ printf("fphandleerr: FPU not belonging to anyone!\n");
+ clear_ts();
+ fninit();
+ clear_fpu();
+ return 1;
+ }
+
+ if (fp_thread != thread) {
+ /*
+ * FPU exception is for a different thread.
+ * When that thread again uses the FPU an exception will be
+ * raised in fp_load. Remember the condition in fp_valid (== 2).
+ */
+ clear_ts();
+ fp_save(fp_thread);
+ fp_thread->pcb->ims.ifps->fp_valid = 2;
+ fninit();
+ clear_fpu();
+ /* leave fp_intr_thread THREAD_NULL */
+ return 1;
+ }
+#endif /* NCPUS == 1 */
+
+ /*
+ * Save the FPU state and turn off the FPU.
+ */
+ clear_ts();
+ fp_save(thread);
+ fninit();
+ clear_fpu();
+
+ return 0;
+}
+
+/*
+ * FPU error. Called by exception handler.
+ */
+void
+fpexterrflt()
+{
+ register thread_t thread = current_thread();
+
+ if (fphandleerr())
+ return;
+
+ /*
+ * Raise FPU exception.
+ * Locking not needed on pcb->ims.ifps,
+ * since thread is running.
+ */
+ i386_exception(EXC_ARITHMETIC,
+ EXC_I386_EXTERR,
+ fp_kind == FP_387X ?
+ thread->pcb->ims.ifps->xfp_save_state.fp_status :
+ thread->pcb->ims.ifps->fp_save_state.fp_status);
+ /*NOTREACHED*/
+}
+
+#ifndef MACH_XEN
/*
* FPU error. Called by AST.
*/
void
-fpexterrflt()
+fpastintr()
{
register thread_t thread = current_thread();
@@ -532,9 +738,12 @@ ASSERT_IPL(SPL0);
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thread->pcb->ims.ifps->fp_save_state.fp_status);
+ fp_kind == FP_387X ?
+ thread->pcb->ims.ifps->xfp_save_state.fp_status :
+ thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
}
+#endif /* MACH_XEN */
/*
* Save FPU state.
@@ -554,7 +763,10 @@ fp_save(thread)
if (ifps != 0 && !ifps->fp_valid) {
/* registers are in FPU */
ifps->fp_valid = TRUE;
- fnsave(&ifps->fp_save_state);
+ if (fp_kind == FP_387X)
+ fxsave(&ifps->xfp_save_state);
+ else
+ fnsave(&ifps->fp_save_state);
}
}
@@ -595,14 +807,19 @@ ASSERT_IPL(SPL0);
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thread->pcb->ims.ifps->fp_save_state.fp_status);
+ fp_kind == FP_387X ?
+ thread->pcb->ims.ifps->xfp_save_state.fp_status :
+ thread->pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
#endif
} else if (! ifps->fp_valid) {
printf("fp_load: invalid FPU state!\n");
fninit ();
} else {
- frstor(ifps->fp_save_state);
+ if (fp_kind == FP_387X)
+ fxrstor(ifps->xfp_save_state);
+ else
+ frstor(ifps->fp_save_state);
}
ifps->fp_valid = FALSE; /* in FPU */
}
@@ -624,14 +841,25 @@ fp_state_alloc()
pcb->ims.ifps = ifps;
ifps->fp_valid = TRUE;
- ifps->fp_save_state.fp_control = (0x037f
- & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
- | (FPC_PC_53|FPC_IC_AFF);
- ifps->fp_save_state.fp_status = 0;
- ifps->fp_save_state.fp_tag = 0xffff; /* all empty */
+
+ if (fp_kind == FP_387X) {
+ ifps->xfp_save_state.fp_control = (0x037f
+ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
+ | (FPC_PC_53|FPC_IC_AFF);
+ ifps->xfp_save_state.fp_status = 0;
+ ifps->xfp_save_state.fp_tag = 0xffff; /* all empty */
+ if (CPU_HAS_FEATURE(CPU_FEATURE_SSE))
+ ifps->xfp_save_state.fp_mxcsr = 0x1f80;
+ } else {
+ ifps->fp_save_state.fp_control = (0x037f
+ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
+ | (FPC_PC_53|FPC_IC_AFF);
+ ifps->fp_save_state.fp_status = 0;
+ ifps->fp_save_state.fp_tag = 0xffff; /* all empty */
+ }
}
-#if AT386
+#if AT386 && !defined(MACH_XEN)
/*
* Handle a coprocessor error interrupt on the AT386.
* This comes in on line 5 of the slave PIC at SPL1.
@@ -648,42 +876,15 @@ ASSERT_IPL(SPL1);
*/
outb(0xf0, 0);
- /*
- * Save the FPU context to the thread using it.
- */
-#if NCPUS == 1
- if (fp_thread == THREAD_NULL) {
- printf("fpintr: FPU not belonging to anyone!\n");
- clear_ts();
- fninit();
- clear_fpu();
+ if (fphandleerr())
return;
- }
- if (fp_thread != thread) {
- /*
- * FPU exception is for a different thread.
- * When that thread again uses the FPU an exception will be
- * raised in fp_load. Remember the condition in fp_valid (== 2).
- */
- clear_ts();
- fp_save(fp_thread);
- fp_thread->pcb->ims.ifps->fp_valid = 2;
- fninit();
- clear_fpu();
- /* leave fp_intr_thread THREAD_NULL */
- return;
- }
- if (fp_intr_thread != THREAD_NULL)
+#if NCPUS == 1
+ if (fp_intr_thread != THREAD_NULL && fp_intr_thread != thread)
panic("fp_intr: already caught intr");
fp_intr_thread = thread;
#endif /* NCPUS == 1 */
- clear_ts();
- fp_save(thread);
- fninit();
- clear_fpu();
-
/*
* Since we are running on the interrupt stack, we must
* signal the thread to take the exception when we return
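
A worked example may help with the two tag-word helpers added above: the 387 tag word spends two bits per stack register (11 meaning empty), while the FXSAVE image keeps a single non-empty bit per register, so the conversion collapses or re-expands those pairs. The values below were computed by hand from the code in this hunk:

/*
 * twd_i387_to_fxsr(0xffff) == 0x00    all eight registers empty
 * twd_i387_to_fxsr(0xfff0) == 0x03    st0/st1 in use, the rest empty
 *
 * twd_fxsr_to_i387() goes the other way; since FXSAVE discarded the
 * valid/zero/special distinction, it re-derives each 2-bit tag from the
 * saved register contents (exponent and significand).
 */
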
diff --git a/i386/i386/fpu.h b/i386/i386/fpu.h
index 3e92de73..1a1b61f6 100644
--- a/i386/i386/fpu.h
+++ b/i386/i386/fpu.h
@@ -32,8 +32,9 @@
* floating-point processor.
*/
+#include <sys/types.h>
#include <i386/proc_reg.h>
-#include <i386/thread.h>
+#include <kern/thread.h>
/*
* FPU instructions.
@@ -66,6 +67,12 @@
#define frstor(state) \
asm volatile("frstor %0" : : "m" (state))
+#define fxsave(state) \
+ asm volatile("fxsave %0" : "=m" (*state))
+
+#define fxrstor(state) \
+ asm volatile("fxrstor %0" : : "m" (state))
+
#define fwait() \
asm("fwait");
@@ -85,7 +92,10 @@
if (ifps != 0 && !ifps->fp_valid) { \
/* registers are in FPU - save to memory */ \
ifps->fp_valid = TRUE; \
- fnsave(&ifps->fp_save_state); \
+ if (fp_kind == FP_387X) \
+ fxsave(&ifps->xfp_save_state); \
+ else \
+ fnsave(&ifps->fp_save_state); \
set_ts(); \
} \
}
@@ -99,5 +109,20 @@
#endif /* NCPUS == 1 */
extern int fp_kind;
+extern void fp_save(thread_t thread);
+extern void fp_load(thread_t thread);
+extern void fp_free(struct i386_fpsave_state *fps);
+extern void fpu_module_init(void);
+extern kern_return_t fpu_set_state(
+ thread_t thread,
+ struct i386_float_state *state);
+extern kern_return_t fpu_get_state(
+ thread_t thread,
+ struct i386_float_state *state);
+extern void fpnoextflt(void);
+extern void fpextovrflt(void);
+extern void fpexterrflt(void);
+extern void fpastintr(void);
+extern void init_fpu(void);
#endif /* _I386_FPU_H_ */
diff --git a/i386/i386/gdt.c b/i386/i386/gdt.c
index 9438ee1a..f26a50c9 100644
--- a/i386/i386/gdt.c
+++ b/i386/i386/gdt.c
@@ -31,11 +31,18 @@
* Global descriptor table.
*/
#include <mach/machine/vm_types.h>
+#include <mach/xen.h>
+
+#include <intel/pmap.h>
#include "vm_param.h"
#include "seg.h"
#include "gdt.h"
+#ifdef MACH_XEN
+/* It is actually defined in xen_boothdr.S */
+extern
+#endif /* MACH_XEN */
struct real_descriptor gdt[GDTSZ];
void
@@ -43,14 +50,32 @@ gdt_init()
{
/* Initialize the kernel code and data segment descriptors. */
fill_gdt_descriptor(KERNEL_CS,
- LINEAR_MIN_KERNEL_ADDRESS,
- LINEAR_MAX_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS - 1,
+ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
ACC_PL_K|ACC_CODE_R, SZ_32);
fill_gdt_descriptor(KERNEL_DS,
- LINEAR_MIN_KERNEL_ADDRESS,
- LINEAR_MAX_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS - 1,
+ LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ LINEAR_MAX_KERNEL_ADDRESS - (LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) - 1,
+ ACC_PL_K|ACC_DATA_W, SZ_32);
+#ifndef MACH_HYP
+ fill_gdt_descriptor(LINEAR_DS,
+ 0,
+ 0xffffffff,
ACC_PL_K|ACC_DATA_W, SZ_32);
+#endif /* MACH_HYP */
+#ifdef MACH_XEN
+ unsigned long frame = kv_to_mfn(gdt);
+ pmap_set_page_readonly(gdt);
+ if (hyp_set_gdt(kv_to_la(&frame), GDTSZ))
+ panic("couldn't set gdt\n");
+ if (hyp_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments))
+ panic("couldn't set 4gb segments vm assist");
+#if 0
+ if (hyp_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify))
+ panic("couldn't set 4gb segments vm assist notify");
+#endif
+#else /* MACH_XEN */
/* Load the new GDT. */
{
struct pseudo_descriptor pdesc;
@@ -59,6 +84,7 @@ gdt_init()
pdesc.linear_base = kvtolin(&gdt);
lgdt(&pdesc);
}
+#endif /* MACH_XEN */
/* Reload all the segment registers from the new GDT.
We must load ds and es with 0 before loading them with KERNEL_DS
@@ -75,5 +101,14 @@ gdt_init()
"movw %w1,%%es\n"
"movw %w1,%%ss\n"
: : "i" (KERNEL_CS), "r" (KERNEL_DS), "r" (0));
+#ifdef MACH_XEN
+#if VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+ /* things now get shifted */
+#ifdef MACH_PSEUDO_PHYS
+ pfn_list = (void*) pfn_list + VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS;
+#endif /* MACH_PSEUDO_PHYS */
+ la_shift += LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
+#endif
+#endif /* MACH_XEN */
}
diff --git a/i386/i386/gdt.h b/i386/i386/gdt.h
index 558bd15f..41ace791 100644
--- a/i386/i386/gdt.h
+++ b/i386/i386/gdt.h
@@ -38,14 +38,18 @@
/*
* Kernel descriptors for Mach - 32-bit flat address space.
*/
-#define KERNEL_CS 0x08 /* kernel code */
-#define KERNEL_DS 0x10 /* kernel data */
+#define KERNEL_CS (0x08 | KERNEL_RING) /* kernel code */
+#define KERNEL_DS (0x10 | KERNEL_RING) /* kernel data */
+#ifndef MACH_XEN
#define KERNEL_LDT 0x18 /* master LDT */
+#endif /* MACH_XEN */
#define KERNEL_TSS 0x20 /* master TSS (uniprocessor) */
#define USER_LDT 0x28 /* place for per-thread LDT */
#define USER_TSS 0x30 /* place for per-thread TSS
that holds IO bitmap */
-/* 0x38 was FPE_CS, now free */
+#ifndef MACH_HYP
+#define LINEAR_DS 0x38 /* linear mapping */
+#endif /* MACH_HYP */
/* 0x40 was USER_FPREGS, now free */
#define USER_GDT 0x48 /* user-defined GDT entries */
@@ -59,4 +63,6 @@ extern struct real_descriptor gdt[GDTSZ];
#define fill_gdt_descriptor(segment, base, limit, access, sizebits) \
fill_descriptor(&gdt[segment/8], base, limit, access, sizebits)
+extern void gdt_init(void);
+
#endif /* _I386_GDT_ */
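
The KERNEL_RING parameterisation above (defined in seg.h later in this patch) is what all the (cs & 0x3) != KERNEL_RING tests elsewhere in the patch rely on. The resulting selector values, worked out from the definitions:

/*
 * native, ring 0:   KERNEL_RING = 0  ->  KERNEL_CS = 0x08 | 0 = 0x08
 * Xen domU, ring 1: KERNEL_RING = 1  ->  KERNEL_CS = 0x08 | 1 = 0x09
 *
 * The RPL field is the low two bits of a selector, so testing
 * (regs->cs & 0x3) == KERNEL_RING replaces the old "== 0" check.
 */
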
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index 8cfb2c09..b1670e8b 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -45,6 +45,7 @@
#include <i386/gdt.h>
#include <i386/ldt.h>
#include <i386/mp_desc.h>
+#include <i386/xen.h>
offset thread th pcb
@@ -90,11 +91,20 @@ expr VM_MIN_ADDRESS
expr VM_MAX_ADDRESS
expr VM_MIN_KERNEL_ADDRESS KERNELBASE
expr KERNEL_STACK_SIZE
+#if VM_MIN_KERNEL_ADDRESS == LINEAR_MIN_KERNEL_ADDRESS
+expr PFN_LIST pfn_list
+#endif
+#if PAE
+expr PDPSHIFT
+#endif /* PAE */
expr PDESHIFT
+expr PDEMASK
expr PTESHIFT
expr PTEMASK
+expr sizeof(pt_entry_t) PTE_SIZE
+
expr INTEL_PTE_PFN PTE_PFN
expr INTEL_PTE_VALID PTE_V
expr INTEL_PTE_WRITE PTE_W
@@ -106,10 +116,14 @@ expr IDTSZ
expr GDTSZ
expr LDTSZ
+expr KERNEL_RING
+
expr KERNEL_CS
expr KERNEL_DS
expr KERNEL_TSS
+#ifndef MACH_XEN
expr KERNEL_LDT
+#endif /* MACH_XEN */
expr (VM_MIN_KERNEL_ADDRESS>>PDESHIFT)*sizeof(pt_entry_t) KERNELBASEPDE
@@ -127,3 +141,12 @@ expr TIMER_HIGH_UNIT
offset thread th system_timer
offset thread th user_timer
#endif
+
+#ifdef MACH_XEN
+offset shared_info si vcpu_info[0].evtchn_upcall_mask CPU_CLI
+offset shared_info si vcpu_info[0].evtchn_upcall_pending CPU_PENDING
+offset shared_info si vcpu_info[0].evtchn_pending_sel CPU_PENDING_SEL
+offset shared_info si evtchn_pending PENDING
+offset shared_info si evtchn_mask EVTMASK
+offset shared_info si vcpu_info[0].arch.cr2 CR2
+#endif /* MACH_XEN */
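
The PDPSHIFT/PDEMASK/PTE_SIZE symbols exported above feed the hand-written page-table walk in locore.S further down. Restated as a sketch (the field widths follow the usual ia32 and PAE layouts and are not taken from the patch):

/*
 *   pdp = va >> PDPSHIFT;               PAE only: selects one of 4 directories
 *   pde = (va >> PDESHIFT) & PDEMASK;   page-directory index
 *   pte = (va >> PTESHIFT) & PTEMASK;   page-table index
 *   entry address = table base + index * PTE_SIZE   (8 with PAE, 4 without)
 */
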
diff --git a/i386/i386/idt.c b/i386/i386/idt.c
index 1a8f9177..b5e3d080 100644
--- a/i386/i386/idt.c
+++ b/i386/i386/idt.c
@@ -38,6 +38,10 @@ extern struct idt_init_entry idt_inittab[];
void idt_init()
{
+#ifdef MACH_HYP
+ if (hyp_set_trap_table(kvtolin(idt_inittab)))
+ panic("couldn't set trap table\n");
+#else /* MACH_HYP */
struct idt_init_entry *iie = idt_inittab;
/* Initialize the exception vectors from the idt_inittab. */
@@ -55,5 +59,6 @@ void idt_init()
pdesc.linear_base = kvtolin(&idt);
lidt(&pdesc);
}
+#endif /* MACH_HYP */
}
diff --git a/i386/i386/idt_inittab.S b/i386/i386/idt_inittab.S
index 77185681..63e554bd 100644
--- a/i386/i386/idt_inittab.S
+++ b/i386/i386/idt_inittab.S
@@ -25,7 +25,8 @@
*/
#include <mach/machine/asm.h>
-#include "seg.h"
+#include <i386/seg.h>
+#include <i386/i386asm.h>
/* We'll be using macros to fill in a table in data hunk 2
@@ -38,12 +39,22 @@ ENTRY(idt_inittab)
/*
* Interrupt descriptor table and code vectors for it.
*/
+#ifdef MACH_XEN
+#define IDT_ENTRY(n,entry,type) \
+ .data 2 ;\
+ .byte n ;\
+ .byte (((type)&ACC_PL)>>5)|((((type)&(ACC_TYPE|ACC_A))==ACC_INTR_GATE)<<2) ;\
+ .word KERNEL_CS ;\
+ .long entry ;\
+ .text
+#else /* MACH_XEN */
#define IDT_ENTRY(n,entry,type) \
.data 2 ;\
.long entry ;\
.word n ;\
.word type ;\
.text
+#endif /* MACH_XEN */
/*
* No error code. Clear error code and push trap number.
@@ -97,7 +108,11 @@ EXCEP_SPC(0x0b,t_segnp)
EXCEP_ERR(0x0c,t_stack_fault)
EXCEP_SPC(0x0d,t_gen_prot)
EXCEP_SPC(0x0e,t_page_fault)
+#ifdef MACH_XEN
+EXCEP_ERR(0x0f,t_trap_0f)
+#else
EXCEPTION(0x0f,t_trap_0f)
+#endif
EXCEPTION(0x10,t_fpu_err)
EXCEPTION(0x11,t_trap_11)
EXCEPTION(0x12,t_trap_12)
@@ -118,4 +133,7 @@ EXCEPTION(0x1f,t_trap_1f)
/* Terminator */
.data 2
.long 0
+#ifdef MACH_XEN
+ .long 0
+#endif /* MACH_XEN */
diff --git a/i386/i386/io_map.c b/i386/i386/io_map.c
index 7ed0e263..5b77552d 100644
--- a/i386/i386/io_map.c
+++ b/i386/i386/io_map.c
@@ -26,6 +26,7 @@
#include <kern/printf.h>
#include <mach/vm_param.h>
+#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
diff --git a/i386/i386/ipl.h b/i386/i386/ipl.h
index 86ed660a..557cd8df 100644
--- a/i386/i386/ipl.h
+++ b/i386/i386/ipl.h
@@ -70,7 +70,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#ifdef KERNEL
#ifndef __ASSEMBLER__
#include <machine/machspl.h>
-extern int (*ivect[])();
+extern void (*ivect[])();
extern int iunit[];
extern int intpri[];
#endif /* __ASSEMBLER__ */
diff --git a/i386/i386/ktss.c b/i386/i386/ktss.c
index 03d9a04c..66432f3e 100644
--- a/i386/i386/ktss.c
+++ b/i386/i386/ktss.c
@@ -45,6 +45,12 @@ ktss_init()
/* XXX temporary exception stack */
static int exception_stack[1024];
+#ifdef MACH_XEN
+ /* Xen won't allow us to do any I/O by default anyway, just register
+ * exception stack */
+ if (hyp_stack_switch(KERNEL_DS, (unsigned)(exception_stack+1024)))
+ panic("couldn't register exception stack\n");
+#else /* MACH_XEN */
/* Initialize the master TSS descriptor. */
fill_gdt_descriptor(KERNEL_TSS,
kvtolin(&ktss), sizeof(struct task_tss) - 1,
@@ -59,5 +65,6 @@ ktss_init()
/* Load the TSS. */
ltr(KERNEL_TSS);
+#endif /* MACH_XEN */
}
diff --git a/i386/i386/ktss.h b/i386/i386/ktss.h
index 3522e866..304a877a 100644
--- a/i386/i386/ktss.h
+++ b/i386/i386/ktss.h
@@ -27,4 +27,6 @@
extern struct task_tss ktss;
+extern void ktss_init(void);
+
#endif /* _I386_KTSS_ */
diff --git a/i386/i386/kttd_interface.c b/i386/i386/kttd_interface.c
index 9097061b..b9e0624b 100644
--- a/i386/i386/kttd_interface.c
+++ b/i386/i386/kttd_interface.c
@@ -395,7 +395,7 @@ boolean_t kttd_trap(int type, int code, struct i386_saved_state *regs)
kttd_regs = *regs;
- if ((regs->cs & 0x3) == 0) {
+ if ((regs->cs & 0x3) == KERNEL_RING) {
/*
* Kernel mode - esp and ss not saved
*/
@@ -442,7 +442,7 @@ boolean_t kttd_trap(int type, int code, struct i386_saved_state *regs)
regs->ecx = kttd_regs.ecx;
regs->edx = kttd_regs.edx;
regs->ebx = kttd_regs.ebx;
- if (regs->cs & 0x3) {
+ if ((regs->cs & 0x3) != KERNEL_RING) {
/*
* user mode - saved esp and ss valid
*/
@@ -513,7 +513,7 @@ kttd_netentry(int_regs)
if (kttd_debug)
printf("kttd_NETENTRY after slphigh()\n");
- if (is->cs & 0x3) {
+ if ((is->cs & 0x3) != KERNEL_RING) {
/*
* Interrupted from User Space
*/
@@ -546,7 +546,7 @@ kttd_netentry(int_regs)
kttd_task_trap(-1, 0, (kttd_regs.cs & 0x3) != 0);
kttd_active--;
- if (kttd_regs.cs & 0x3) {
+ if ((kttd_regs.cs & 0x3) != KERNEL_RING) {
((int *)(is+1))[0] = kttd_regs.uesp;
((int *)(is+1))[1] = kttd_regs.ss & 0xffff;
}
diff --git a/i386/i386/ldt.c b/i386/i386/ldt.c
index 7299377e..0ef7a8c4 100644
--- a/i386/i386/ldt.c
+++ b/i386/i386/ldt.c
@@ -28,6 +28,9 @@
* same LDT.
*/
#include <mach/machine/vm_types.h>
+#include <mach/xen.h>
+
+#include <intel/pmap.h>
#include "vm_param.h"
#include "seg.h"
@@ -36,15 +39,23 @@
extern int syscall();
+#ifdef MACH_XEN
+/* It is actually defined in xen_boothdr.S */
+extern
+#endif /* MACH_XEN */
struct real_descriptor ldt[LDTSZ];
void
ldt_init()
{
+#ifdef MACH_XEN
+ pmap_set_page_readwrite(ldt);
+#else /* MACH_XEN */
/* Initialize the master LDT descriptor in the GDT. */
fill_gdt_descriptor(KERNEL_LDT,
kvtolin(&ldt), sizeof(ldt)-1,
ACC_PL_K|ACC_LDT, 0);
+#endif /* MACH_XEN */
/* Initialize the LDT descriptors. */
fill_ldt_gate(USER_SCALL,
@@ -61,5 +72,9 @@ ldt_init()
ACC_PL_U|ACC_DATA_W, SZ_32);
/* Activate the LDT. */
+#ifdef MACH_HYP
+ hyp_set_ldt(&ldt, LDTSZ);
+#else /* MACH_HYP */
lldt(KERNEL_LDT);
+#endif /* MACH_HYP */
}
diff --git a/i386/i386/ldt.h b/i386/i386/ldt.h
index 3c38109e..f196c74b 100644
--- a/i386/i386/ldt.h
+++ b/i386/i386/ldt.h
@@ -61,6 +61,8 @@ extern struct real_descriptor ldt[LDTSZ];
fill_gate((struct real_gate*)&ldt[selector/8], \
offset, dest_selector, access, word_count)
+void ldt_init(void);
+
#endif /* !__ASSEMBLER__ */
#endif /* _I386_LDT_ */
diff --git a/i386/i386/lock.h b/i386/i386/lock.h
index 2b5c4aa6..38a66c87 100644
--- a/i386/i386/lock.h
+++ b/i386/i386/lock.h
@@ -47,7 +47,7 @@
({ register int _old_val_; \
asm volatile("xchgl %0, %2" \
: "=r" (_old_val_) \
- : "0" (new_val), "m" (*(lock)) \
+ : "0" (new_val), "m" (*(lock) : "memory") \
); \
_old_val_; \
})
@@ -81,7 +81,7 @@
btsl %0, %1 \n\
jb 0b" \
: \
- : "r" (bit), "m" (*(volatile int *)(l))); \
+ : "r" (bit), "m" (*(volatile int *)(l)) : "memory"); \
0; \
})
@@ -90,7 +90,7 @@
asm volatile(" lock \n\
btrl %0, %1" \
: \
- : "r" (bit), "m" (*(volatile int *)(l))); \
+ : "r" (bit), "m" (*(volatile int *)(l)) : "memory"); \
0; \
})
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index 97af53fd..847aa985 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -36,6 +36,7 @@
#include <i386/ldt.h>
#include <i386/i386asm.h>
#include <i386/cpu_number.h>
+#include <i386/xen.h>
/*
* Fault recovery.
@@ -192,7 +193,7 @@ LEXT(retry_table_end) ;\
addl %ecx,LOW_BITS(%ebx) /* add to low bits */ ;\
leal CX(0,%edx),%ecx /* timer is 16 bytes */ ;\
lea CX(EXT(kernel_timer),%edx),%ecx /* get interrupt timer*/;\
- movl %ecx,CX(EXT(current_timer),%edx) /* set timer
+ movl %ecx,CX(EXT(current_timer),%edx) /* set timer */
/*
* update time on interrupt exit.
@@ -323,8 +324,9 @@ ENTRY(t_segnp)
trap_check_kernel_exit:
testl $(EFL_VM),16(%esp) /* is trap from V86 mode? */
jnz EXT(alltraps) /* isn`t kernel trap if so */
- testl $3,12(%esp) /* is trap from kernel mode? */
- jne EXT(alltraps) /* if so: */
+ /* Note: handling KERNEL_RING value by hand */
+ testl $2,12(%esp) /* is trap from kernel mode? */
+ jnz EXT(alltraps) /* if so: */
/* check for the kernel exit sequence */
cmpl $_kret_iret,8(%esp) /* on IRET? */
je fault_iret
@@ -410,7 +412,8 @@ push_segregs:
ENTRY(t_debug)
testl $(EFL_VM),8(%esp) /* is trap from V86 mode? */
jnz 0f /* isn`t kernel trap if so */
- testl $3,4(%esp) /* is trap from kernel mode? */
+ /* Note: handling KERNEL_RING value by hand */
+ testl $2,4(%esp) /* is trap from kernel mode? */
jnz 0f /* if so: */
cmpl $syscall_entry,(%esp) /* system call entry? */
jne 0f /* if so: */
@@ -439,7 +442,11 @@ ENTRY(t_debug)
ENTRY(t_page_fault)
pushl $(T_PAGE_FAULT) /* mark a page fault trap */
pusha /* save the general registers */
+#ifdef MACH_XEN
+ movl %ss:hyp_shared_info+CR2,%eax
+#else /* MACH_XEN */
movl %cr2,%eax /* get the faulting address */
+#endif /* MACH_XEN */
movl %eax,12(%esp) /* save in esp save slot */
jmp trap_push_segs /* continue fault */
@@ -475,7 +482,8 @@ trap_set_segs:
cld /* clear direction flag */
testl $(EFL_VM),R_EFLAGS(%esp) /* in V86 mode? */
jnz trap_from_user /* user mode trap if so */
- testb $3,R_CS(%esp) /* user mode trap? */
+ /* Note: handling KERNEL_RING value by hand */
+ testb $2,R_CS(%esp) /* user mode trap? */
jz trap_from_kernel /* kernel trap if not */
trap_from_user:
@@ -690,7 +698,8 @@ LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */
testl $(EFL_VM),I_EFL(%esp) /* if in V86 */
jnz 0f /* or */
- testb $3,I_CS(%esp) /* user mode, */
+ /* Note: handling KERNEL_RING value by hand */
+ testb $2,I_CS(%esp) /* user mode, */
jz 1f /* check for ASTs */
0:
cmpl $0,CX(EXT(need_ast),%edx)
@@ -767,8 +776,10 @@ ast_from_interrupt:
*
* frame-> saved %ebp
* return address in interrupt handler
+ * #ifndef MACH_XEN
* iunit
* saved SPL
+ * #endif
* return address == return_to_iret_i
* saved %edx
* saved %ecx
@@ -780,8 +791,10 @@ ast_from_interrupt:
* OR:
* frame-> saved %ebp
* return address in interrupt handler
+ * #ifndef MACH_XEN
* iunit
* saved SPL
+ * #endif
* return address == return_to_iret
* pointer to save area on old stack
* [ saved %ebx, if accurate timing ]
@@ -800,7 +813,11 @@ ast_from_interrupt:
* Call kdb, passing it that register save area.
*/
+#ifdef MACH_XEN
+#define RET_OFFSET 8
+#else /* MACH_XEN */
#define RET_OFFSET 16
+#endif /* MACH_XEN */
ENTRY(kdb_kintr)
movl %ebp,%eax /* save caller`s frame pointer */
@@ -977,6 +994,8 @@ ENTRY(syscall)
syscall_entry:
pushf /* save flags as soon as possible */
syscall_entry_2:
+ cld /* clear direction flag */
+
pushl %eax /* save system call number */
pushl $0 /* clear trap number slot */
@@ -1165,9 +1184,14 @@ ENTRY(discover_x86_cpu_type)
movl %esp,%ebp /* Save stack pointer */
and $~0x3,%esp /* Align stack pointer */
+#ifdef MACH_HYP
+#warning Assuming not Cyrix CPU
+#else /* MACH_HYP */
inb $0xe8,%al /* Enable ID flag for Cyrix CPU ... */
andb $0x80,%al /* ... in CCR4 reg bit7 */
outb %al,$0xe8
+#endif /* MACH_HYP */
+
pushfl /* Fetch flags ... */
popl %eax /* ... into eax */
movl %eax,%ecx /* Save original flags for return */
@@ -1275,18 +1299,46 @@ Entry(copyoutmsg)
* XXX only have to do this on 386's.
*/
copyout_retry:
+#ifdef MACH_HYP
+ movl cr3,%ecx /* point to page directory */
+#else /* MACH_HYP */
movl %cr3,%ecx /* point to page directory */
+#endif /* MACH_HYP */
+#if PAE
+ movl %edi,%eax /* get page directory pointer bits */
+ shrl $(PDPSHIFT),%eax /* from user address */
+ movl KERNELBASE(%ecx,%eax,PTE_SIZE),%ecx
+ /* get page directory pointer */
+#ifdef MACH_PSEUDO_PHYS
+ shrl $(PTESHIFT),%ecx
+ movl pfn_list,%eax
+ movl (%eax,%ecx,4),%ecx /* mfn_to_pfn */
+ shll $(PTESHIFT),%ecx
+#else /* MACH_PSEUDO_PHYS */
+ andl $(PTE_PFN),%ecx /* isolate page frame address */
+#endif /* MACH_PSEUDO_PHYS */
+#endif /* PAE */
movl %edi,%eax /* get page directory bits */
shrl $(PDESHIFT),%eax /* from user address */
- movl KERNELBASE(%ecx,%eax,4),%ecx
+#if PAE
+ andl $(PDEMASK),%eax
+#endif /* PAE */
+ movl KERNELBASE(%ecx,%eax,PTE_SIZE),%ecx
/* get page directory pointer */
testl $(PTE_V),%ecx /* present? */
jz 0f /* if not, fault is OK */
+#ifdef MACH_PSEUDO_PHYS
+ shrl $(PTESHIFT),%ecx
+ movl pfn_list,%eax
+ movl (%eax,%ecx,4),%ecx /* mfn_to_pfn */
+ shll $(PTESHIFT),%ecx
+#else /* MACH_PSEUDO_PHYS */
andl $(PTE_PFN),%ecx /* isolate page frame address */
+#endif /* MACH_PSEUDO_PHYS */
movl %edi,%eax /* get page table bits */
shrl $(PTESHIFT),%eax
andl $(PTEMASK),%eax /* from user address */
- leal KERNELBASE(%ecx,%eax,4),%ecx
+ leal KERNELBASE(%ecx,%eax,PTE_SIZE),%ecx
/* point to page table entry */
movl (%ecx),%eax /* get it */
testl $(PTE_V),%eax /* present? */
@@ -1296,9 +1348,17 @@ copyout_retry:
/*
* Not writable - must fake a fault. Turn off access to the page.
*/
+#ifdef MACH_HYP
+ pushl %edx
+ pushl %ecx
+ call hyp_invalidate_pte
+ popl %ecx
+ popl %edx
+#else /* MACH_HYP */
andl $(PTE_INVALID),(%ecx) /* turn off valid bit */
movl %cr3,%eax /* invalidate TLB */
movl %eax,%cr3
+#endif /* MACH_HYP */
0:
/*
diff --git a/i386/i386/locore.h b/i386/i386/locore.h
index b873e6f1..57014304 100644
--- a/i386/i386/locore.h
+++ b/i386/i386/locore.h
@@ -26,11 +26,11 @@
extern int copyin (const void *userbuf, void *kernelbuf, size_t cn);
-extern int copyinmsg (vm_offset_t userbuf, vm_offset_t kernelbuf, size_t cn);
+extern int copyinmsg (const void *userbuf, void *kernelbuf, size_t cn);
extern int copyout (const void *kernelbuf, void *userbuf, size_t cn);
-extern int copyoutmsg (vm_offset_t kernelbuf, vm_offset_t userbuf, size_t cn);
+extern int copyoutmsg (const void *kernelbuf, void *userbuf, size_t cn);
extern int call_continuation (continuation_t continuation);
diff --git a/i386/i386/loose_ends.c b/i386/i386/loose_ends.c
index c7d54c37..d3108fdb 100644
--- a/i386/i386/loose_ends.c
+++ b/i386/i386/loose_ends.c
@@ -40,9 +40,10 @@ int boothowto = 0;
/* Someone with time should write code to set cpuspeed automagically */
int cpuspeed = 4;
-#define DELAY(n) { register int N = cpuspeed * (n); while (--N > 0); }
+#define DELAY(n) { volatile int N = cpuspeed * (n); while (--N > 0); }
void
delay(n)
+ int n;
{
DELAY(n);
}
diff --git a/i386/i386/loose_ends.h b/i386/i386/loose_ends.h
new file mode 100644
index 00000000..c0855279
--- /dev/null
+++ b/i386/i386/loose_ends.h
@@ -0,0 +1,33 @@
+/*
+ * Other useful functions?
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Other useful functions?
+ *
+ */
+
+#ifndef _LOOSE_ENDS_H_
+#define _LOOSE_ENDS_H_
+
+#include <mach/std_types.h>
+
+extern void delay (int n);
+
+#endif /* _LOOSE_ENDS_H_ */
diff --git a/i386/i386/model_dep.h b/i386/i386/model_dep.h
new file mode 100644
index 00000000..a41c474d
--- /dev/null
+++ b/i386/i386/model_dep.h
@@ -0,0 +1,60 @@
+/*
+ * Arch dependent functions
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Arch dependent functions.
+ *
+ */
+
+#ifndef _I386AT_MODEL_DEP_H_
+#define _I386AT_MODEL_DEP_H_
+
+#include <mach/std_types.h>
+
+/*
+ * Find devices. The system is alive.
+ */
+extern void machine_init (void);
+
+/* Conserve power on processor CPU. */
+extern void machine_idle (int cpu);
+
+/*
+ * Halt a cpu.
+ */
+extern void halt_cpu (void) __attribute__ ((noreturn));
+
+/*
+ * Halt the system or reboot.
+ */
+extern void halt_all_cpus (boolean_t reboot) __attribute__ ((noreturn));
+
+extern void resettodr (void);
+
+extern void startrtclock (void);
+
+/*
+ * More-specific code provides these;
+ * they indicate the total extent of physical memory
+ * that we know about and might ever have to manage.
+ */
+extern vm_offset_t phys_first_addr, phys_last_addr;
+
+#endif /* _I386AT_MODEL_DEP_H_ */
diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c
index 7f0b21e5..2fd5ec2d 100644
--- a/i386/i386/mp_desc.c
+++ b/i386/i386/mp_desc.c
@@ -31,6 +31,7 @@
#include <kern/cpu_number.h>
#include <kern/debug.h>
#include <mach/machine.h>
+#include <mach/xen.h>
#include <vm/vm_kern.h>
#include <i386/mp_desc.h>
@@ -149,6 +150,9 @@ mp_desc_init(mycpu)
* Fix up the entries in the GDT to point to
* this LDT and this TSS.
*/
+#ifdef MACH_HYP
+ panic("TODO %s:%d\n",__FILE__,__LINE__);
+#else /* MACH_HYP */
fill_descriptor(&mpt->gdt[sel_idx(KERNEL_LDT)],
(unsigned)&mpt->ldt,
LDTSZ * sizeof(struct real_descriptor) - 1,
@@ -161,6 +165,7 @@ mp_desc_init(mycpu)
mpt->ktss.tss.ss0 = KERNEL_DS;
mpt->ktss.tss.io_bit_map_offset = IOPB_INVAL;
mpt->ktss.barrier = 0xFF;
+#endif /* MACH_HYP */
return mpt;
}
@@ -192,6 +197,7 @@ interrupt_stack_alloc()
*/
if (!init_alloc_aligned(INTSTACK_SIZE*(cpu_count-1), &stack_start))
panic("not enough memory for interrupt stacks");
+ stack_start = phystokv(stack_start);
/*
* Set up pointers to the top of the interrupt stack.
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index 58b4ea97..f687db14 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -31,6 +31,7 @@
#include <mach/kern_return.h>
#include <mach/thread_status.h>
#include <mach/exec/exec.h>
+#include <mach/xen.h>
#include "vm_param.h"
#include <kern/counters.h>
@@ -58,6 +59,7 @@
#include <i386/mp_desc.h>
#endif
+extern thread_t Load_context();
extern thread_t Switch_context();
extern void Thread_continue();
@@ -152,34 +154,64 @@ void switch_ktss(pcb)
? (int) (&pcb->iss + 1)
: (int) (&pcb->iss.v86_segs);
+#ifdef MACH_XEN
+ /* No IO mask here */
+ hyp_stack_switch(KERNEL_DS, pcb_stack_top);
+#else /* MACH_XEN */
curr_ktss(mycpu)->tss.esp0 = pcb_stack_top;
+#endif /* MACH_XEN */
}
{
- register user_ldt_t ldt = pcb->ims.ldt;
+ register user_ldt_t tldt = pcb->ims.ldt;
/*
* Set the thread`s LDT.
*/
- if (ldt == 0) {
+ if (tldt == 0) {
/*
* Use system LDT.
*/
+#ifdef MACH_HYP
+ hyp_set_ldt(&ldt, LDTSZ);
+#else /* MACH_HYP */
set_ldt(KERNEL_LDT);
+#endif /* MACH_HYP */
}
else {
/*
* Thread has its own LDT.
*/
- *gdt_desc_p(mycpu,USER_LDT) = ldt->desc;
+#ifdef MACH_HYP
+ hyp_set_ldt(tldt->ldt,
+ (tldt->desc.limit_low|(tldt->desc.limit_high<<16)) /
+ sizeof(struct real_descriptor));
+#else /* MACH_HYP */
+ *gdt_desc_p(mycpu,USER_LDT) = tldt->desc;
set_ldt(USER_LDT);
+#endif /* MACH_HYP */
}
}
+#ifdef MACH_XEN
+ {
+ int i;
+ for (i=0; i < USER_GDT_SLOTS; i++) {
+ if (memcmp(gdt_desc_p (mycpu, USER_GDT + (i << 3)),
+ &pcb->ims.user_gdt[i], sizeof pcb->ims.user_gdt[i])) {
+ if (hyp_do_update_descriptor(kv_to_ma(gdt_desc_p (mycpu, USER_GDT + (i << 3))),
+ *(uint64_t *) &pcb->ims.user_gdt[i]))
+ panic("couldn't set user gdt %d\n",i);
+ }
+ }
+ }
+#else /* MACH_XEN */
+
/* Copy in the per-thread GDT slots. No reloading is necessary
because just restoring the segment registers on the way back to
user mode reloads the shadow registers from the in-memory GDT. */
memcpy (gdt_desc_p (mycpu, USER_GDT),
pcb->ims.user_gdt, sizeof pcb->ims.user_gdt);
+#endif /* MACH_XEN */
/*
* Load the floating-point context, if necessary.
@@ -537,7 +569,7 @@ kern_return_t thread_setstatus(thread, flavor, tstate, count)
* Temporary - replace by i386_io_map
*/
case i386_ISA_PORT_MAP_STATE: {
- register struct i386_isa_port_map_state *state;
+ //register struct i386_isa_port_map_state *state;
if (count < i386_ISA_PORT_MAP_STATE_COUNT)
return(KERN_INVALID_ARGUMENT);
diff --git a/i386/i386/pcb.h b/i386/i386/pcb.h
index e7d33635..f8671a2e 100644
--- a/i386/i386/pcb.h
+++ b/i386/i386/pcb.h
@@ -27,6 +27,7 @@
#define _I386_PCB_H_
#include <sys/types.h>
+#include <mach/exec/exec.h>
extern void pcb_init (thread_t thread);
@@ -58,4 +59,15 @@ extern vm_offset_t set_user_regs (
struct exec_info *exec_info,
vm_size_t arg_size);
+extern void load_context (thread_t new);
+
+extern void stack_attach (
+ thread_t thread,
+ vm_offset_t stack,
+ void (*continuation)());
+
+extern vm_offset_t stack_detach (thread_t thread);
+
+extern void switch_ktss (pcb_t pcb);
+
#endif /* _I386_PCB_H_ */
diff --git a/i386/i386/phys.c b/i386/i386/phys.c
index 2c30f17c..925593b3 100644
--- a/i386/i386/phys.c
+++ b/i386/i386/phys.c
@@ -27,6 +27,7 @@
#include <string.h>
#include <mach/boolean.h>
+#include <mach/xen.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <vm/vm_map.h>
@@ -104,5 +105,9 @@ vm_offset_t addr;
if ((pte = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL)
return 0;
- return i386_trunc_page(*pte) | (addr & INTEL_OFFMASK);
+ return i386_trunc_page(
+#ifdef MACH_PSEUDO_PHYS
+ ma_to_pa
+#endif /* MACH_PSEUDO_PHYS */
+ (*pte)) | (addr & INTEL_OFFMASK);
}
diff --git a/i386/i386/pic.h b/i386/i386/pic.h
index c6f8a4b6..7a177d86 100644
--- a/i386/i386/pic.h
+++ b/i386/i386/pic.h
@@ -176,4 +176,9 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define READ_IR_ONRD 0x00
#define READ_IS_ONRD 0x01
+#ifndef __ASSEMBLER__
+extern void form_pic_mask (void);
+extern void picinit (void);
+#endif
+
#endif /* _I386_PIC_H_ */
diff --git a/i386/i386/pit.c b/i386/i386/pit.c
index 8dc12335..4f156d87 100644
--- a/i386/i386/pit.c
+++ b/i386/i386/pit.c
@@ -51,6 +51,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <kern/mach_clock.h>
#include <i386/ipl.h>
+#include <i386/pic.h>
#include <i386/pit.h>
#include <i386/pio.h>
diff --git a/i386/i386/pit.h b/i386/i386/pit.h
index b59d8c5c..e65aae97 100644
--- a/i386/i386/pit.h
+++ b/i386/i386/pit.h
@@ -77,3 +77,5 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#if AT386
#define CLKNUM 1193167
#endif /* AT386 */
+
+extern void clkstart(void);
diff --git a/i386/i386/proc_reg.h b/i386/i386/proc_reg.h
index 448b645b..64d8c43f 100644
--- a/i386/i386/proc_reg.h
+++ b/i386/i386/proc_reg.h
@@ -72,6 +72,11 @@
#ifndef __ASSEMBLER__
#ifdef __GNUC__
+#ifndef MACH_HYP
+#include <i386/gdt.h>
+#include <i386/ldt.h>
+#endif /* MACH_HYP */
+
static inline unsigned
get_eflags(void)
{
@@ -102,7 +107,7 @@ set_eflags(unsigned eflags)
#define get_cr0() \
({ \
register unsigned int _temp__; \
- asm("mov %%cr0, %0" : "=r" (_temp__)); \
+ asm volatile("mov %%cr0, %0" : "=r" (_temp__)); \
_temp__; \
})
@@ -115,14 +120,24 @@ set_eflags(unsigned eflags)
#define get_cr2() \
({ \
register unsigned int _temp__; \
- asm("mov %%cr2, %0" : "=r" (_temp__)); \
+ asm volatile("mov %%cr2, %0" : "=r" (_temp__)); \
_temp__; \
})
+#ifdef MACH_HYP
+extern unsigned long cr3;
+#define get_cr3() (cr3)
+#define set_cr3(value) \
+ ({ \
+ cr3 = (value); \
+ if (!hyp_set_cr3(value)) \
+ panic("set_cr3"); \
+ })
+#else /* MACH_HYP */
#define get_cr3() \
({ \
register unsigned int _temp__; \
- asm("mov %%cr3, %0" : "=r" (_temp__)); \
+ asm volatile("mov %%cr3, %0" : "=r" (_temp__)); \
_temp__; \
})
@@ -131,13 +146,44 @@ set_eflags(unsigned eflags)
register unsigned int _temp__ = (value); \
asm volatile("mov %0, %%cr3" : : "r" (_temp__)); \
})
+#endif /* MACH_HYP */
#define flush_tlb() set_cr3(get_cr3())
+#ifndef MACH_HYP
+#define invlpg(addr) \
+ ({ \
+ asm volatile("invlpg (%0)" : : "r" (addr)); \
+ })
+
+#define invlpg_linear(start) \
+ ({ \
+ asm volatile( \
+ "movw %w1,%%es\n" \
+ "\tinvlpg %%es:(%0)\n" \
+ "\tmovw %w2,%%es" \
+ :: "r" (start), "q" (LINEAR_DS), "q" (KERNEL_DS)); \
+ })
+
+#define invlpg_linear_range(start, end) \
+ ({ \
+ register unsigned long var = trunc_page(start); \
+ asm volatile( \
+ "movw %w2,%%es\n" \
+ "1:\tinvlpg %%es:(%0)\n" \
+ "\taddl %c4,%0\n" \
+ "\tcmpl %0,%1\n" \
+ "\tjb 1b\n" \
+ "\tmovw %w3,%%es" \
+ : "+r" (var) : "r" (end), \
+ "q" (LINEAR_DS), "q" (KERNEL_DS), "i" (PAGE_SIZE)); \
+ })
+#endif /* MACH_HYP */
+
#define get_cr4() \
({ \
register unsigned int _temp__; \
- asm("mov %%cr4, %0" : "=r" (_temp__)); \
+ asm volatile("mov %%cr4, %0" : "=r" (_temp__)); \
_temp__; \
})
@@ -148,16 +194,23 @@ set_eflags(unsigned eflags)
})
+#ifdef MACH_HYP
+#define set_ts() \
+ hyp_fpu_taskswitch(1)
+#define clear_ts() \
+ hyp_fpu_taskswitch(0)
+#else /* MACH_HYP */
#define set_ts() \
set_cr0(get_cr0() | CR0_TS)
#define clear_ts() \
asm volatile("clts")
+#endif /* MACH_HYP */
#define get_tr() \
({ \
unsigned short _seg__; \
- asm("str %0" : "=rm" (_seg__) ); \
+ asm volatile("str %0" : "=rm" (_seg__) ); \
_seg__; \
})
@@ -167,7 +220,7 @@ set_eflags(unsigned eflags)
#define get_ldt() \
({ \
unsigned short _seg__; \
- asm("sldt %0" : "=rm" (_seg__) ); \
+ asm volatile("sldt %0" : "=rm" (_seg__) ); \
_seg__; \
})
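
On the invlpg_linear() helpers added above: with the segment-offset trick used by this port, a kernel virtual address is no longer the linear address the MMU sees, so a plain INVLPG on the kernel pointer would flush the wrong entry; going through the flat LINEAR_DS segment avoids that. A hedged usage sketch, assuming kvtolin() is the kernel-virtual-to-linear conversion used elsewhere in this patch:

/*
 *   invlpg_linear(kvtolin(addr));                         flush one mapping
 *   invlpg_linear_range(kvtolin(start), kvtolin(end));    flush a range
 */
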
diff --git a/i386/i386/seg.h b/i386/i386/seg.h
index ad7a0f59..a7f65736 100644
--- a/i386/i386/seg.h
+++ b/i386/i386/seg.h
@@ -37,6 +37,13 @@
* i386 segmentation.
*/
+/* Note: the value of KERNEL_RING is handled by hand in locore.S */
+#ifdef MACH_HYP
+#define KERNEL_RING 1
+#else /* MACH_HYP */
+#define KERNEL_RING 0
+#endif /* MACH_HYP */
+
#ifndef __ASSEMBLER__
/*
@@ -95,7 +102,7 @@ struct real_gate {
#define ACC_CODE_CR 0x1e /* code, conforming,
readable */
#define ACC_PL 0x60 /* access rights: */
-#define ACC_PL_K 0x00 /* kernel access only */
+#define ACC_PL_K (KERNEL_RING << 5) /* kernel access only */
#define ACC_PL_U 0x60 /* user access */
#define ACC_P 0x80 /* segment present */
@@ -104,7 +111,7 @@ struct real_gate {
*/
#define SEL_LDT 0x04 /* local selector */
#define SEL_PL 0x03 /* privilege level: */
-#define SEL_PL_K 0x00 /* kernel selector */
+#define SEL_PL_K KERNEL_RING /* kernel selector */
#define SEL_PL_U 0x03 /* user selector */
/*
@@ -116,6 +123,7 @@ struct real_gate {
#ifndef __ASSEMBLER__
#include <mach/inline.h>
+#include <mach/xen.h>
/* Format of a "pseudo-descriptor", used for loading the IDT and GDT. */
@@ -138,7 +146,7 @@ MACH_INLINE void lidt(struct pseudo_descriptor *pdesc)
}
MACH_INLINE void lldt(unsigned short ldt_selector)
{
- __asm volatile("lldt %w0" : : "r" (ldt_selector));
+ __asm volatile("lldt %w0" : : "r" (ldt_selector) : "memory");
}
#ifdef CODE16
@@ -150,9 +158,15 @@ MACH_INLINE void lldt(unsigned short ldt_selector)
/* Fill a segment descriptor. */
MACH_INLINE void
-fill_descriptor(struct real_descriptor *desc, unsigned base, unsigned limit,
+fill_descriptor(struct real_descriptor *_desc, unsigned base, unsigned limit,
unsigned char access, unsigned char sizebits)
{
+ /* TODO: when !MACH_XEN, setting desc and just memcpy isn't simpler actually */
+#ifdef MACH_XEN
+ struct real_descriptor __desc, *desc = &__desc;
+#else /* MACH_XEN */
+ struct real_descriptor *desc = _desc;
+#endif /* MACH_XEN */
if (limit > 0xfffff)
{
limit >>= 12;
@@ -165,6 +179,10 @@ fill_descriptor(struct real_descriptor *desc, unsigned base, unsigned limit,
desc->limit_high = limit >> 16;
desc->granularity = sizebits;
desc->base_high = base >> 24;
+#ifdef MACH_XEN
+ if (hyp_do_update_descriptor(kv_to_ma(_desc), *(uint64_t*)desc))
+ panic("couldn't update descriptor(%p to %08lx%08lx)\n", (vm_offset_t) kv_to_ma(_desc), *(((unsigned long*)desc)+1), *(unsigned long *)desc);
+#endif /* MACH_XEN */
}
/* Fill a gate with particular values. */
diff --git a/i386/i386/spl.S b/i386/i386/spl.S
index f77b5563..f1d4b45f 100644
--- a/i386/i386/spl.S
+++ b/i386/i386/spl.S
@@ -20,6 +20,8 @@
#include <mach/machine/asm.h>
#include <i386/ipl.h>
#include <i386/pic.h>
+#include <i386/i386asm.h>
+#include <i386/xen.h>
/*
* Set IPL to the specified value.
@@ -42,6 +44,7 @@
/*
* Program PICs with mask in %eax.
*/
+#ifndef MACH_XEN
#define SETMASK() \
cmpl EXT(curr_pic_mask),%eax; \
je 9f; \
@@ -50,6 +53,21 @@
movb %ah,%al; \
outb %al,$(PIC_SLAVE_OCW); \
9:
+#else /* MACH_XEN */
+#define pic_mask int_mask
+#define SETMASK() \
+ pushl %ebx; \
+ movl %eax,%ebx; \
+ xchgl %eax,hyp_shared_info+EVTMASK; \
+ notl %ebx; \
+ andl %eax,%ebx; /* Get unmasked events */ \
+ testl hyp_shared_info+PENDING, %ebx; \
+ popl %ebx; \
+ jz 9f; /* Check whether there was some pending */ \
+lock orl $1,hyp_shared_info+CPU_PENDING_SEL; /* Yes, activate it */ \
+ movb $1,hyp_shared_info+CPU_PENDING; \
+9:
+#endif /* MACH_XEN */
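
/*
 * What the Xen SETMASK() above does, restated in C as a sketch (field names
 * follow the shared_info offsets generated by i386asm.sym; illustrative only):
 *
 *   old      = xchg(&shared->evtchn_mask[0], new_mask);
 *   unmasked = old & ~new_mask;                      // events just unmasked
 *   if (unmasked & shared->evtchn_pending[0]) {
 *       shared->vcpu_info[0].evtchn_pending_sel |= 1;    // re-arm the
 *       shared->vcpu_info[0].evtchn_upcall_pending = 1;  // event upcall
 *   }
 */
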
ENTRY(spl0)
movl EXT(curr_ipl),%eax /* save current ipl */
diff --git a/i386/i386/spl.h b/i386/i386/spl.h
index 8552e116..00c51458 100644
--- a/i386/i386/spl.h
+++ b/i386/i386/spl.h
@@ -37,37 +37,36 @@ extern spl_t (splhi)(void);
extern spl_t (spl0)(void);
extern spl_t (spl1)(void);
+extern spl_t (splsoftclock)(void);
extern spl_t (spl2)(void);
extern spl_t (spl3)(void);
extern spl_t (spl4)(void);
+extern spl_t (splnet)(void);
extern spl_t (splhdw)(void);
extern spl_t (spl5)(void);
+extern spl_t (splbio)(void);
extern spl_t (spldcm)(void);
extern spl_t (spl6)(void);
+extern spl_t (spltty)(void);
+extern spl_t (splimp)(void);
+extern spl_t (spl7)(void);
+extern spl_t (splclock)(void);
extern spl_t (splsched)(void);
+extern spl_t (splhigh)(void);
extern spl_t (splx)(spl_t n);
-
-extern spl_t (splsoftclock)(void);
+extern spl_t (splx_cli)(spl_t n);
extern void splon (unsigned long n);
extern unsigned long sploff (void);
-extern spl_t splhigh (void);
-
-extern spl_t splimp (void);
-
-extern spl_t spltty (void);
-
-extern spl_t splclock (void);
-
extern void setsoftclock (void);
/* XXX Include each other... */
diff --git a/i386/i386/thread.h b/i386/i386/thread.h
index 76aa1ef7..f2ae8bf0 100644
--- a/i386/i386/thread.h
+++ b/i386/i386/thread.h
@@ -111,9 +111,14 @@ struct i386_kernel_state {
*/
struct i386_fpsave_state {
+ union {
+ struct {
+ struct i386_fp_save fp_save_state;
+ struct i386_fp_regs fp_regs;
+ };
+ struct i386_xfp_save xfp_save_state;
+ };
boolean_t fp_valid;
- struct i386_fp_save fp_save_state;
- struct i386_fp_regs fp_regs;
};
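
/*
 * A note on the union above (a sketch, not part of the patch): the anonymous
 * union overlays the legacy fnsave image (i386_fp_save plus i386_fp_regs)
 * with the 512-byte FXSAVE image (i386_xfp_save). FXSAVE needs a 16-byte
 * aligned operand, which is presumably why fpu_module_init() earlier in this
 * patch now passes 16 as the second (alignment) argument to zinit(); fp_kind
 * selects at run time which view fp_save()/fp_load() use.
 */
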
/*
diff --git a/i386/i386/trap.c b/i386/i386/trap.c
index ebb327ca..22dd4914 100644
--- a/i386/i386/trap.c
+++ b/i386/i386/trap.c
@@ -32,6 +32,9 @@
#include <mach/machine/eflags.h>
#include <i386/trap.h>
+#include <i386/fpu.h>
+#include <i386/model_dep.h>
+#include <intel/read_fault.h>
#include <machine/machspl.h> /* for spl_t */
#include <mach/exception.h>
@@ -39,6 +42,7 @@
#include "vm_param.h"
#include <mach/machine/thread_status.h>
+#include <vm/vm_fault.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
@@ -57,10 +61,10 @@
#include "debug.h"
-extern void exception();
-extern void thread_exception_return();
+extern void exception() __attribute__ ((noreturn));
+extern void thread_exception_return() __attribute__ ((noreturn));
-extern void i386_exception();
+extern void i386_exception() __attribute__ ((noreturn));
#if MACH_KDB
boolean_t debug_all_traps_with_kdb = FALSE;
@@ -242,7 +246,7 @@ dump_ss(regs);
assert(thread);
map = thread->task->map;
if (map == kernel_map) {
- printf("kernel page fault at %08x:\n");
+ printf("kernel page fault at %08x:\n", subcode);
dump_ss(regs);
panic("kernel thread accessed user space!\n");
}
@@ -366,15 +370,14 @@ dump_ss(regs);
int user_trap(regs)
register struct i386_saved_state *regs;
{
- int exc;
+ int exc = 0; /* Suppress gcc warning */
int code;
int subcode;
register int type;
register thread_t thread = current_thread();
- extern vm_offset_t phys_last_addr;
if ((vm_offset_t)thread < phys_last_addr) {
- printf("user_trap: bad thread pointer 0x%x\n", thread);
+ printf("user_trap: bad thread pointer 0x%p\n", thread);
printf("trap type %d, code 0x%x, va 0x%x, eip 0x%x\n",
regs->trapno, regs->err, regs->cr2, regs->eip);
asm volatile ("1: hlt; jmp 1b");
@@ -510,7 +513,8 @@ printf("user trap %d error %d sub %08x\n", type, code, subcode);
dump_ss (regs);
#endif
- assert(subcode < LINEAR_MIN_KERNEL_ADDRESS);
+ if (subcode >= LINEAR_MIN_KERNEL_ADDRESS)
+ i386_exception(EXC_BAD_ACCESS, EXC_I386_PGFLT, subcode);
(void) vm_fault(thread->task->map,
trunc_page((vm_offset_t)subcode),
(regs->err & T_PF_WRITE)
@@ -522,6 +526,22 @@ printf("user trap %d error %d sub %08x\n", type, code, subcode);
/*NOTREACHED*/
break;
+#ifdef MACH_XEN
+ case 15:
+ {
+ static unsigned count = 0;
+ count++;
+ if (!(count % 10000))
+ printf("%d 4gb segments accesses\n", count);
+ if (count > 1000000) {
+ printf("A million 4gb segment accesses, stopping reporting them.");
+ if (hyp_vm_assist(VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify))
+ panic("couldn't disable 4gb segments vm assist notify");
+ }
+ return 0;
+ }
+#endif
+
case T_FLOATING_POINT_ERROR:
fpexterrflt();
return 0;
@@ -581,6 +601,7 @@ i386_astintr()
int mycpu = cpu_number();
(void) splsched(); /* block interrupts to check reasons */
+#ifndef MACH_XEN
if (need_ast[mycpu] & AST_I386_FP) {
/*
* AST was for delayed floating-point exception -
@@ -590,9 +611,11 @@ i386_astintr()
ast_off(mycpu, AST_I386_FP);
(void) spl0();
- fpexterrflt();
+ fpastintr();
}
- else {
+ else
+#endif /* MACH_XEN */
+ {
/*
* Not an FPU trap. Handle the AST.
* Interrupts are still blocked.
diff --git a/i386/i386/trap.h b/i386/i386/trap.h
index 8cee9e88..b4e92246 100644
--- a/i386/i386/trap.h
+++ b/i386/i386/trap.h
@@ -30,9 +30,12 @@
#include <mach/machine/trap.h>
#ifndef __ASSEMBLER__
+#include <mach/mach_types.h>
char *trap_name(unsigned int trapnum);
+unsigned int interrupted_pc(thread_t);
+
#endif /* !__ASSEMBLER__ */
#endif /* _I386_TRAP_H_ */
diff --git a/i386/i386/tss.h b/i386/i386/tss.h
index ee0d6121..ff25f217 100644
--- a/i386/i386/tss.h
+++ b/i386/i386/tss.h
@@ -83,7 +83,7 @@ struct task_tss
MACH_INLINE void
ltr(unsigned short segment)
{
- __asm volatile("ltr %0" : : "r" (segment));
+ __asm volatile("ltr %0" : : "r" (segment) : "memory");
}
#endif /* _I386_TSS_H_ */
diff --git a/i386/i386/user_ldt.c b/i386/i386/user_ldt.c
index 7646c351..dfe6b1e6 100644
--- a/i386/i386/user_ldt.c
+++ b/i386/i386/user_ldt.c
@@ -35,9 +35,11 @@
#include <vm/vm_kern.h>
+#include <i386/pcb.h>
#include <i386/seg.h>
#include <i386/thread.h>
#include <i386/user_ldt.h>
+#include <stddef.h>
#include "ldt.h"
#include "vm_param.h"
@@ -110,7 +112,7 @@ i386_set_ldt(thread, first_selector, desc_list, count, desc_list_inline)
pcb_t pcb;
vm_size_t ldt_size_needed;
int first_desc = sel_idx(first_selector);
- vm_map_copy_t old_copy_object;
+ vm_map_copy_t old_copy_object = NULL; /* Suppress gcc warning */
if (thread == THREAD_NULL)
return KERN_INVALID_ARGUMENT;
@@ -194,9 +196,17 @@ i386_set_ldt(thread, first_selector, desc_list, count, desc_list_inline)
if (new_ldt == 0) {
simple_unlock(&pcb->lock);
+#ifdef MACH_XEN
+ /* LDT needs to be aligned on a page */
+ vm_offset_t alloc = kalloc(ldt_size_needed + PAGE_SIZE + offsetof(struct user_ldt, ldt));
+ new_ldt = (user_ldt_t) (round_page((alloc + offsetof(struct user_ldt, ldt))) - offsetof(struct user_ldt, ldt));
+ new_ldt->alloc = alloc;
+
+#else /* MACH_XEN */
new_ldt = (user_ldt_t)
kalloc(ldt_size_needed
+ sizeof(struct real_descriptor));
+#endif /* MACH_XEN */
/*
* Build a descriptor that describes the
* LDT itself
@@ -262,9 +272,19 @@ i386_set_ldt(thread, first_selector, desc_list, count, desc_list_inline)
simple_unlock(&pcb->lock);
if (new_ldt)
+#ifdef MACH_XEN
+ {
+ int i;
+ for (i=0; i<(new_ldt->desc.limit_low + 1)/sizeof(struct real_descriptor); i+=PAGE_SIZE/sizeof(struct real_descriptor))
+ pmap_set_page_readwrite(&new_ldt->ldt[i]);
+ kfree(new_ldt->alloc, new_ldt->desc.limit_low + 1
+ + PAGE_SIZE + offsetof(struct user_ldt, ldt));
+ }
+#else /* MACH_XEN */
kfree((vm_offset_t)new_ldt,
new_ldt->desc.limit_low + 1
+ sizeof(struct real_descriptor));
+#endif /* MACH_XEN */
/*
* Free the descriptor list, if it was
@@ -397,9 +417,17 @@ void
user_ldt_free(user_ldt)
user_ldt_t user_ldt;
{
+#ifdef MACH_XEN
+ int i;
+ for (i=0; i<(user_ldt->desc.limit_low + 1)/sizeof(struct real_descriptor); i+=PAGE_SIZE/sizeof(struct real_descriptor))
+ pmap_set_page_readwrite(&user_ldt->ldt[i]);
+ kfree(user_ldt->alloc, user_ldt->desc.limit_low + 1
+ + PAGE_SIZE + offsetof(struct user_ldt, ldt));
+#else /* MACH_XEN */
kfree((vm_offset_t)user_ldt,
user_ldt->desc.limit_low + 1
+ sizeof(struct real_descriptor));
+#endif /* MACH_XEN */
}
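On Xen the LDT handed to the hypervisor must start on a page boundary, so both sites above over-allocate by PAGE_SIZE plus the header, round the ldt[] array up to a page, and remember the original kalloc address in the new alloc field (declared in user_ldt.h just below) so the block can be freed later. A small user-space sketch of that alignment arithmetic, with a toy structure and malloc/free standing in for kalloc/kfree:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <stdlib.h>

    #define PAGE_SIZE 4096UL
    #define round_page(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Toy stand-in for struct user_ldt; only the layout trick matters here. */
    struct toy_ldt {
        uintptr_t alloc;          /* original allocation, kept for freeing */
        unsigned char desc[8];    /* descriptor for the LDT itself */
        unsigned char ldt[1];     /* variable-size table, must be page aligned */
    };

    int main(void)
    {
        size_t ldt_size_needed = 3 * PAGE_SIZE;

        /* Over-allocate by one page plus the header so ldt[] can be placed
           on a page boundary somewhere inside the block. */
        uintptr_t alloc = (uintptr_t) malloc(ldt_size_needed + PAGE_SIZE
                                             + offsetof(struct toy_ldt, ldt));
        struct toy_ldt *l = (struct toy_ldt *)
            (round_page(alloc + offsetof(struct toy_ldt, ldt))
             - offsetof(struct toy_ldt, ldt));
        l->alloc = alloc;

        printf("ldt[] at %#lx, page aligned: %s\n",
               (unsigned long) (uintptr_t) l->ldt,
               ((uintptr_t) l->ldt & (PAGE_SIZE - 1)) ? "no" : "yes");
        free((void *) alloc);
        return 0;
    }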
diff --git a/i386/i386/user_ldt.h b/i386/i386/user_ldt.h
index dd3ad4b5..8d16ed87 100644
--- a/i386/i386/user_ldt.h
+++ b/i386/i386/user_ldt.h
@@ -36,6 +36,9 @@
#include <i386/seg.h>
struct user_ldt {
+#ifdef MACH_XEN
+ vm_offset_t alloc; /* allocation before alignment */
+#endif /* MACH_XEN */
struct real_descriptor desc; /* descriptor for self */
struct real_descriptor ldt[1]; /* descriptor table (variable) */
};
diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h
index 96fc8bac..95df6044 100644
--- a/i386/i386/vm_param.h
+++ b/i386/i386/vm_param.h
@@ -25,19 +25,40 @@
/* XXX use xu/vm_param.h */
#include <mach/vm_param.h>
+#include <xen/public/xen.h>
/* The kernel address space is 1GB, starting at virtual address 0. */
-#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00000000)
-#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0x40000000)
+#ifdef MACH_XEN
+#define VM_MIN_KERNEL_ADDRESS 0x20000000UL
+#else /* MACH_XEN */
+#define VM_MIN_KERNEL_ADDRESS 0x00000000UL
+#endif /* MACH_XEN */
+
+#ifdef MACH_XEN
+#if PAE
+#define HYP_VIRT_START HYPERVISOR_VIRT_START_PAE
+#else /* PAE */
+#define HYP_VIRT_START HYPERVISOR_VIRT_START_NONPAE
+#endif /* PAE */
+#define VM_MAX_KERNEL_ADDRESS (HYP_VIRT_START - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
+#else /* MACH_XEN */
+#define VM_MAX_KERNEL_ADDRESS (LINEAR_MAX_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
+#endif /* MACH_XEN */
/* The kernel virtual address space is actually located
at high linear addresses.
This is the kernel address range in linear addresses. */
-#define LINEAR_MIN_KERNEL_ADDRESS ((vm_offset_t) 0xc0000000)
-#define LINEAR_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xffffffff)
+#define LINEAR_MIN_KERNEL_ADDRESS (VM_MAX_ADDRESS)
+#define LINEAR_MAX_KERNEL_ADDRESS (0xffffffffUL)
+#ifdef MACH_XEN
+/* need room for mmu updates (2*8bytes) */
+#define KERNEL_STACK_SIZE (4*I386_PGBYTES)
+#define INTSTACK_SIZE (4*I386_PGBYTES)
+#else /* MACH_XEN */
#define KERNEL_STACK_SIZE (1*I386_PGBYTES)
#define INTSTACK_SIZE (1*I386_PGBYTES)
+#endif /* MACH_XEN */
/* interrupt stack size */
/*
@@ -50,15 +71,18 @@
/*
* Physical memory is direct-mapped to virtual memory
- * starting at virtual address phys_mem_va.
+ * starting at virtual address VM_MIN_KERNEL_ADDRESS.
+ */
+#define phystokv(a) ((vm_offset_t)(a) + VM_MIN_KERNEL_ADDRESS)
+/*
+ * This cannot be used with virtual mappings, but it can be used during bootstrap
*/
-extern vm_offset_t phys_mem_va;
-#define phystokv(a) ((vm_offset_t)(a) + phys_mem_va)
+#define _kvtophys(a) ((vm_offset_t)(a) - VM_MIN_KERNEL_ADDRESS)
/*
* Kernel virtual memory is actually at 0xc0000000 in linear addresses.
*/
-#define kvtolin(a) ((vm_offset_t)(a) + LINEAR_MIN_KERNEL_ADDRESS)
-#define lintokv(a) ((vm_offset_t)(a) - LINEAR_MIN_KERNEL_ADDRESS)
+#define kvtolin(a) ((vm_offset_t)(a) - VM_MIN_KERNEL_ADDRESS + LINEAR_MIN_KERNEL_ADDRESS)
+#define lintokv(a) ((vm_offset_t)(a) - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)
#endif /* _I386_KERNEL_I386_VM_PARAM_ */
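With these definitions the kernel's virtual window no longer has to start at linear address 0: phystokv/_kvtophys shift by VM_MIN_KERNEL_ADDRESS, while kvtolin/lintokv rebase between the kernel window and the linear range starting at LINEAR_MIN_KERNEL_ADDRESS. A quick sanity check of the arithmetic, assuming the Xen value VM_MIN_KERNEL_ADDRESS = 0x20000000 and VM_MAX_ADDRESS (hence LINEAR_MIN_KERNEL_ADDRESS) = 0xc0000000:

    #include <stdio.h>

    /* Sketch only; in the kernel these are macros over vm_offset_t. */
    #define VM_MIN_KERNEL_ADDRESS     0x20000000UL   /* Xen value in the patch */
    #define LINEAR_MIN_KERNEL_ADDRESS 0xc0000000UL   /* assumed VM_MAX_ADDRESS */

    #define phystokv(a)  ((unsigned long)(a) + VM_MIN_KERNEL_ADDRESS)
    #define _kvtophys(a) ((unsigned long)(a) - VM_MIN_KERNEL_ADDRESS)
    #define kvtolin(a)   ((unsigned long)(a) - VM_MIN_KERNEL_ADDRESS + LINEAR_MIN_KERNEL_ADDRESS)
    #define lintokv(a)   ((unsigned long)(a) - LINEAR_MIN_KERNEL_ADDRESS + VM_MIN_KERNEL_ADDRESS)

    int main(void)
    {
        unsigned long phys = 0x00123000UL;
        unsigned long kv   = phystokv(phys);
        unsigned long lin  = kvtolin(kv);

        printf("phys %#lx -> kv %#lx -> lin %#lx\n", phys, kv, lin);
        printf("round trips: lintokv %#lx, _kvtophys %#lx\n",
               lintokv(lin), _kvtophys(kv));
        return 0;
    }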
diff --git a/i386/i386/xen.h b/i386/i386/xen.h
new file mode 100644
index 00000000..1377a14c
--- /dev/null
+++ b/i386/i386/xen.h
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_HYPCALL_H
+#define XEN_HYPCALL_H
+
+#ifdef MACH_XEN
+#ifndef __ASSEMBLER__
+#include <kern/printf.h>
+#include <mach/machine/vm_types.h>
+#include <mach/vm_param.h>
+#include <mach/inline.h>
+#include <machine/vm_param.h>
+#include <intel/pmap.h>
+#include <kern/debug.h>
+#include <xen/public/xen.h>
+
+/* TODO: this should be moved in appropriate non-Xen place. */
+#define barrier() __asm__ __volatile__ ("": : :"memory")
+#define mb() __asm__ __volatile__("lock; addl $0,0(%esp)")
+#define rmb() mb()
+#define wmb() mb()
+MACH_INLINE unsigned long xchgl(volatile unsigned long *ptr, unsigned long x)
+{
+ __asm__ __volatile__("xchgl %0, %1"
+ : "=r" (x)
+ : "m" (*(ptr)), "0" (x): "memory");
+ return x;
+}
+#define _TOSTR(x) #x
+#define TOSTR(x) _TOSTR (x)
+
+
+
+/* x86-specific hypercall interface. */
+#define _hypcall0(type, name) \
+MACH_INLINE type hyp_##name(void) \
+{ \
+ long __ret; \
+ asm volatile ("call hypcalls+("TOSTR(__HYPERVISOR_##name)"*32)" \
+ : "=a" (__ret) \
+ : : "memory"); \
+ return __ret; \
+}
+
+#define _hypcall1(type, name, type1, arg1) \
+MACH_INLINE type hyp_##name(type1 arg1) \
+{ \
+ long __ret; \
+ long foo1; \
+ asm volatile ("call hypcalls+("TOSTR(__HYPERVISOR_##name)"*32)" \
+ : "=a" (__ret), \
+ "=b" (foo1) \
+ : "1" ((long)arg1) \
+ : "memory"); \
+ return __ret; \
+}
+
+#define _hypcall2(type, name, type1, arg1, type2, arg2) \
+MACH_INLINE type hyp_##name(type1 arg1, type2 arg2) \
+{ \
+ long __ret; \
+ long foo1, foo2; \
+ asm volatile ("call hypcalls+("TOSTR(__HYPERVISOR_##name)"*32)" \
+ : "=a" (__ret), \
+ "=b" (foo1), \
+ "=c" (foo2) \
+ : "1" ((long)arg1), \
+ "2" ((long)arg2) \
+ : "memory"); \
+ return __ret; \
+}
+
+#define _hypcall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
+MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3) \
+{ \
+ long __ret; \
+ long foo1, foo2, foo3; \
+ asm volatile ("call hypcalls+("TOSTR(__HYPERVISOR_##name)"*32)" \
+ : "=a" (__ret), \
+ "=b" (foo1), \
+ "=c" (foo2), \
+ "=d" (foo3) \
+ : "1" ((long)arg1), \
+ "2" ((long)arg2), \
+ "3" ((long)arg3) \
+ : "memory"); \
+ return __ret; \
+}
+
+#define _hypcall4(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \
+MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+{ \
+ long __ret; \
+ long foo1, foo2, foo3, foo4; \
+ asm volatile ("call hypcalls+("TOSTR(__HYPERVISOR_##name)"*32)" \
+ : "=a" (__ret), \
+ "=b" (foo1), \
+ "=c" (foo2), \
+ "=d" (foo3), \
+ "=S" (foo4) \
+ : "1" ((long)arg1), \
+ "2" ((long)arg2), \
+ "3" ((long)arg3), \
+ "4" ((long)arg4) \
+ : "memory"); \
+ return __ret; \
+}
+
+#define _hypcall5(type, name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) \
+MACH_INLINE type hyp_##name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
+{ \
+ long __ret; \
+ long foo1, foo2, foo3, foo4, foo5; \
+ asm volatile ("call hypcalls+("TOSTR(__HYPERVISOR_##name)"*32)" \
+ : "=a" (__ret), \
+ "=b" (foo1), \
+ "=c" (foo2), \
+ "=d" (foo3), \
+ "=S" (foo4), \
+ "=D" (foo5) \
+ : "1" ((long)arg1), \
+ "2" ((long)arg2), \
+ "3" ((long)arg3), \
+ "4" ((long)arg4), \
+ "5" ((long)arg5) \
+ : "memory"); \
+ return __ret; \
+}
+
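Each _hypcallN macro above expands to a small inline wrapper: arguments are loaded into ebx/ecx/edx/esi/edi, the result comes back in eax, the "memory" clobber covers data the hypervisor may read or write through pointer arguments, and the call lands in a per-hypercall slot of the hypcalls trampoline page at offset number*32. The call target string is pasted together at compile time by the two-level stringification trick; a tiny host-side demonstration of just that part, with an illustrative hypercall number:

    #include <stdio.h>

    #define _TOSTR(x) #x
    #define TOSTR(x)  _TOSTR(x)

    #define __HYPERVISOR_stack_switch 3   /* illustrative number */

    int main(void)
    {
        /* Mirrors the asm template:
           "call hypcalls+("TOSTR(__HYPERVISOR_stack_switch)"*32)" */
        const char *insn = "call hypcalls+(" TOSTR(__HYPERVISOR_stack_switch) "*32)";
        printf("generated instruction: %s\n", insn);
        return 0;
    }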
+/* x86 Hypercalls */
+
+/* Note: since the hypervisor uses a flat memory model, always pass pointer
+ * parameters through kvtolin so the hypercall reads data at the right linear
+ * address. Use kv_to_la when they may be used before the GDT is set up. */
+
+_hypcall1(long, set_trap_table, vm_offset_t /* struct trap_info * */, traps);
+
+_hypcall4(int, mmu_update, vm_offset_t /* struct mmu_update * */, req, int, count, vm_offset_t /* int * */, success_count, domid_t, domid)
+MACH_INLINE int hyp_mmu_update_pte(pt_entry_t pte, pt_entry_t val)
+{
+ struct mmu_update update =
+ {
+ .ptr = pte,
+ .val = val,
+ };
+ int count;
+ hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&count), DOMID_SELF);
+ return count;
+}
+/* Note: make sure this fits in KERNEL_STACK_SIZE */
+#define HYP_BATCH_MMU_UPDATES 256
+
+#define hyp_mmu_update_la(la, val) hyp_mmu_update_pte( \
+ (kernel_pmap->dirbase[lin2pdenum((vm_offset_t)(la))] & INTEL_PTE_PFN) \
+ + ptenum((vm_offset_t)(la)) * sizeof(pt_entry_t), val)
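hyp_mmu_update_pte issues one mmu_update per hypercall; HYP_BATCH_MMU_UPDATES exists so callers in the pmap code can queue up to 256 updates (4KiB of entries, hence the note about KERNEL_STACK_SIZE) in an on-stack array and flush them in a single call. A sketch of that batching pattern, with hyp_mmu_update_batch() as a hypothetical stand-in for the real flush:

    #include <stdio.h>
    #include <stdint.h>

    #define HYP_BATCH_MMU_UPDATES 256

    struct mmu_update { uint64_t ptr; uint64_t val; };

    static void hyp_mmu_update_batch(struct mmu_update *req, int count)
    {
        printf("flushing %d queued PTE updates\n", count);
    }

    int main(void)
    {
        struct mmu_update batch[HYP_BATCH_MMU_UPDATES];
        int n = 0;

        for (int i = 0; i < 1000; i++) {
            batch[n].ptr = 0x1000u + 8u * i;        /* machine address of the PTE */
            batch[n].val = 0x2000u + 0x1000u * i;   /* new PTE contents */
            if (++n == HYP_BATCH_MMU_UPDATES) {
                hyp_mmu_update_batch(batch, n);
                n = 0;
            }
        }
        if (n)
            hyp_mmu_update_batch(batch, n);         /* flush the remainder */
        return 0;
    }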
+
+_hypcall2(long, set_gdt, vm_offset_t /* unsigned long * */, frame_list, unsigned int, entries)
+
+_hypcall2(long, stack_switch, unsigned long, ss, unsigned long, esp);
+
+_hypcall4(long, set_callbacks, unsigned long, es, void *, ea,
+ unsigned long, fss, void *, fsa);
+_hypcall1(long, fpu_taskswitch, int, set);
+
+#ifdef PAE
+#define hyp_high(pte) ((pte) >> 32)
+#else
+#define hyp_high(pte) 0
+#endif
+_hypcall4(long, update_descriptor, unsigned long, ma_lo, unsigned long, ma_hi, unsigned long, desc_lo, unsigned long, desc_hi);
+#define hyp_do_update_descriptor(ma, desc) ({ \
+ pt_entry_t __ma = (ma); \
+ uint64_t __desc = (desc); \
+ hyp_update_descriptor(__ma & 0xffffffffU, hyp_high(__ma), __desc & 0xffffffffU, __desc >> 32); \
+})
+
+#include <xen/public/memory.h>
+_hypcall2(long, memory_op, unsigned long, cmd, vm_offset_t /* void * */, arg);
+MACH_INLINE void hyp_free_mfn(unsigned long mfn)
+{
+ struct xen_memory_reservation reservation;
+ reservation.extent_start = (void*) kvtolin(&mfn);
+ reservation.nr_extents = 1;
+ reservation.extent_order = 0;
+ reservation.address_bits = 0;
+ reservation.domid = DOMID_SELF;
+ if (hyp_memory_op(XENMEM_decrease_reservation, kvtolin(&reservation)) != 1)
+ panic("couldn't free page %d\n", mfn);
+}
+
+_hypcall4(int, update_va_mapping, unsigned long, va, unsigned long, val_lo, unsigned long, val_hi, unsigned long, flags);
+#define hyp_do_update_va_mapping(va, val, flags) ({ \
+ pt_entry_t __val = (val); \
+ hyp_update_va_mapping(va, __val & 0xffffffffU, hyp_high(__val), flags); \
+})
+
+MACH_INLINE void hyp_free_page(unsigned long pfn, void *va)
+{
+ /* save mfn */
+ unsigned long mfn = pfn_to_mfn(pfn);
+
+ /* remove from mappings */
+ if (hyp_do_update_va_mapping(kvtolin(va), 0, UVMF_INVLPG|UVMF_ALL))
+ panic("couldn't clear page %d at %p\n", pfn, va);
+
+#ifdef MACH_PSEUDO_PHYS
+ /* drop machine page */
+ mfn_list[pfn] = ~0;
+#endif /* MACH_PSEUDO_PHYS */
+
+ /* and free from Xen */
+ hyp_free_mfn(mfn);
+}
+
+_hypcall4(int, mmuext_op, vm_offset_t /* struct mmuext_op * */, op, int, count, vm_offset_t /* int * */, success_count, domid_t, domid);
+MACH_INLINE int hyp_mmuext_op_void(unsigned int cmd)
+{
+ struct mmuext_op op = {
+ .cmd = cmd,
+ };
+ int count;
+ hyp_mmuext_op(kv_to_la(&op), 1, kv_to_la(&count), DOMID_SELF);
+ return count;
+}
+MACH_INLINE int hyp_mmuext_op_mfn(unsigned int cmd, unsigned long mfn)
+{
+ struct mmuext_op op = {
+ .cmd = cmd,
+ .arg1.mfn = mfn,
+ };
+ int count;
+ hyp_mmuext_op(kv_to_la(&op), 1, kv_to_la(&count), DOMID_SELF);
+ return count;
+}
+MACH_INLINE void hyp_set_ldt(void *ldt, unsigned long nbentries) {
+ struct mmuext_op op = {
+ .cmd = MMUEXT_SET_LDT,
+ .arg1.linear_addr = kvtolin(ldt),
+ .arg2.nr_ents = nbentries,
+ };
+ int count;
+ if (((unsigned long)ldt) & PAGE_MASK)
+ panic("ldt %p is not aligned on a page\n", ldt);
+ for (count=0; count<nbentries; count+= PAGE_SIZE/8)
+ pmap_set_page_readonly(ldt+count*8);
+ hyp_mmuext_op(kvtolin(&op), 1, kvtolin(&count), DOMID_SELF);
+ if (!count)
+ panic("couldn't set LDT\n");
+}
+/* TODO: use xen_pfn_to_cr3/xen_cr3_to_pfn to cope with pdp above 4GB */
+#define hyp_set_cr3(value) hyp_mmuext_op_mfn(MMUEXT_NEW_BASEPTR, pa_to_mfn(value))
+MACH_INLINE void hyp_invlpg(vm_offset_t lin) {
+ struct mmuext_op ops;
+ int n;
+ ops.cmd = MMUEXT_INVLPG_ALL;
+ ops.arg1.linear_addr = lin;
+ hyp_mmuext_op(kvtolin(&ops), 1, kvtolin(&n), DOMID_SELF);
+ if (n < 1)
+ panic("couldn't invlpg\n");
+}
+
+_hypcall2(long, set_timer_op, unsigned long, absolute_lo, unsigned long, absolute_hi);
+#define hyp_do_set_timer_op(absolute_nsec) ({ \
+ uint64_t __absolute = (absolute_nsec); \
+ hyp_set_timer_op(__absolute & 0xffffffffU, __absolute >> 32); \
+})
+
+#include <xen/public/event_channel.h>
+_hypcall1(int, event_channel_op, vm_offset_t /* evtchn_op_t * */, op);
+MACH_INLINE int hyp_event_channel_send(evtchn_port_t port) {
+ evtchn_op_t op = {
+ .cmd = EVTCHNOP_send,
+ .u.send.port = port,
+ };
+ return hyp_event_channel_op(kvtolin(&op));
+}
+MACH_INLINE evtchn_port_t hyp_event_channel_alloc(domid_t domid) {
+ evtchn_op_t op = {
+ .cmd = EVTCHNOP_alloc_unbound,
+ .u.alloc_unbound.dom = DOMID_SELF,
+ .u.alloc_unbound.remote_dom = domid,
+ };
+ if (hyp_event_channel_op(kvtolin(&op)))
+ panic("couldn't allocate event channel");
+ return op.u.alloc_unbound.port;
+}
+MACH_INLINE evtchn_port_t hyp_event_channel_bind_virq(uint32_t virq, uint32_t vcpu) {
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_virq, .u.bind_virq = { .virq = virq, .vcpu = vcpu }};
+ if (hyp_event_channel_op(kvtolin(&op)))
+ panic("can't bind virq %d\n",virq);
+ return op.u.bind_virq.port;
+}
+
+_hypcall3(int, console_io, int, cmd, int, count, vm_offset_t /* const char * */, buffer);
+
+_hypcall3(long, grant_table_op, unsigned int, cmd, vm_offset_t /* void * */, uop, unsigned int, count);
+
+_hypcall2(long, vm_assist, unsigned int, cmd, unsigned int, type);
+
+_hypcall0(long, iret);
+
+#include <xen/public/sched.h>
+_hypcall2(long, sched_op, int, cmd, vm_offset_t /* void* */, arg)
+#define hyp_yield() hyp_sched_op(SCHEDOP_yield, 0)
+#define hyp_block() hyp_sched_op(SCHEDOP_block, 0)
+MACH_INLINE void __attribute__((noreturn)) hyp_crash(void)
+{
+ unsigned int shut = SHUTDOWN_crash;
+ hyp_sched_op(SCHEDOP_shutdown, kvtolin(&shut));
+ /* really shouldn't return */
+ printf("uh, shutdown returned?!\n");
+ for(;;);
+}
+
+MACH_INLINE void __attribute__((noreturn)) hyp_halt(void)
+{
+ unsigned int shut = SHUTDOWN_poweroff;
+ hyp_sched_op(SCHEDOP_shutdown, kvtolin(&shut));
+ /* really shouldn't return */
+ printf("uh, shutdown returned?!\n");
+ for(;;);
+}
+
+MACH_INLINE void __attribute__((noreturn)) hyp_reboot(void)
+{
+ unsigned int shut = SHUTDOWN_reboot;
+ hyp_sched_op(SCHEDOP_shutdown, kvtolin(&shut));
+ /* really shouldn't return */
+ printf("uh, reboot returned?!\n");
+ for(;;);
+}
+
+/* x86-specific */
+MACH_INLINE unsigned64_t hyp_cpu_clock(void) {
+ unsigned64_t tsc;
+ asm volatile("rdtsc":"=A"(tsc));
+ return tsc;
+}
+
+#else /* __ASSEMBLER__ */
+/* TODO: SMP */
+#define cli movb $0xff,hyp_shared_info+CPU_CLI
+#define sti call hyp_sti
+#endif /* ASSEMBLER */
+#endif /* MACH_XEN */
+
+#endif /* XEN_HYPCALL_H */
diff --git a/i386/i386at/autoconf.c b/i386/i386at/autoconf.c
index 48e12787..7713dec6 100644
--- a/i386/i386at/autoconf.c
+++ b/i386/i386at/autoconf.c
@@ -35,9 +35,7 @@
#include <mach/machine.h>
#include <machine/cpu.h>
#endif /* MACH_KERNEL */
-#ifdef LINUX_DEV
#include <i386/pic.h>
-#endif
#include <i386/ipl.h>
#include <chips/busses.h>
@@ -49,12 +47,12 @@
#if NCOM > 0
extern struct bus_driver comdriver;
-extern int comintr();
+extern void comintr();
#endif /* NCOM */
#if NLPR > 0
extern struct bus_driver lprdriver;
-extern int lprintr();
+extern void lprintr();
#endif /* NLPR */
struct bus_ctlr bus_master_init[] = {
@@ -62,7 +60,7 @@ struct bus_ctlr bus_master_init[] = {
/* driver name unit intr address len phys_address
adaptor alive flags spl pic */
- 0
+ {0}
};
@@ -91,7 +89,7 @@ struct bus_device bus_device_init[] = {
#endif /* NLPR > 0 */
#endif /* MACH_LPR */
- 0
+ {0}
};
/*
diff --git a/i386/i386at/autoconf.h b/i386/i386at/autoconf.h
new file mode 100644
index 00000000..a16a88f9
--- /dev/null
+++ b/i386/i386at/autoconf.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Device auto configuration.
+ *
+ */
+
+#ifndef _AUTOCONF_H_
+#define _AUTOCONF_H_
+
+#include <mach/std_types.h>
+#include <chips/busses.h>
+
+/*
+ * probeio:
+ *
+ * Probe and subsequently attach devices out on the AT bus.
+ *
+ *
+ */
+void probeio(void);
+
+extern void take_dev_irq (
+ struct bus_device *dev);
+
+#endif /* _AUTOCONF_H_ */
diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S
index 27d04053..45cd599f 100644
--- a/i386/i386at/boothdr.S
+++ b/i386/i386at/boothdr.S
@@ -58,6 +58,23 @@ boot_entry:
/* Push the boot_info pointer to be the second argument. */
pushl %ebx
+ /* Fix ifunc entries */
+ movl $__rel_iplt_start,%esi
+ movl $__rel_iplt_end,%edi
+iplt_cont:
+ cmpl %edi,%esi
+ jae iplt_done
+ movl (%esi),%ebx /* r_offset */
+ movb 4(%esi),%al /* info */
+ cmpb $42,%al /* IRELATIVE */
+ jnz iplt_next
+ call *(%ebx) /* call ifunc */
+ movl %eax,(%ebx) /* fixed address */
+iplt_next:
+ addl $8,%esi
+ jmp iplt_cont
+iplt_done:
+
/* Jump into C code. */
call EXT(c_boot_entry)
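The new loop in boothdr.S walks the .rel.iplt table between __rel_iplt_start and __rel_iplt_end: for every entry whose type byte is 42 (R_386_IRELATIVE) it calls the resolver whose address is currently stored at r_offset and writes the returned address back into that slot, so gnu_indirect_function symbols work without a dynamic linker. A C rendering of the same fixup, simplified so it runs anywhere (the relocation table and slot are local, and the relocation struct is a toy, not the ELF one):

    #include <stdio.h>
    #include <stdint.h>

    #define R_386_IRELATIVE 42

    struct toy_rel { uintptr_t r_offset; unsigned char type; };

    static uintptr_t my_resolver(void) { return 0x12345678; } /* pretend resolved address */

    static uintptr_t ifunc_slot;   /* would live in the kernel image */

    int main(void)
    {
        ifunc_slot = (uintptr_t) my_resolver;

        struct toy_rel iplt[] = {
            { (uintptr_t) &ifunc_slot, R_386_IRELATIVE },
        };

        for (unsigned i = 0; i < sizeof iplt / sizeof iplt[0]; i++) {
            if (iplt[i].type != R_386_IRELATIVE)
                continue;
            uintptr_t *where = (uintptr_t *) iplt[i].r_offset;
            uintptr_t (*resolver)(void) = (uintptr_t (*)(void)) *where;
            *where = resolver();   /* call the ifunc resolver, patch the slot */
        }

        printf("slot now holds %#lx\n", (unsigned long) ifunc_slot);
        return 0;
    }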
diff --git a/i386/i386at/com.c b/i386/i386at/com.c
index f0467f29..2099a285 100644
--- a/i386/i386at/com.c
+++ b/i386/i386at/com.c
@@ -29,6 +29,7 @@
#include <mach/std_types.h>
#include <sys/types.h>
#include <kern/printf.h>
+#include <kern/mach_clock.h>
#include <sys/time.h>
#include <device/conf.h>
#include <device/errno.h>
@@ -39,16 +40,17 @@
#include <i386/pio.h>
#include <i386/machspl.h>
#include <chips/busses.h>
+#include <i386at/autoconf.h>
+#include <i386at/com.h>
#include <i386at/comreg.h>
#include <device/cons.h>
-extern void timeout(), ttrstrt();
-
-int comprobe(), comintr(), comstart(), commctl();
-void comattach();
+int comprobe(), commctl();
+void comstart(struct tty *);
+void comstop(), comattach(), comintr();
static void comparam();
-int comstop(), comgetstat(), comsetstat();
+int comgetstat(), comsetstat();
static vm_offset_t com_std[NCOM] = { 0 };
struct bus_device *cominfo[NCOM];
@@ -381,7 +383,7 @@ io_return_t comopen(
if (!comtimer_active) {
comtimer_active = TRUE;
- comtimer();
+ comtimer(NULL);
}
s = spltty();
@@ -431,7 +433,7 @@ io_return_t comportdeath(dev, port)
dev_t dev;
mach_port_t port;
{
- return (tty_portdeath(&com_tty[minor(dev)], port));
+ return (tty_portdeath(&com_tty[minor(dev)], (ipc_port_t)port));
}
io_return_t
@@ -487,7 +489,7 @@ unsigned int count;
return (D_SUCCESS);
}
-int
+void
comintr(unit)
int unit;
{
@@ -527,7 +529,7 @@ int unit;
((tp->t_flags&(EVENP|ODDP)) == EVENP ||
(tp->t_flags&(EVENP|ODDP)) == ODDP)) {
/* parity error */;
- } else if (line&iOR && !comoverrun) {
+ } else if (line_stat&iOR && !comoverrun) {
printf("com%d: overrun\n", unit);
comoverrun = 1;
} else if (line_stat & (iFE | iBRKINTR)) {
@@ -608,7 +610,7 @@ comparm(int unit, int baud, int intr, int mode, int modem)
int comst_1, comst_2, comst_3, comst_4, comst_5 = 14;
-int
+void
comstart(tp)
struct tty *tp;
{
@@ -617,7 +619,7 @@ struct tty *tp;
if (tp->t_state & (TS_TIMEOUT|TS_TTSTOP|TS_BUSY)) {
comst_1++;
- return(0);
+ return;
}
if ((!queue_empty(&tp->t_delayed_write)) &&
(tp->t_outq.c_cc <= TTLOWAT(tp))) {
@@ -626,7 +628,7 @@ comst_2++;
}
if (!tp->t_outq.c_cc) {
comst_3++;
- return(0);
+ return;
}
#if 0
@@ -650,19 +652,18 @@ comst_4++;
timeout(ttrstrt, (char *)tp, (nch & 0x7f) + 6);
tp->t_state |= TS_TIMEOUT;
comst_4++;
- return(0);
+ return;
}
outb(TXRX((int)tp->t_addr), nch);
tp->t_state |= TS_BUSY;
#endif
- return(0);
}
/* Check for stuck xmitters */
int comtimer_interval = 5;
void
-comtimer()
+comtimer(void * param)
{
spl_t s = spltty();
struct tty *tp = com_tty;
@@ -676,7 +677,7 @@ comtimer()
if (++comtimer_state[i] < 2)
continue;
/* Its stuck */
-printf("Tty %x was stuck\n", tp);
+printf("Tty %p was stuck\n", tp);
nch = getc(&tp->t_outq);
outb(TXRX((int)tp->t_addr), nch);
}
@@ -749,7 +750,7 @@ commctl(
spl_t s;
int unit;
vm_offset_t dev_addr;
- register int b;
+ register int b = 0; /* Suppress gcc warning */
unit = minor(tp->t_dev);
@@ -807,7 +808,7 @@ commctl(
return commodem[unit];
}
-int
+void
comstop(tp, flags)
register struct tty *tp;
int flags;
diff --git a/i386/i386at/com.h b/i386/i386at/com.h
new file mode 100644
index 00000000..e53e9482
--- /dev/null
+++ b/i386/i386at/com.h
@@ -0,0 +1,45 @@
+/*
+ * Communication functions
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Communication functions.
+ *
+ */
+
+#ifndef _COM_H_
+#define _COM_H_
+
+#include <mach/std_types.h>
+
+/*
+ * Set receive modem state from modem status register.
+ */
+extern void fix_modem_state(int unit, int modem_stat);
+
+extern void comtimer(void * param);
+
+/*
+ * Modem change (input signals)
+ */
+extern void commodem_intr(int unit, int stat);
+
+extern int comgetc(int unit);
+
+#endif /* _COM_H_ */
diff --git a/i386/i386at/conf.c b/i386/i386at/conf.c
index eee218af..d51b5ee2 100644
--- a/i386/i386at/conf.c
+++ b/i386/i386at/conf.c
@@ -34,6 +34,7 @@ extern int timeopen(), timeclose();
extern vm_offset_t timemmap();
#define timename "time"
+#ifndef MACH_HYP
extern int kdopen(), kdclose(), kdread(), kdwrite();
extern int kdgetstat(), kdsetstat(), kdportdeath();
extern vm_offset_t kdmmap();
@@ -50,17 +51,29 @@ extern int lpropen(), lprclose(), lprread(), lprwrite();
extern int lprgetstat(), lprsetstat(), lprportdeath();
#define lprname "lpr"
#endif /* NLPR > 0 */
+#endif /* MACH_HYP */
extern int kbdopen(), kbdclose(), kbdread();
extern int kbdgetstat(), kbdsetstat();
#define kbdname "kbd"
+#ifndef MACH_HYP
extern int mouseopen(), mouseclose(), mouseread(), mousegetstat();
#define mousename "mouse"
+extern vm_offset_t memmmap();
+#define memname "mem"
+#endif /* MACH_HYP */
+
extern int kmsgopen(), kmsgclose(), kmsgread(), kmsggetstat();
#define kmsgname "kmsg"
+#ifdef MACH_HYP
+extern int hypcnopen(), hypcnclose(), hypcnread(), hypcnwrite();
+extern int hypcngetstat(), hypcnsetstat(), hypcnportdeath();
+#define hypcnname "hyp"
+#endif /* MACH_HYP */
+
/*
* List of devices - console must be at slot 0
*/
@@ -75,20 +88,23 @@ struct dev_ops dev_name_list[] =
cninit() we stick something appropriate here through the
indirect list */
{ "cn", nulldev, nulldev, nulldev,
- nulldev, nulldev, nulldev, nulldev,
+ nulldev, nulldev, nulldev, nomap,
nodev, nulldev, nulldev, 0,
nodev },
+#ifndef MACH_HYP
{ kdname, kdopen, kdclose, kdread,
kdwrite, kdgetstat, kdsetstat, kdmmap,
nodev, nulldev, kdportdeath, 0,
nodev },
+#endif /* MACH_HYP */
{ timename, timeopen, timeclose, nulldev,
nulldev, nulldev, nulldev, timemmap,
nodev, nulldev, nulldev, 0,
nodev },
+#ifndef MACH_HYP
#if NCOM > 0
{ comname, comopen, comclose, comread,
comwrite, comgetstat, comsetstat, nomap,
@@ -107,12 +123,20 @@ struct dev_ops dev_name_list[] =
nodev, mousegetstat, nulldev, nomap,
nodev, nulldev, nulldev, 0,
nodev },
+#endif /* MACH_HYP */
{ kbdname, kbdopen, kbdclose, kbdread,
nodev, kbdgetstat, kbdsetstat, nomap,
nodev, nulldev, nulldev, 0,
nodev },
+#ifndef MACH_HYP
+ { memname, nulldev, nulldev, nodev,
+ nodev, nodev, nodev, memmmap,
+ nodev, nulldev, nulldev, 0,
+ nodev },
+#endif /* MACH_HYP */
+
#ifdef MACH_KMSG
{ kmsgname, kmsgopen, kmsgclose, kmsgread,
nodev, kmsggetstat, nodev, nomap,
@@ -120,6 +144,13 @@ struct dev_ops dev_name_list[] =
nodev },
#endif
+#ifdef MACH_HYP
+ { hypcnname, hypcnopen, hypcnclose, hypcnread,
+ hypcnwrite, hypcngetstat, hypcnsetstat, nomap,
+ nodev, nulldev, hypcnportdeath, 0,
+ nodev },
+#endif /* MACH_HYP */
+
};
int dev_name_count = sizeof(dev_name_list)/sizeof(dev_name_list[0]);
diff --git a/i386/i386at/cons_conf.c b/i386/i386at/cons_conf.c
index 8784ed94..ea8ccb56 100644
--- a/i386/i386at/cons_conf.c
+++ b/i386/i386at/cons_conf.c
@@ -30,19 +30,27 @@
#include <sys/types.h>
#include <device/cons.h>
+#ifdef MACH_HYP
+extern int hypcnprobe(), hypcninit(), hypcngetc(), hypcnputc();
+#else /* MACH_HYP */
extern int kdcnprobe(), kdcninit(), kdcngetc(), kdcnputc();
#if NCOM > 0 && RCLINE >= 0
extern int comcnprobe(), comcninit(), comcngetc(), comcnputc();
#endif
+#endif /* MACH_HYP */
/*
* The rest of the consdev fields are filled in by the respective
* cnprobe routine.
*/
struct consdev constab[] = {
+#ifdef MACH_HYP
+ {"hyp", hypcnprobe, hypcninit, hypcngetc, hypcnputc},
+#else /* MACH_HYP */
{"kd", kdcnprobe, kdcninit, kdcngetc, kdcnputc},
#if NCOM > 0 && RCLINE >= 0 && 1
{"com", comcnprobe, comcninit, comcngetc, comcnputc},
#endif
+#endif /* MACH_HYP */
{0}
};
diff --git a/i386/i386at/idt.h b/i386/i386at/idt.h
index 840bad11..1b3284fa 100644
--- a/i386/i386at/idt.h
+++ b/i386/i386at/idt.h
@@ -34,4 +34,8 @@
#include <i386/idt-gen.h>
+#ifndef __ASSEMBLER__
+extern void idt_init (void);
+#endif
+
#endif /* _I386AT_IDT_ */
diff --git a/i386/i386at/int_init.h b/i386/i386at/int_init.h
new file mode 100644
index 00000000..f4abef0b
--- /dev/null
+++ b/i386/i386at/int_init.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Initialization functions.
+ *
+ */
+
+#ifndef _INT_INIT_H_
+#define _INT_INIT_H_
+
+#include <mach/std_types.h>
+
+#ifndef __ASSEMBLER__
+extern void int_init (void);
+#endif
+
+#endif /* _INT_INIT_H_ */
diff --git a/i386/i386at/kd.c b/i386/i386at/kd.c
index 9dbbd46a..ad155953 100644
--- a/i386/i386at/kd.c
+++ b/i386/i386at/kd.c
@@ -85,6 +85,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <device/io_req.h>
#include <device/buf.h> /* for struct uio (!) */
#include <vm/vm_kern.h>
+#include <i386/loose_ends.h>
#include <i386/vm_param.h>
#include <i386/machspl.h>
#include <i386/pio.h>
@@ -94,15 +95,12 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <i386at/kd_mouse.h>
#include <i386at/kdsoft.h>
#include <device/cons.h>
+#include <util/atoi.h>
#define DEBUG 1 /* export feep() */
-#define DEFAULT -1 /* see kd_atoi */
-
void kd_enqsc(); /* enqueues a scancode */
-void timeout();
-
#if 0
#define BROKEN_KEYBOARD_RESET
#endif
@@ -117,7 +115,8 @@ boolean_t kdcheckmagic();
int kdcnprobe(struct consdev *cp);
int kdcninit(struct consdev *cp);
int kdcngetc(dev_t dev, int wait);
-int kdcnputc(dev_t dev, int c);
+void kdcnputc(dev_t dev, int c);
+int do_modifier (int, Scancode, boolean_t);
/*
* These routines define the interface to the device-specific layer.
@@ -378,7 +377,7 @@ feep()
kd_bellon();
for (i = 0; i < 50000; ++i)
;
- kd_belloff();
+ kd_belloff(NULL);
}
void
@@ -459,9 +458,9 @@ kdopen(dev, flag, ior)
io_req_t ior;
{
struct tty *tp;
- int kdstart();
+ void kdstart();
spl_t o_pri;
- int kdstop();
+ void kdstop();
tp = &kd_tty;
o_pri = spltty();
@@ -588,7 +587,7 @@ kdportdeath(dev, port)
dev_t dev;
mach_port_t port;
{
- return (tty_portdeath(&kd_tty, port));
+ return (tty_portdeath(&kd_tty, (ipc_port_t)port));
}
/*ARGSUSED*/
@@ -669,7 +668,7 @@ int flags; /* flags set for console */
if (val == KD_BELLON)
kd_bellon();
else if (val == KD_BELLOFF)
- kd_belloff();
+ kd_belloff(NULL);
else
err = D_INVALID_OPERATION;
@@ -735,7 +734,7 @@ int flags; /* flags set for console */
*
*/
/*ARGSUSED*/
-int
+void
kdintr(vec, regs)
int vec;
int regs;
@@ -1083,7 +1082,7 @@ boolean_t extended;
* Entered and left at spltty. Drops priority to spl0 to display character.
* ASSUMES that it is never called from interrupt-driven code.
*/
-int
+void
kdstart(tp)
struct tty *tp;
{
@@ -1134,7 +1133,7 @@ struct tty *tp;
}
/*ARGSUSED*/
-int
+void
kdstop(tp, flags)
register struct tty *tp;
int flags;
@@ -1230,7 +1229,7 @@ kdinit()
static unsigned int kd_bellstate = 0;
void
-kd_belloff()
+kd_belloff(void * param)
{
unsigned char status;
@@ -1505,17 +1504,17 @@ u_char *cp;
csrpos_t newpos;
for(i=0;i<=15;i++)
- number[i] = DEFAULT;
+ number[i] = MACH_ATOI_DEFAULT;
do {
- cp += kd_atoi(cp, &number[npar]);
+ cp += mach_atoi(cp, &number[npar]);
} while (*cp == ';' && ++npar <= 15 && cp++);
switch(*cp) {
case 'm':
for (i=0;i<=npar;i++)
switch(number[i]) {
- case DEFAULT:
+ case MACH_ATOI_DEFAULT:
case 0:
kd_attrflags = 0;
kd_color = KA_NORMAL;
@@ -1575,14 +1574,14 @@ u_char *cp;
esc_spt = esc_seq;
break;
case '@':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_insch(1);
else
kd_insch(number[0]);
esc_spt = esc_seq;
break;
case 'A':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_up();
else
while (number[0]--)
@@ -1590,7 +1589,7 @@ u_char *cp;
esc_spt = esc_seq;
break;
case 'B':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_down();
else
while (number[0]--)
@@ -1598,7 +1597,7 @@ u_char *cp;
esc_spt = esc_seq;
break;
case 'C':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_right();
else
while (number[0]--)
@@ -1606,7 +1605,7 @@ u_char *cp;
esc_spt = esc_seq;
break;
case 'D':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_left();
else
while (number[0]--)
@@ -1615,7 +1614,7 @@ u_char *cp;
break;
case 'E':
kd_cr();
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_down();
else
while (number[0]--)
@@ -1624,7 +1623,7 @@ u_char *cp;
break;
case 'F':
kd_cr();
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_up();
else
while (number[0]--)
@@ -1632,7 +1631,7 @@ u_char *cp;
esc_spt = esc_seq;
break;
case 'G':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
number[0] = 0;
else
if (number[0] > 0)
@@ -1642,18 +1641,18 @@ u_char *cp;
break;
case 'f':
case 'H':
- if (number[0] == DEFAULT && number[1] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT && number[1] == MACH_ATOI_DEFAULT)
{
kd_home();
esc_spt = esc_seq;
break;
}
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
number[0] = 0;
else if (number[0] > 0)
--number[0]; /* numbered from 1 */
newpos = (number[0] * ONE_LINE); /* setup row */
- if (number[1] == DEFAULT)
+ if (number[1] == MACH_ATOI_DEFAULT)
number[1] = 0;
else if (number[1] > 0)
number[1]--;
@@ -1667,7 +1666,7 @@ u_char *cp;
break; /* done or not ready */
case 'J':
switch(number[0]) {
- case DEFAULT:
+ case MACH_ATOI_DEFAULT:
case 0:
kd_cltobcur(); /* clears from current
pos to bottom.
@@ -1688,7 +1687,7 @@ u_char *cp;
break;
case 'K':
switch(number[0]) {
- case DEFAULT:
+ case MACH_ATOI_DEFAULT:
case 0:
kd_cltoecur(); /* clears from current
pos to eoln.
@@ -1709,28 +1708,28 @@ u_char *cp;
esc_spt = esc_seq;
break;
case 'L':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_insln(1);
else
kd_insln(number[0]);
esc_spt = esc_seq;
break;
case 'M':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_delln(1);
else
kd_delln(number[0]);
esc_spt = esc_seq;
break;
case 'P':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_delch(1);
else
kd_delch(number[0]);
esc_spt = esc_seq;
break;
case 'S':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_scrollup();
else
while (number[0]--)
@@ -1738,7 +1737,7 @@ u_char *cp;
esc_spt = esc_seq;
break;
case 'T':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_scrolldn();
else
while (number[0]--)
@@ -1746,7 +1745,7 @@ u_char *cp;
esc_spt = esc_seq;
break;
case 'X':
- if (number[0] == DEFAULT)
+ if (number[0] == MACH_ATOI_DEFAULT)
kd_erase(1);
else
kd_erase(number[0]);
@@ -1762,37 +1761,6 @@ u_char *cp;
return;
}
-/*
- * kd_atoi:
- *
- * This function converts an ascii string into an integer, and
- * returns DEFAULT if no integer was found. Note that this is why
- * we don't use the regular atio(), because ZERO is ZERO and not
- * the DEFAULT in all cases.
- *
- * input : string
- * output : a number or possibly DEFAULT, and the count of characters
- * consumed by the conversion
- *
- */
-int
-kd_atoi(cp, nump)
-u_char *cp;
-int *nump;
-{
- int number;
- u_char *original;
-
- original = cp;
- for (number = 0; ('0' <= *cp) && (*cp <= '9'); cp++)
- number = (number * 10) + (*cp - '0');
- if (original == cp)
- *nump = DEFAULT;
- else
- *nump = number;
- return(cp - original);
-}
-
void
kd_tab()
{
@@ -2973,6 +2941,7 @@ kdcnprobe(struct consdev *cp)
cp->cn_dev = makedev(maj, unit);
cp->cn_pri = pri;
+ return 0;
}
int
@@ -2995,7 +2964,7 @@ kdcngetc(dev_t dev, int wait)
return kdcnmaygetc();
}
-int
+void
kdcnputc(dev_t dev, int c)
{
if (!kd_initialized)
diff --git a/i386/i386at/kd.h b/i386/i386at/kd.h
index 2e8f8318..9048ce50 100644
--- a/i386/i386at/kd.h
+++ b/i386/i386at/kd.h
@@ -685,4 +685,54 @@ typedef struct {
#define MOUSE_MOTION 4 /* mouse motion */
#define KEYBD_EVENT 5 /* key up/down */
+extern boolean_t kd_isupper (u_char);
+extern boolean_t kd_islower (u_char);
+extern void kd_senddata (unsigned char);
+extern void kd_sendcmd (unsigned char);
+extern void kd_cmdreg_write (int);
+extern void kd_mouse_drain (void);
+extern void set_kd_state (int);
+extern void kd_setleds1 (u_char);
+extern void kd_setleds2 (void);
+extern void cnsetleds (u_char);
+extern void kdreboot (void);
+extern void kd_putc (u_char);
+extern void kd_parseesc (void);
+extern void kd_down (void);
+extern void kd_up (void);
+extern void kd_cr (void);
+extern void kd_tab (void);
+extern void kd_left (void);
+extern void kd_right (void);
+extern void kd_scrollup (void);
+extern void kd_scrolldn (void);
+extern void kd_cls (void);
+extern void kd_home (void);
+extern void kd_insch (int number);
+extern void kd_cltobcur (void);
+extern void kd_cltopcur (void);
+extern void kd_cltoecur (void);
+extern void kd_clfrbcur (void);
+extern void kd_eraseln (void);
+extern void kd_insln (int);
+extern void kd_delln (int);
+extern void kd_delch (int);
+extern void kd_erase (int);
+extern void kd_bellon (void);
+extern void kd_belloff (void *param);
+extern void kdinit (void);
+extern int kdsetkbent (struct kbentry *, int);
+extern int kdgetkbent (struct kbentry *);
+extern int kdsetbell (int, int);
+extern void kd_resend (void);
+extern void kd_handle_ack (void);
+extern int kd_kbd_magic (int);
+extern int kdstate2idx (int, boolean_t);
+extern void kd_parserest (u_char *);
+extern int kdcnmaygetc (void);
+
+extern void kd_slmwd (void *start, int count, int value);
+extern void kd_slmscu (void *from, void *to, int count);
+extern void kd_slmscd (void *from, void *to, int count);
+
#endif /* _KD_H_ */
diff --git a/i386/i386at/kd_event.c b/i386/i386at/kd_event.c
index 76884129..5d8d5637 100644
--- a/i386/i386at/kd_event.c
+++ b/i386/i386at/kd_event.c
@@ -60,6 +60,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <string.h>
#ifdef MACH_KERNEL
+#include <device/ds_routines.h>
#include <device/errno.h>
#include <device/io_req.h>
#else /* MACH_KERNEL */
diff --git a/i386/i386at/kd_mouse.c b/i386/i386at/kd_mouse.c
index 2ad3be22..640209c5 100644
--- a/i386/i386at/kd_mouse.c
+++ b/i386/i386at/kd_mouse.c
@@ -68,8 +68,10 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <sys/types.h>
#include <kern/printf.h>
#ifdef MACH_KERNEL
+#include <device/ds_routines.h>
#include <device/errno.h>
#include <device/io_req.h>
+#include <device/subrs.h>
#else /* MACH_KERNEL */
#include <sys/file.h>
#include <sys/errno.h>
@@ -81,15 +83,17 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <sys/tty.h>
#endif /* MACH_KERNEL */
#include <i386/ipl.h>
+#include <i386/pic.h>
#include <i386/pio.h>
#include <chips/busses.h>
+#include <i386at/com.h>
#include <i386at/kd.h>
#include <i386at/kd_queue.h>
#include <i386at/i8250.h>
#include "kd_mouse.h"
-static int (*oldvect)(); /* old interrupt vector */
+static void (*oldvect)(); /* old interrupt vector */
static int oldunit;
static spl_t oldspl;
extern struct bus_device *cominfo[];
@@ -120,7 +124,7 @@ u_char lastbuttons; /* previous state of mouse buttons */
#define MOUSE_DOWN 0
#define MOUSE_ALL_UP 0x7
-int mouseintr();
+void mouseintr();
void mouse_enqueue();
int mouse_baud = BCNT1200;
@@ -252,7 +256,7 @@ kd_mouse_open(dev, mouse_pic)
int mouse_pic;
{
spl_t s = splhi(); /* disable interrupts */
- extern int kdintr();
+ extern void kdintr();
oldvect = ivect[mouse_pic];
ivect[mouse_pic] = kdintr;
@@ -535,7 +539,7 @@ done:
/*
* mouseintr - Get a byte and pass it up for handling. Called at SPLKD.
*/
-int
+void
mouseintr(unit)
{
unsigned short base_addr = cominfo[unit]->address;
@@ -599,7 +603,7 @@ mouse_handle_byte(ch)
mousebuf[mousebufindex++] = ch;
if (mouse_char_wanted) {
mouse_char_wanted = FALSE;
- wakeup(&mousebuf);
+ wakeup((vm_offset_t)&mousebuf);
}
return;
}
diff --git a/i386/i386at/lpr.c b/i386/i386at/lpr.c
index 348159e9..57f6f857 100644
--- a/i386/i386at/lpr.c
+++ b/i386/i386at/lpr.c
@@ -34,6 +34,7 @@
#include <mach/std_types.h>
#include <sys/types.h>
#include <kern/printf.h>
+#include <kern/mach_clock.h>
#include <sys/time.h>
#include <device/conf.h>
#include <device/errno.h>
@@ -55,20 +56,22 @@
#include <i386/ipl.h>
#include <i386/pio.h>
#include <chips/busses.h>
+#include <i386at/autoconf.h>
#include <i386at/lprreg.h>
-extern void timeout();
-extern void ttrstrt();
/*
* Driver information for auto-configuration stuff.
*/
-int lprprobe(), lprintr(), lprstart(), lprstop();
+int lprprobe();
+void lprstop();
+void lprintr(), lprstart();
void lprattach(struct bus_device *);
#ifdef MACH_KERNEL
-int lprstop(), lprgetstat(), lprsetstat();
+int lprgetstat(), lprsetstat();
#endif /* MACH_KERNEL */
+void lprpr_addr();
struct bus_device *lprinfo[NLPR]; /* ??? */
@@ -201,7 +204,7 @@ lprportdeath(dev, port)
dev_t dev;
mach_port_t port;
{
- return (tty_portdeath(&lpr_tty[minor(dev)], port));
+ return (tty_portdeath(&lpr_tty[minor(dev)], (ipc_port_t)port));
}
io_return_t
@@ -279,7 +282,7 @@ int lprioctl(dev, cmd, addr, mode)
}
#endif /* MACH_KERNEL */
-int lprintr(unit)
+void lprintr(unit)
int unit;
{
register struct tty *tp = &lpr_tty[unit];
@@ -294,7 +297,7 @@ int unit;
lprstart(tp);
}
-int lprstart(tp)
+void lprstart(tp)
struct tty *tp;
{
spl_t s = spltty();
@@ -304,13 +307,13 @@ struct tty *tp;
if (tp->t_state & (TS_TIMEOUT|TS_TTSTOP|TS_BUSY)) {
splx(s);
- return(0);
+ return;
}
if (status & 0x20) {
printf("Printer out of paper!\n");
splx(s);
- return(0);
+ return;
}
if (tp->t_outq.c_cc <= TTLOWAT(tp)) {
@@ -330,7 +333,7 @@ struct tty *tp;
}
if (tp->t_outq.c_cc == 0) {
splx(s);
- return(0);
+ return;
}
#ifdef MACH_KERNEL
nch = getc(&tp->t_outq);
@@ -365,11 +368,11 @@ struct tty *tp;
}
#endif /* MACH_KERNEL */
splx(s);
- return(0);
+ return;
}
#ifdef MACH_KERNEL
-int
+void
lprstop(tp, flags)
register struct tty *tp;
int flags;
@@ -378,7 +381,7 @@ int flags;
tp->t_state |= TS_FLUSH;
}
#else /* MACH_KERNEL */
-int lprstop(tp, flag)
+void lprstop(tp, flag)
struct tty *tp;
{
int s = spltty();
diff --git a/i386/i386at/mem.c b/i386/i386at/mem.c
new file mode 100644
index 00000000..5e51676b
--- /dev/null
+++ b/i386/i386at/mem.c
@@ -0,0 +1,47 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <device/io_req.h>
+#include <i386/model_dep.h>
+
+/* This provides access to any memory that is not main RAM */
+
+/*ARGSUSED*/
+int
+memmmap(dev, off, prot)
+int dev;
+vm_offset_t off;
+vm_prot_t prot;
+{
+ if (off == 0)
+ return 0;
+ else if (off < 0xa0000)
+ return -1;
+ else if (off >= 0x100000 && off < phys_last_addr)
+ return -1;
+ else
+ return i386_btop(off);
+}
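memmmap() is the /dev/mem mapping hook: offsets inside main RAM are refused so user space cannot map kernel memory, while the VGA/BIOS hole below 1MiB and anything beyond phys_last_addr map to their page number. The gating logic evaluated stand-alone, with a sample phys_last_addr and page shift (both placeholders):

    #include <stdio.h>

    #define PHYS_LAST_ADDR 0x8000000UL   /* pretend 128 MiB of RAM */
    #define PAGE_SHIFT     12

    static long toy_memmmap(unsigned long off)
    {
        if (off == 0)
            return 0;
        else if (off < 0xa0000)                           /* low RAM: refuse */
            return -1;
        else if (off >= 0x100000 && off < PHYS_LAST_ADDR) /* main RAM: refuse */
            return -1;
        else
            return (long)(off >> PAGE_SHIFT);             /* device/ROM space */
    }

    int main(void)
    {
        unsigned long samples[] = { 0x0, 0x5000, 0xa0000, 0xb8000,
                                    0x200000, 0x9000000 };
        for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
            printf("off %#9lx -> %ld\n", samples[i], toy_memmmap(samples[i]));
        return 0;
    }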
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index d3d9ca6c..ca000786 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -34,10 +34,13 @@
#include <string.h>
+#include <device/cons.h>
+
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/machine.h>
#include <mach/machine/multiboot.h>
+#include <mach/xen.h>
#include <i386/vm_param.h>
#include <kern/assert.h>
@@ -46,11 +49,30 @@
#include <kern/mach_clock.h>
#include <kern/printf.h>
#include <sys/time.h>
+#include <sys/types.h>
#include <vm/vm_page.h>
+#include <i386/fpu.h>
+#include <i386/gdt.h>
+#include <i386/ktss.h>
+#include <i386/ldt.h>
#include <i386/machspl.h>
+#include <i386/pic.h>
+#include <i386/pit.h>
#include <i386/pmap.h>
#include <i386/proc_reg.h>
#include <i386/locore.h>
+#include <i386/model_dep.h>
+#include <i386at/autoconf.h>
+#include <i386at/idt.h>
+#include <i386at/int_init.h>
+#include <i386at/kd.h>
+#include <i386at/rtc.h>
+#ifdef MACH_XEN
+#include <xen/console.h>
+#include <xen/store.h>
+#include <xen/evt.h>
+#include <xen/xen.h>
+#endif /* MACH_XEN */
/* Location of the kernel's symbol table.
Both of these are 0 if none is available. */
@@ -66,11 +88,21 @@ static vm_offset_t kern_sym_start, kern_sym_end;
vm_offset_t phys_first_addr = 0;
vm_offset_t phys_last_addr;
-/* Virtual address of physical memory, for the kvtophys/phystokv macros. */
-vm_offset_t phys_mem_va;
-
/* A copy of the multiboot info structure passed by the boot loader. */
+#ifdef MACH_XEN
+struct start_info boot_info;
+#ifdef MACH_PSEUDO_PHYS
+unsigned long *mfn_list;
+#if VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+unsigned long *pfn_list = (void*) PFN_LIST;
+#endif
+#endif /* MACH_PSEUDO_PHYS */
+#if VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+unsigned long la_shift = VM_MIN_KERNEL_ADDRESS;
+#endif
+#else /* MACH_XEN */
struct multiboot_info boot_info;
+#endif /* MACH_XEN */
/* Command line supplied to kernel. */
char *kernel_cmdline = "";
@@ -79,7 +111,11 @@ char *kernel_cmdline = "";
it gets bumped up through physical memory
that exists and is not occupied by boot gunk.
It is not necessarily page-aligned. */
-static vm_offset_t avail_next = 0x1000; /* XX end of BIOS data area */
+static vm_offset_t avail_next
+#ifndef MACH_HYP
+ = 0x1000 /* XX end of BIOS data area */
+#endif /* MACH_HYP */
+ ;
/* Possibly overestimated amount of available memory
still remaining to be handed to the VM system. */
@@ -96,6 +132,10 @@ void inittodr(); /* forward */
int rebootflag = 0; /* exported to kdintr */
+#if ! MACH_KBD
+boolean_t reboot_on_panic = 1;
+#endif
+
/* XX interrupt stack pointer and highwater mark, for locore.S. */
vm_offset_t int_stack_top, int_stack_high;
@@ -103,6 +143,8 @@ vm_offset_t int_stack_top, int_stack_high;
extern void linux_init(void);
#endif
+boolean_t init_alloc_aligned(vm_size_t size, vm_offset_t *addrp);
+
/*
* Find devices. The system is alive.
*/
@@ -118,6 +160,9 @@ void machine_init(void)
*/
init_fpu();
+#ifdef MACH_HYP
+ hyp_init();
+#else /* MACH_HYP */
#ifdef LINUX_DEV
/*
* Initialize Linux drivers.
@@ -129,16 +174,19 @@ void machine_init(void)
* Find the devices
*/
probeio();
+#endif /* MACH_HYP */
/*
* Get the time
*/
inittodr();
+#ifndef MACH_HYP
/*
* Tell the BIOS not to clear and test memory.
*/
*(unsigned short *)phystokv(0x472) = 0x1234;
+#endif /* MACH_HYP */
/*
* Unmap page 0 to trap NULL references.
@@ -149,8 +197,17 @@ void machine_init(void)
/* Conserve power on processor CPU. */
void machine_idle (int cpu)
{
+#ifdef MACH_HYP
+ hyp_idle();
+#else /* MACH_HYP */
assert (cpu == cpu_number ());
asm volatile ("hlt" : : : "memory");
+#endif /* MACH_HYP */
+}
+
+void machine_relax ()
+{
+ asm volatile ("rep; nop" : : : "memory");
}
/*
@@ -158,9 +215,13 @@ void machine_idle (int cpu)
*/
void halt_cpu(void)
{
+#ifdef MACH_HYP
+ hyp_halt();
+#else /* MACH_HYP */
asm volatile("cli");
while (TRUE)
machine_idle (cpu_number ());
+#endif /* MACH_HYP */
}
/*
@@ -170,10 +231,16 @@ void halt_all_cpus(reboot)
boolean_t reboot;
{
if (reboot) {
+#ifdef MACH_HYP
+ hyp_reboot();
+#endif /* MACH_HYP */
kdreboot();
}
else {
rebootflag = 1;
+#ifdef MACH_HYP
+ hyp_halt();
+#endif /* MACH_HYP */
printf("In tight loop: hit ctl-alt-del to reboot\n");
(void) spl0();
}
@@ -198,32 +265,49 @@ void db_reset_cpu(void)
void
mem_size_init(void)
{
- vm_size_t phys_last_kb;
-
/* Physical memory on all PCs starts at physical address 0.
XX make it a constant. */
phys_first_addr = 0;
- phys_last_kb = 0x400 + boot_info.mem_upper;
+#ifdef MACH_HYP
+ if (boot_info.nr_pages >= 0x100000) {
+ printf("Truncating memory size to 4GiB\n");
+ phys_last_addr = 0xffffffffU;
+ } else
+ phys_last_addr = boot_info.nr_pages * 0x1000;
+#else /* MACH_HYP */
+ /* TODO: support mmap */
+ vm_size_t phys_last_kb = 0x400 + boot_info.mem_upper;
/* Avoid 4GiB overflow. */
- if (phys_last_kb < 0x400 || phys_last_kb >= 0x400000)
- phys_last_kb = 0x400000 - 1;
-
- phys_last_addr = phys_last_kb * 0x400;
- avail_remaining
- = phys_last_addr - (0x100000 - (boot_info.mem_lower * 0x400)
- - 0x1000);
+ if (phys_last_kb < 0x400 || phys_last_kb >= 0x400000) {
+ printf("Truncating memory size to 4GiB\n");
+ phys_last_addr = 0xffffffffU;
+ } else
+ phys_last_addr = phys_last_kb * 0x400;
+#endif /* MACH_HYP */
printf("AT386 boot: physical memory from 0x%x to 0x%x\n",
phys_first_addr, phys_last_addr);
- /* Reserve 1/16 of the memory address space for virtual mappings.
+ /* Reserve 1/6 of the memory address space for virtual mappings.
* Yes, this loses memory. Blame i386. */
- if (phys_last_addr > (VM_MAX_KERNEL_ADDRESS / 16) * 15)
- phys_last_addr = (VM_MAX_KERNEL_ADDRESS / 16) * 15;
+ if (phys_last_addr > ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 6) * 5) {
+ phys_last_addr = ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 6) * 5;
+ printf("Truncating memory size to %dMiB\n", (phys_last_addr - phys_first_addr) / (1024 * 1024));
+ /* TODO Xen: free lost memory */
+ }
phys_first_addr = round_page(phys_first_addr);
phys_last_addr = trunc_page(phys_last_addr);
+
+#ifdef MACH_HYP
+ /* Memory is just contiguous */
+ avail_remaining = phys_last_addr;
+#else /* MACH_HYP */
+ avail_remaining
+ = phys_last_addr - (0x100000 - (boot_info.mem_lower * 0x400)
+ - 0x1000);
+#endif /* MACH_HYP */
}
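mem_size_init() now caps usable RAM at 5/6 of the kernel address range (VM_MAX_KERNEL_ADDRESS minus VM_MIN_KERNEL_ADDRESS) rather than 15/16 of a fixed 1GiB, keeping the remaining sixth for kernel virtual mappings. The cap evaluated in isolation, with illustrative non-Xen constants (a 1GiB kernel range starting at 0):

    #include <stdio.h>

    int main(void)
    {
        unsigned long vm_min = 0x00000000UL;            /* VM_MIN_KERNEL_ADDRESS */
        unsigned long vm_max = 0x40000000UL;            /* assumed 1GiB kernel range */
        unsigned long phys_last_addr = 0xffffffffUL;    /* e.g. after the 4GiB clamp */

        unsigned long cap = ((vm_max - vm_min) / 6) * 5;
        if (phys_last_addr > cap) {
            phys_last_addr = cap;
            printf("Truncating memory size to %luMiB\n",
                   phys_last_addr / (1024 * 1024));
        }
        return 0;
    }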
/*
@@ -239,13 +323,56 @@ i386at_init(void)
/*
* Initialize the PIC prior to any possible call to an spl.
*/
+#ifndef MACH_HYP
picinit();
+#else /* MACH_HYP */
+ hyp_intrinit();
+#endif /* MACH_HYP */
/*
* Find memory size parameters.
*/
mem_size_init();
+#ifdef MACH_XEN
+ kernel_cmdline = (char*) boot_info.cmd_line;
+#else /* MACH_XEN */
+ /* Copy content pointed by boot_info before losing access to it when it
+ * is too far in physical memory. */
+ if (boot_info.flags & MULTIBOOT_CMDLINE) {
+ vm_offset_t addr;
+ int len = strlen ((char*)phystokv(boot_info.cmdline)) + 1;
+ assert(init_alloc_aligned(round_page(len), &addr));
+ kernel_cmdline = (char*) phystokv(addr);
+ memcpy(kernel_cmdline, (char*)phystokv(boot_info.cmdline), len);
+ boot_info.cmdline = addr;
+ }
+
+ if (boot_info.flags & MULTIBOOT_MODS) {
+ struct multiboot_module *m;
+ vm_offset_t addr;
+ int i;
+
+ assert(init_alloc_aligned(round_page(boot_info.mods_count * sizeof(*m)), &addr));
+ m = (void*) phystokv(addr);
+ memcpy(m, (void*) phystokv(boot_info.mods_addr), boot_info.mods_count * sizeof(*m));
+ boot_info.mods_addr = addr;
+
+ for (i = 0; i < boot_info.mods_count; i++) {
+ vm_size_t size = m[i].mod_end - m[i].mod_start;
+ assert(init_alloc_aligned(round_page(size), &addr));
+ memcpy((void*) phystokv(addr), (void*) phystokv(m[i].mod_start), size);
+ m[i].mod_start = addr;
+ m[i].mod_end = addr + size;
+
+ size = strlen((char*) phystokv(m[i].string)) + 1;
+ assert(init_alloc_aligned(round_page(size), &addr));
+ memcpy((void*) phystokv(addr), (void*) phystokv(m[i].string), size);
+ m[i].string = addr;
+ }
+ }
+#endif /* MACH_XEN */
+
/*
* Initialize kernel physical map, mapping the
* region from loadpt to avail_start.
@@ -263,38 +390,102 @@ i386at_init(void)
* Also, set the WP bit so that on 486 or better processors
* page-level write protection works in kernel mode.
*/
- kernel_page_dir[lin2pdenum(0)] =
+ kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS)] =
kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)];
- set_cr3((unsigned)kernel_page_dir);
+#if PAE
+ /* PAE page tables are 2MB only */
+ kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS) + 1] =
+ kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS) + 1];
+ kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS) + 2] =
+ kernel_page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS) + 2];
+#endif /* PAE */
+#ifdef MACH_XEN
+ {
+ int i;
+ for (i = 0; i < PDPNUM; i++)
+ pmap_set_page_readonly_init((void*) kernel_page_dir + i * INTEL_PGBYTES);
+#if PAE
+ pmap_set_page_readonly_init(kernel_pmap->pdpbase);
+#endif /* PAE */
+ }
+#endif /* MACH_XEN */
+#if PAE
+ set_cr3((unsigned)_kvtophys(kernel_pmap->pdpbase));
+#ifndef MACH_HYP
+ if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE))
+ panic("CPU doesn't have support for PAE.");
+ set_cr4(get_cr4() | CR4_PAE);
+#endif /* MACH_HYP */
+#else
+ set_cr3((unsigned)_kvtophys(kernel_page_dir));
+#endif /* PAE */
+#ifndef MACH_HYP
if (CPU_HAS_FEATURE(CPU_FEATURE_PGE))
set_cr4(get_cr4() | CR4_PGE);
+ /* already set by Hypervisor */
set_cr0(get_cr0() | CR0_PG | CR0_WP);
+#endif /* MACH_HYP */
flush_instr_queue();
+#ifdef MACH_XEN
+ pmap_clear_bootstrap_pagetable((void *)boot_info.pt_base);
+#endif /* MACH_XEN */
+
+ /* Interrupt stacks are allocated in physical memory,
+ while kernel stacks are allocated in kernel virtual memory,
+ so phys_last_addr serves as a convenient dividing point. */
+ int_stack_high = phystokv(phys_last_addr);
/*
* Initialize and activate the real i386 protected-mode structures.
*/
gdt_init();
idt_init();
+#ifndef MACH_HYP
int_init();
+#endif /* MACH_HYP */
ldt_init();
ktss_init();
/* Get rid of the temporary direct mapping and flush it out of the TLB. */
- kernel_page_dir[lin2pdenum(0)] = 0;
- set_cr3((unsigned)kernel_page_dir);
-
-
+#ifdef MACH_XEN
+#ifdef MACH_PSEUDO_PHYS
+ if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS)]), 0))
+#else /* MACH_PSEUDO_PHYS */
+ if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS, 0, UVMF_INVLPG | UVMF_ALL))
+#endif /* MACH_PSEUDO_PHYS */
+ printf("couldn't unmap frame 0\n");
+#if PAE
+#ifdef MACH_PSEUDO_PHYS
+ if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS) + 1]), 0))
+#else /* MACH_PSEUDO_PHYS */
+ if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL))
+#endif /* MACH_PSEUDO_PHYS */
+ printf("couldn't unmap frame 1\n");
+#ifdef MACH_PSEUDO_PHYS
+ if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS) + 2]), 0))
+#else /* MACH_PSEUDO_PHYS */
+ if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + 2*INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL))
+#endif /* MACH_PSEUDO_PHYS */
+ printf("couldn't unmap frame 2\n");
+#endif /* PAE */
+ hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS);
+#else /* MACH_XEN */
+ kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS)] = 0;
+#if PAE
+ kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS) + 1] = 0;
+ kernel_page_dir[lin2pdenum(VM_MIN_KERNEL_ADDRESS) + 2] = 0;
+#endif /* PAE */
+#endif /* MACH_XEN */
+ flush_tlb();
+
+#ifdef MACH_XEN
+ hyp_p2m_init();
+#endif /* MACH_XEN */
/* XXX We'll just use the initialization stack we're already running on
as the interrupt stack for now. Later this will have to change,
because the init stack will get freed after bootup. */
asm("movl %%esp,%0" : "=m" (int_stack_top));
-
- /* Interrupt stacks are allocated in physical memory,
- while kernel stacks are allocated in kernel virtual memory,
- so phys_last_addr serves as a convenient dividing point. */
- int_stack_high = phys_last_addr;
}
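i386at_init() keeps a temporary direct mapping of low memory while paging is switched on: the page-directory slot for VM_MIN_KERNEL_ADDRESS is aliased to the one for LINEAR_MIN_KERNEL_ADDRESS (three slots under PAE, where each page table only covers 2MiB), and once the descriptor tables are live the alias is cleared, or unmapped through Xen hypercalls, and the TLB flushed. The slot arithmetic in isolation, using the non-PAE page-directory shift of 22 bits and the non-Xen constants:

    #include <stdio.h>

    #define PDESHIFT 22                    /* non-PAE: one PDE maps 4MiB */
    #define lin2pdenum(a) ((unsigned long)(a) >> PDESHIFT)

    int main(void)
    {
        unsigned long vm_min  = 0x00000000UL;  /* VM_MIN_KERNEL_ADDRESS, non-Xen */
        unsigned long lin_min = 0xc0000000UL;  /* LINEAR_MIN_KERNEL_ADDRESS */

        printf("temporary slot %lu aliases permanent slot %lu\n",
               lin2pdenum(vm_min), lin2pdenum(lin_min));
        return 0;
    }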
/*
@@ -304,21 +495,23 @@ i386at_init(void)
void c_boot_entry(vm_offset_t bi)
{
/* Stash the boot_image_info pointer. */
- boot_info = *(struct multiboot_info*)phystokv(bi);
+ boot_info = *(typeof(boot_info)*)phystokv(bi);
int cpu_type;
- /* XXX we currently assume phys_mem_va is always 0 here -
- if it isn't, we must tweak the pointers in the boot_info. */
-
/* Before we do _anything_ else, print the hello message.
If there are no initialized console devices yet,
it will be stored and printed at the first opportunity. */
printf(version);
printf("\n");
- /* Find the kernel command line, if there is one. */
- if (boot_info.flags & MULTIBOOT_CMDLINE)
- kernel_cmdline = (char*)phystokv(boot_info.cmdline);
+#ifdef MACH_XEN
+ printf("Running on %s.\n", boot_info.magic);
+ if (boot_info.flags & SIF_PRIVILEGED)
+ panic("Mach can't run as dom0.");
+#ifdef MACH_PSEUDO_PHYS
+ mfn_list = (void*)boot_info.mfn_list;
+#endif
+#else /* MACH_XEN */
#if MACH_KDB
/*
@@ -341,6 +534,7 @@ void c_boot_entry(vm_offset_t bi)
symtab_size, strtab_size);
}
#endif /* MACH_KDB */
+#endif /* MACH_XEN */
cpu_type = discover_x86_cpu_type ();
@@ -366,8 +560,13 @@ void c_boot_entry(vm_offset_t bi)
*/
if (strstr(kernel_cmdline, "-d ")) {
cninit(); /* need console for debugger */
- Debugger();
+ SoftDebugger("init");
}
+#else
+ if (strstr (kernel_cmdline, "-H "))
+ {
+ reboot_on_panic = 0;
+ }
#endif /* MACH_KDB */
machine_slot[0].is_cpu = TRUE;
@@ -431,7 +630,7 @@ inittodr(void)
new_time.seconds = 0;
new_time.microseconds = 0;
- (void) readtodc(&new_time.seconds);
+ (void) readtodc((u_int *)&new_time.seconds);
{
spl_t s = splhigh();
@@ -456,6 +655,12 @@ boolean_t
init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
{
vm_offset_t addr;
+
+#ifdef MACH_HYP
+ /* There is none */
+ if (!avail_next)
+ avail_next = _kvtophys(boot_info.pt_base) + (boot_info.nr_pt_frames + 3) * 0x1000;
+#else /* MACH_HYP */
extern char start[], end[];
int i;
static int wrapped = 0;
@@ -474,11 +679,14 @@ init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
: 0;
retry:
+#endif /* MACH_HYP */
/* Page-align the start address. */
avail_next = round_page(avail_next);
+#ifndef MACH_HYP
/* Start with memory above 16MB, reserving the low memory for later. */
+ /* Don't care on Xen */
if (!wrapped && phys_last_addr > 16 * 1024*1024)
{
if (avail_next < 16 * 1024*1024)
@@ -494,9 +702,15 @@ init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
wrapped = 1;
}
}
+#endif /* MACH_HYP */
/* Check if we have reached the end of memory. */
- if (avail_next == (wrapped ? 16 * 1024*1024 : phys_last_addr))
+ if (avail_next ==
+ (
+#ifndef MACH_HYP
+ wrapped ? 16 * 1024*1024 :
+#endif /* MACH_HYP */
+ phys_last_addr))
return FALSE;
/* Tentatively assign the current location to the caller. */
@@ -506,6 +720,7 @@ init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
and see where that puts us. */
avail_next += size;
+#ifndef MACH_HYP
/* Skip past the I/O and ROM area. */
if ((avail_next > (boot_info.mem_lower * 0x400)) && (addr < 0x100000))
{
@@ -551,6 +766,7 @@ init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
/* XXX string */
}
}
+#endif /* MACH_HYP */
avail_remaining -= size;
@@ -580,6 +796,11 @@ boolean_t pmap_valid_page(x)
vm_offset_t x;
{
/* XXX is this OK? What does it matter for? */
- return (((phys_first_addr <= x) && (x < phys_last_addr)) &&
- !(((boot_info.mem_lower * 1024) <= x) && (x < 1024*1024)));
+ return (((phys_first_addr <= x) && (x < phys_last_addr))
+#ifndef MACH_HYP
+ && !(
+ ((boot_info.mem_lower * 1024) <= x) &&
+ (x < 1024*1024))
+#endif /* MACH_HYP */
+ );
}
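
The init_alloc_aligned() hunks above page-align the allocation cursor and, on non-Xen builds, skip the I/O and ROM window below 1 MiB. A minimal, stand-alone sketch of that bump-allocation idea; the addresses, names and simplified hole check here are hypothetical, not the kernel's actual logic:

/* Minimal sketch of the init_alloc_aligned() idea: a bump allocator that
   page-aligns each request and skips a reserved window (illustrative only). */
#include <stdio.h>

#define PAGE_SIZE 4096
#define round_page(x) (((x) + PAGE_SIZE - 1) & ~(unsigned long)(PAGE_SIZE - 1))

static unsigned long avail_next = 0x9f000;       /* current cursor */
static const unsigned long hole_start = 0xa0000; /* e.g. I/O + ROM area */
static const unsigned long hole_end   = 0x100000;

static int init_alloc(unsigned long size, unsigned long *addrp)
{
    unsigned long addr = round_page(avail_next);

    if (addr < hole_end && addr + size > hole_start)
        addr = round_page(hole_end);             /* skip the reserved window */

    *addrp = addr;
    avail_next = addr + size;
    return 1;
}

int main(void)
{
    unsigned long a;
    init_alloc(8192, &a);
    printf("allocated at %#lx\n", a);            /* lands above 1 MiB */
    return 0;
}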
diff --git a/i386/i386at/pic_isa.c b/i386/i386at/pic_isa.c
index 9d791c77..811ee1c5 100644
--- a/i386/i386at/pic_isa.c
+++ b/i386/i386at/pic_isa.c
@@ -30,10 +30,10 @@
/* These interrupts are always present */
-extern intnull(), fpintr(), hardclock(), kdintr();
-extern prtnull();
+extern void intnull(), fpintr(), hardclock(), kdintr();
+extern void prtnull();
-int (*ivect[NINTR])() = {
+void (*ivect[NINTR])() = {
/* 00 */ hardclock, /* always */
#if RCLINE < 0
/* 01 */ kdintr, /* kdintr, ... */
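
The pic_isa.c hunk above replaces the old implicit-int handler declarations with explicit void-returning ones for the interrupt vector table. A stand-alone sketch of the same typing pattern, with prototyped handlers; the handler names and output below are purely illustrative, not part of the patch:

/* Hypothetical stand-alone illustration of the ivect[] typing fix. */
#include <stdio.h>

#define NINTR 4

static void intnull(void)   { puts("spurious"); }
static void hardclock(void) { puts("clock");    }

/* Array of pointers to functions returning void, as in the patch. */
static void (*ivect[NINTR])(void) = {
    hardclock, intnull, intnull, intnull,
};

int main(void)
{
    ivect[0]();   /* dispatch as the PIC handler would */
    return 0;
}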
diff --git a/i386/i386at/rtc.c b/i386/i386at/rtc.c
index a66c5a8c..e0a03dee 100644
--- a/i386/i386at/rtc.c
+++ b/i386/i386at/rtc.c
@@ -75,7 +75,7 @@ unsigned char *regs;
first_rtcopen_ever = 0;
}
outb(RTC_ADDR, RTC_D);
- if (inb(RTC_DATA) & RTC_VRT == 0) return(-1);
+ if ((inb(RTC_DATA) & RTC_VRT) == 0) return(-1);
outb(RTC_ADDR, RTC_A);
while (inb(RTC_DATA) & RTC_UIP) /* busy wait */
outb(RTC_ADDR, RTC_A);
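
The rtc.c hunk above is an operator-precedence fix: `==` binds tighter than `&`, so the old test parsed as `inb(RTC_DATA) & (RTC_VRT == 0)`, which is always zero and never detects invalid CMOS RAM. A small check of that parsing, using only the RTC_VRT value from the header; the rest is illustrative:

/* Illustrative only: shows why the added parentheses matter. */
#include <assert.h>

#define RTC_VRT 0x80

int main(void)
{
    unsigned char data = 0x00;   /* VRT bit clear: RTC reports invalid time */

    /* Old form: parsed as data & (RTC_VRT == 0), i.e. data & 0 == always false,
       so the error path could never trigger. */
    assert(!(data & RTC_VRT == 0));

    /* Fixed form from the patch: correctly detects the invalid-RAM condition. */
    assert((data & RTC_VRT) == 0);
    return 0;
}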
diff --git a/i386/i386at/rtc.h b/i386/i386at/rtc.h
index e8d19670..ced39b98 100644
--- a/i386/i386at/rtc.h
+++ b/i386/i386at/rtc.h
@@ -134,4 +134,5 @@ struct rtc_st {
} \
}
-
+extern int readtodc(u_int *tp);
+extern int writetodc(void);
diff --git a/i386/include/mach/i386/cthreads.h b/i386/include/mach/i386/cthreads.h
index e10cda3d..f9755b4a 100644
--- a/i386/include/mach/i386/cthreads.h
+++ b/i386/include/mach/i386/cthreads.h
@@ -37,14 +37,14 @@ typedef volatile int spin_lock_t;
({ register int _u__ ; \
__asm__ volatile("xorl %0, %0; \n\
xchgl %0, %1" \
- : "=&r" (_u__), "=m" (*(p)) ); \
+ : "=&r" (_u__), "=m" (*(p)) : "memory" ); \
0; })
#define spin_try_lock(p)\
(!({ boolean_t _r__; \
__asm__ volatile("movl $1, %0; \n\
xchgl %0, %1" \
- : "=&r" (_r__), "=m" (*(p)) ); \
+ : "=&r" (_r__), "=m" (*(p)) : "memory" ); \
_r__; }))
#define cthread_sp() \
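
The cthreads.h hunk above adds a "memory" clobber to the spin-lock asm so the compiler does not cache memory values across the lock operation. A stand-alone sketch of the same idea using the standard clobber-list placement; this is a simplified illustration, not the header's exact macro:

/* Sketch of an xchg-based try-lock with a "memory" clobber (assumed layout). */
static inline int try_lock(volatile int *p)
{
    int r;
    __asm__ volatile("movl $1, %0; xchgl %0, %1"
                     : "=&r" (r), "+m" (*p)
                     :
                     : "memory");   /* acts as a compiler barrier */
    return r == 0;                  /* 0 means the lock was free and is now ours */
}

int main(void)
{
    volatile int lock = 0;
    return try_lock(&lock) ? 0 : 1;
}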
diff --git a/i386/include/mach/i386/fp_reg.h b/i386/include/mach/i386/fp_reg.h
index 6fe7af56..56730555 100644
--- a/i386/include/mach/i386/fp_reg.h
+++ b/i386/include/mach/i386/fp_reg.h
@@ -46,10 +46,30 @@ struct i386_fp_save {
};
struct i386_fp_regs {
- unsigned short fp_reg_word[5][8];
+ unsigned short fp_reg_word[8][5];
/* space for 8 80-bit FP registers */
};
+struct i386_xfp_save {
+ unsigned short fp_control; /* control */
+ unsigned short fp_status; /* status */
+ unsigned short fp_tag; /* register tags */
+ unsigned short fp_opcode; /* opcode of failed instruction */
+ unsigned int fp_eip; /* eip at failed instruction */
+ unsigned short fp_cs; /* cs at failed instruction */
+ unsigned short fp_unused_1;
+ unsigned int fp_dp; /* data address */
+ unsigned short fp_ds; /* data segment */
+ unsigned short fp_unused_2;
+ unsigned int fp_mxcsr; /* MXCSR */
+ unsigned int fp_mxcsr_mask; /* MXCSR_MASK */
+ unsigned char fp_reg_word[8][16];
+ /* space for 8 128-bit FP registers */
+ unsigned char fp_xreg_word[8][16];
+ /* space for 8 128-bit XMM registers */
+ unsigned int padding[56];
+} __attribute__((aligned(16)));
+
/*
* Control register
*/
@@ -104,5 +124,6 @@ struct i386_fp_regs {
#define FP_SOFT 1 /* software FP emulator */
#define FP_287 2 /* 80287 */
#define FP_387 3 /* 80387 or 80486 */
+#define FP_387X 4 /* FXSAVE/RSTOR-capable */
#endif /* _MACH_I386_FP_REG_H_ */
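
The new struct i386_xfp_save above mirrors the hardware FXSAVE area, which is 512 bytes and must be 16-byte aligned. A compile-time size check of an equivalent layout; the struct below is a local copy for illustration, not the header itself:

/* Hypothetical size check for an FXSAVE-style area like i386_xfp_save. */
struct xfp_like {
    unsigned short fp_control, fp_status, fp_tag, fp_opcode;
    unsigned int   fp_eip;
    unsigned short fp_cs, fp_unused_1;
    unsigned int   fp_dp;
    unsigned short fp_ds, fp_unused_2;
    unsigned int   fp_mxcsr, fp_mxcsr_mask;
    unsigned char  fp_reg_word[8][16];    /* ST0-ST7 / MM0-MM7 */
    unsigned char  fp_xreg_word[8][16];   /* XMM0-XMM7 */
    unsigned int   padding[56];
} __attribute__((aligned(16)));

/* C89-friendly static assertion: 32 + 128 + 128 + 224 = 512 bytes. */
typedef char xfp_is_512_bytes[(sizeof(struct xfp_like) == 512) ? 1 : -1];

int main(void) { return 0; }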
diff --git a/i386/include/mach/i386/thread_status.h b/i386/include/mach/i386/thread_status.h
index cc3dc663..5f20355e 100644
--- a/i386/include/mach/i386/thread_status.h
+++ b/i386/include/mach/i386/thread_status.h
@@ -111,7 +111,7 @@ struct i386_thread_state {
(sizeof (struct i386_fp_save) + sizeof (struct i386_fp_regs))
struct i386_float_state {
- int fpkind; /* FP_NO..FP_387 (readonly) */
+ int fpkind; /* FP_NO..FP_387X (readonly) */
int initialized;
unsigned char hw_state[FP_STATE_BYTES]; /* actual "hardware" state */
int exc_status; /* exception status (readonly) */
diff --git a/i386/include/mach/i386/vm_param.h b/i386/include/mach/i386/vm_param.h
index a6b1efd5..6d7c5f3c 100644
--- a/i386/include/mach/i386/vm_param.h
+++ b/i386/include/mach/i386/vm_param.h
@@ -66,8 +66,13 @@
#define i386_trunc_page(x) (((unsigned)(x)) & ~(I386_PGBYTES-1))
/* User address spaces are 3GB each,
- starting at virtual and linear address 0. */
-#define VM_MIN_ADDRESS ((vm_offset_t) 0)
-#define VM_MAX_ADDRESS ((vm_offset_t) 0xc0000000)
+ starting at virtual and linear address 0.
+
+ VM_MAX_ADDRESS can be reduced to leave more space for the kernel, but must
+ not be increased to more than 3GB as glibc and hurd servers would not cope
+ with that.
+ */
+#define VM_MIN_ADDRESS (0)
+#define VM_MAX_ADDRESS (0xc0000000UL)
#endif /* _MACH_I386_VM_PARAM_H_ */
diff --git a/i386/include/mach/i386/vm_types.h b/i386/include/mach/i386/vm_types.h
index 7fb1bcb0..d54008ef 100644
--- a/i386/include/mach/i386/vm_types.h
+++ b/i386/include/mach/i386/vm_types.h
@@ -74,6 +74,7 @@ typedef unsigned int uint32;
* e.g. an offset into a virtual memory space.
*/
typedef natural_t vm_offset_t;
+typedef vm_offset_t * vm_offset_array_t;
/*
* A vm_size_t is the proper type for e.g.
diff --git a/i386/include/mach/sa/stdarg.h b/i386/include/mach/sa/stdarg.h
index 01b2dc76..ba0f78a1 100644
--- a/i386/include/mach/sa/stdarg.h
+++ b/i386/include/mach/sa/stdarg.h
@@ -29,6 +29,16 @@
#ifndef _MACH_SA_STDARG_H_
#define _MACH_SA_STDARG_H_
+#if __GNUC__ >= 3
+
+typedef __builtin_va_list va_list;
+
+#define va_start(v,l) __builtin_va_start(v,l)
+#define va_end(v) __builtin_va_end(v)
+#define va_arg(v,l) __builtin_va_arg(v,l)
+
+#else
+
#define __va_size(type) ((sizeof(type)+3) & ~0x3)
#ifndef _VA_LIST_
@@ -43,4 +53,6 @@ typedef char *va_list;
((pvar) += __va_size(type), \
*((type *)((pvar) - __va_size(type))))
+#endif
+
#endif /* _MACH_SA_STDARG_H_ */
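
The stdarg.h hunk above switches to the compiler builtins for GCC 3 and later, keeping the old pointer-arithmetic fallback. A short usage sketch of the resulting va_list interface; the sum() helper is illustrative:

/* Usage sketch for the builtin-based va_list path (GCC >= 3). */
#include <stdarg.h>
#include <stdio.h>

static int sum(int count, ...)
{
    va_list ap;
    int i, total = 0;

    va_start(ap, count);
    for (i = 0; i < count; i++)
        total += va_arg(ap, int);
    va_end(ap);
    return total;
}

int main(void)
{
    printf("%d\n", sum(3, 1, 2, 3));   /* prints 6 */
    return 0;
}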
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index 539e3f05..3ee6338d 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -77,12 +77,18 @@
#include <vm/vm_user.h>
#include <mach/machine/vm_param.h>
+#include <mach/xen.h>
#include <machine/thread.h>
#include <i386/cpu_number.h>
#include <i386/proc_reg.h>
#include <i386/locore.h>
+#include <i386/model_dep.h>
+#ifdef MACH_PSEUDO_PHYS
+#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = pte_entry?pa_to_ma(pte_entry):0;
+#else /* MACH_PSEUDO_PHYS */
#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
+#endif /* MACH_PSEUDO_PHYS */
/*
* Private data structures.
@@ -142,13 +148,6 @@ char *pv_lock_table; /* pointer to array of bits */
boolean_t pmap_initialized = FALSE;
/*
- * More-specific code provides these;
- * they indicate the total extent of physical memory
- * that we know about and might ever have to manage.
- */
-extern vm_offset_t phys_first_addr, phys_last_addr;
-
-/*
* Range of kernel virtual addresses available for kernel memory mapping.
* Does not include the virtual addresses used to map physical memory 1-1.
* Initialized by pmap_bootstrap.
@@ -302,7 +301,7 @@ lock_data_t pmap_system_lock;
\
/* invalidate our own TLB if pmap is in use */ \
if ((pmap)->cpus_using & cpu_mask) { \
- INVALIDATE_TLB((s), (e)); \
+ INVALIDATE_TLB((pmap), (s), (e)); \
} \
}
@@ -323,7 +322,7 @@ lock_data_t pmap_system_lock;
#define PMAP_UPDATE_TLBS(pmap, s, e) { \
/* invalidate our own TLB if pmap is in use */ \
if ((pmap)->cpus_using) { \
- INVALIDATE_TLB((s), (e)); \
+ INVALIDATE_TLB((pmap), (s), (e)); \
} \
}
@@ -331,9 +330,38 @@ lock_data_t pmap_system_lock;
#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
-#define INVALIDATE_TLB(s, e) { \
+#ifdef MACH_HYP
+#if 1
+#define INVALIDATE_TLB(pmap, s, e) hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL)
+#else
+#define INVALIDATE_TLB(pmap, s, e) do { \
+ if (__builtin_constant_p((e) - (s)) \
+ && (e) - (s) == PAGE_SIZE) \
+ hyp_invlpg((pmap) == kernel_pmap ? kvtolin(s) : (s)); \
+ else \
+ hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL); \
+} while(0)
+#endif
+#else /* MACH_HYP */
+#if 0
+/* It is hard to know when a TLB flush becomes less expensive than a bunch of
+ * invlpgs. But it surely is more expensive than just one invlpg. */
+#define INVALIDATE_TLB(pmap, s, e) { \
+ if (__builtin_constant_p((e) - (s)) \
+ && (e) - (s) == PAGE_SIZE) \
+ invlpg_linear(s); \
+ else \
+ flush_tlb(); \
+}
+#else
+#define INVALIDATE_TLB(pmap, s, e) { \
+ (void) (pmap); \
+ (void) (s); \
+ (void) (e); \
flush_tlb(); \
}
+#endif
+#endif /* MACH_HYP */
#if NCPUS > 1
@@ -503,6 +531,10 @@ vm_offset_t pmap_map_bd(virt, start, end, prot)
register pt_entry_t template;
register pt_entry_t *pte;
int spl;
+#ifdef MACH_XEN
+ int n, i = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_XEN */
template = pa_to_pte(start)
| INTEL_PTE_NCACHE|INTEL_PTE_WTHRU
@@ -517,11 +549,30 @@ vm_offset_t pmap_map_bd(virt, start, end, prot)
pte = pmap_pte(kernel_pmap, virt);
if (pte == PT_ENTRY_NULL)
panic("pmap_map_bd: Invalid kernel address\n");
+#ifdef MACH_XEN
+ update[i].ptr = kv_to_ma(pte);
+ update[i].val = pa_to_ma(template);
+ i++;
+ if (i == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_map_bd\n");
+ i = 0;
+ }
+#else /* MACH_XEN */
WRITE_PTE(pte, template)
+#endif /* MACH_XEN */
pte_increment_pa(template);
virt += PAGE_SIZE;
start += PAGE_SIZE;
}
+#ifdef MACH_XEN
+ if (i > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_map_bd");
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_map_bd\n");
+#endif /* MACH_XEN */
PMAP_READ_UNLOCK(pmap, spl);
return(virt);
}
@@ -568,24 +619,87 @@ void pmap_bootstrap()
* mapped into the kernel address space,
* and extends to a stupid arbitrary limit beyond that.
*/
- kernel_virtual_start = phys_last_addr;
- kernel_virtual_end = phys_last_addr + morevm
- + (phys_last_addr - phys_first_addr) / 15;
+ kernel_virtual_start = phystokv(phys_last_addr);
+ kernel_virtual_end = phystokv(phys_last_addr) + morevm
+ + (phys_last_addr - phys_first_addr);
- if (kernel_virtual_end < phys_last_addr
+ if (kernel_virtual_end < kernel_virtual_start
|| kernel_virtual_end > VM_MAX_KERNEL_ADDRESS)
kernel_virtual_end = VM_MAX_KERNEL_ADDRESS;
/*
* Allocate and clear a kernel page directory.
*/
- kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)pmap_grab_page();
+ /* Note: the initial Xen mapping provides at least 512kB of free mapped pages.
+ * We use that to build our linear mapping directly. */
+#if PAE
+ {
+ vm_offset_t addr;
+ init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr);
+ kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(addr);
+ }
+ kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page());
+ {
+ int i;
+ for (i = 0; i < PDPNUM; i++)
+ WRITE_PTE(&kernel_pmap->pdpbase[i], pa_to_pte(_kvtophys((void *) kernel_pmap->dirbase + i * INTEL_PGBYTES)) | INTEL_PTE_VALID);
+ }
+#else /* PAE */
+ kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page());
+#endif /* PAE */
{
int i;
for (i = 0; i < NPDES; i++)
kernel_pmap->dirbase[i] = 0;
}
+#ifdef MACH_XEN
+ /*
+ * Xen may provide as little as 512KB of extra bootstrap linear memory,
+ * which is far from enough to map all available memory, so we need to
+ * map more bootstrap linear memory. Here we map one additional L1 table
+ * (resp. four for PAE), i.e. 4MiB of extra mapped memory (resp. 8MiB),
+ * which is enough to hold a pagetable mapping 4GiB.
+ */
+#ifdef PAE
+#define NSUP_L1 4
+#else
+#define NSUP_L1 1
+#endif
+ pt_entry_t *l1_map[NSUP_L1];
+ {
+ pt_entry_t *base = (pt_entry_t*) boot_info.pt_base;
+ int i;
+ int n_l1map;
+#ifdef PAE
+ pt_entry_t *l2_map = (pt_entry_t*) ptetokv(base[0]);
+#else /* PAE */
+ pt_entry_t *l2_map = base;
+#endif /* PAE */
+ for (n_l1map = 0, i = lin2pdenum(VM_MIN_KERNEL_ADDRESS); i < NPTES; i++) {
+ if (!(l2_map[i] & INTEL_PTE_VALID)) {
+ struct mmu_update update;
+ int j, n;
+
+ l1_map[n_l1map] = (pt_entry_t*) phystokv(pmap_grab_page());
+ for (j = 0; j < NPTES; j++)
+ l1_map[n_l1map][j] = (((pt_entry_t)pfn_to_mfn((i - lin2pdenum(VM_MIN_KERNEL_ADDRESS)) * NPTES + j)) << PAGE_SHIFT) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ pmap_set_page_readonly_init(l1_map[n_l1map]);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (l1_map[n_l1map])))
+ panic("couldn't pin page %p(%p)", l1_map[n_l1map], (vm_offset_t) kv_to_ma (l1_map[n_l1map]));
+ update.ptr = kv_to_ma(&l2_map[i]);
+ update.val = kv_to_ma(l1_map[n_l1map]) | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ hyp_mmu_update(kv_to_la(&update), 1, kv_to_la(&n), DOMID_SELF);
+ if (n != 1)
+ panic("couldn't complete bootstrap map");
+ /* added the last L1 table, can stop */
+ if (++n_l1map >= NSUP_L1)
+ break;
+ }
+ }
+ }
+#endif /* MACH_XEN */
+
/*
* Allocate and set up the kernel page tables.
*/
@@ -604,37 +718,60 @@ void pmap_bootstrap()
* to allocate new kernel page tables later.
* XX fix this
*/
- for (va = phys_first_addr; va < kernel_virtual_end; )
+ for (va = phystokv(phys_first_addr); va < kernel_virtual_end; )
{
pt_entry_t *pde = kernel_page_dir + lin2pdenum(kvtolin(va));
- pt_entry_t *ptable = (pt_entry_t*)pmap_grab_page();
+ pt_entry_t *ptable = (pt_entry_t*)phystokv(pmap_grab_page());
pt_entry_t *pte;
/* Initialize the page directory entry. */
- *pde = pa_to_pte((vm_offset_t)ptable)
- | INTEL_PTE_VALID | INTEL_PTE_WRITE | global;
+ WRITE_PTE(pde, pa_to_pte((vm_offset_t)_kvtophys(ptable))
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE);
/* Initialize the page table. */
- for (pte = ptable; (va < phys_last_addr) && (pte < ptable+NPTES); pte++)
+ for (pte = ptable; (va < phystokv(phys_last_addr)) && (pte < ptable+NPTES); pte++)
{
if ((pte - ptable) < ptenum(va))
{
WRITE_PTE(pte, 0);
}
else
+#ifdef MACH_XEN
+ if (va == (vm_offset_t) &hyp_shared_info)
+ {
+ *pte = boot_info.shared_info | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+ va += INTEL_PGBYTES;
+ }
+ else
+#endif /* MACH_XEN */
{
extern char _start[], etext[];
- if ((va >= (vm_offset_t)_start)
+ if (((va >= (vm_offset_t) _start)
&& (va + INTEL_PGBYTES <= (vm_offset_t)etext))
+#ifdef MACH_XEN
+ || (va >= (vm_offset_t) boot_info.pt_base
+ && (va + INTEL_PGBYTES <=
+ (vm_offset_t) ptable + INTEL_PGBYTES))
+#endif /* MACH_XEN */
+ )
{
- WRITE_PTE(pte, pa_to_pte(va)
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
| INTEL_PTE_VALID | global);
}
else
{
- WRITE_PTE(pte, pa_to_pte(va)
- | INTEL_PTE_VALID | INTEL_PTE_WRITE | global);
+#ifdef MACH_XEN
+ int i;
+ for (i = 0; i < NSUP_L1; i++)
+ if (va == (vm_offset_t) l1_map[i])
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
+ | INTEL_PTE_VALID | global);
+ if (i == NSUP_L1)
+#endif /* MACH_XEN */
+ WRITE_PTE(pte, pa_to_pte(_kvtophys(va))
+ | INTEL_PTE_VALID | INTEL_PTE_WRITE | global)
+
}
va += INTEL_PGBYTES;
}
@@ -644,6 +781,11 @@ void pmap_bootstrap()
WRITE_PTE(pte, 0);
va += INTEL_PGBYTES;
}
+#ifdef MACH_XEN
+ pmap_set_page_readonly_init(ptable);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn (ptable)))
+ panic("couldn't pin page %p(%p)\n", ptable, (vm_offset_t) kv_to_ma (ptable));
+#endif /* MACH_XEN */
}
}
@@ -651,6 +793,100 @@ void pmap_bootstrap()
soon after we return from here. */
}
+#ifdef MACH_XEN
+/* These are only required because of Xen security policies */
+
+/* Set back a page read write */
+void pmap_set_page_readwrite(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+ vm_offset_t paddr = kvtophys(vaddr);
+ vm_offset_t canon_vaddr = phystokv(paddr);
+ if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE))
+ panic("couldn't set hiMMU readwrite for addr %p(%p)\n", vaddr, (vm_offset_t) pa_to_ma (paddr));
+ if (canon_vaddr != vaddr)
+ if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID | INTEL_PTE_WRITE, UVMF_NONE))
+ panic("couldn't set hiMMU readwrite for paddr %p(%p)\n", canon_vaddr, (vm_offset_t) pa_to_ma (paddr));
+}
+
+/* Set a page read only (so as to pin it for instance) */
+void pmap_set_page_readonly(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+ vm_offset_t paddr = kvtophys(vaddr);
+ vm_offset_t canon_vaddr = phystokv(paddr);
+ if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID) {
+ if (hyp_do_update_va_mapping (kvtolin(vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set hiMMU readonly for vaddr %p(%p)\n", vaddr, (vm_offset_t) pa_to_ma (paddr));
+ }
+ if (canon_vaddr != vaddr &&
+ *pmap_pde(kernel_pmap, canon_vaddr) & INTEL_PTE_VALID) {
+ if (hyp_do_update_va_mapping (kvtolin(canon_vaddr), pa_to_pte (pa_to_ma(paddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set hiMMU readonly for vaddr %p canon_vaddr %p paddr %p (%p)\n", vaddr, canon_vaddr, paddr, (vm_offset_t) pa_to_ma (paddr));
+ }
+}
+
+/* This needs to be called instead of pmap_set_page_readonly as long as CR3
+ * still points to the bootstrap dirbase. */
+void pmap_set_page_readonly_init(void *_vaddr) {
+ vm_offset_t vaddr = (vm_offset_t) _vaddr;
+#if PAE
+ pt_entry_t *pdpbase = (void*) boot_info.pt_base;
+ vm_offset_t dirbase = ptetokv(pdpbase[0]);
+#else
+ vm_offset_t dirbase = boot_info.pt_base;
+#endif
+ struct pmap linear_pmap = {
+ .dirbase = (void*) dirbase,
+ };
+ /* Modify our future kernel map (can't use update_va_mapping for this)... */
+ if (*pmap_pde(kernel_pmap, vaddr) & INTEL_PTE_VALID)
+ if (!hyp_mmu_update_la (kvtolin(vaddr), pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID))
+ panic("couldn't set hiMMU readonly for vaddr %p(%p)\n", vaddr, (vm_offset_t) kv_to_ma (vaddr));
+ /* ... and the bootstrap map. */
+ if (*pmap_pde(&linear_pmap, vaddr) & INTEL_PTE_VALID)
+ if (hyp_do_update_va_mapping (vaddr, pa_to_pte (kv_to_ma(vaddr)) | INTEL_PTE_VALID, UVMF_NONE))
+ panic("couldn't set MMU readonly for vaddr %p(%p)\n", vaddr, (vm_offset_t) kv_to_ma (vaddr));
+}
+
+void pmap_clear_bootstrap_pagetable(pt_entry_t *base) {
+ int i;
+ pt_entry_t *dir;
+ vm_offset_t va = 0;
+#if PAE
+ int j;
+#endif /* PAE */
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(base)))
+ panic("pmap_clear_bootstrap_pagetable: couldn't unpin page %p(%p)\n", base, (vm_offset_t) kv_to_ma(base));
+#if PAE
+ for (j = 0; j < PDPNUM; j++)
+ {
+ pt_entry_t pdpe = base[j];
+ if (pdpe & INTEL_PTE_VALID) {
+ dir = (pt_entry_t *) ptetokv(pdpe);
+#else /* PAE */
+ dir = base;
+#endif /* PAE */
+ for (i = 0; i < NPTES; i++) {
+ pt_entry_t pde = dir[i];
+ unsigned long pfn = mfn_to_pfn(atop(pde));
+ void *pgt = (void*) phystokv(ptoa(pfn));
+ if (pde & INTEL_PTE_VALID)
+ hyp_free_page(pfn, pgt);
+ va += NPTES * INTEL_PGBYTES;
+ if (va >= HYP_VIRT_START)
+ break;
+ }
+#if PAE
+ hyp_free_page(atop(_kvtophys(dir)), dir);
+ } else
+ va += NPTES * NPTES * INTEL_PGBYTES;
+ if (va >= HYP_VIRT_START)
+ break;
+ }
+#endif /* PAE */
+ hyp_free_page(atop(_kvtophys(base)), base);
+}
+#endif /* MACH_XEN */
+
void pmap_virtual_space(startp, endp)
vm_offset_t *startp;
vm_offset_t *endp;
@@ -669,7 +905,9 @@ void pmap_init()
register long npages;
vm_offset_t addr;
register vm_size_t s;
+#if NCPUS > 1
int i;
+#endif /* NCPUS > 1 */
/*
* Allocate memory for the pv_head_table and its lock bits,
@@ -803,6 +1041,30 @@ pmap_page_table_page_alloc()
return pa;
}
+#ifdef MACH_XEN
+void pmap_map_mfn(void *_addr, unsigned long mfn) {
+ vm_offset_t addr = (vm_offset_t) _addr;
+ pt_entry_t *pte, *pdp;
+ vm_offset_t ptp;
+ pt_entry_t ma = ((pt_entry_t) mfn) << PAGE_SHIFT;
+ if ((pte = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) {
+ ptp = phystokv(pmap_page_table_page_alloc());
+ pmap_set_page_readonly((void*) ptp);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, pa_to_mfn(ptp)))
+ panic("couldn't pin page %p(%p)\n",ptp,(vm_offset_t) kv_to_ma(ptp));
+ pdp = pmap_pde(kernel_pmap, addr);
+ if (!hyp_mmu_update_pte(kv_to_ma(pdp),
+ pa_to_pte(kv_to_ma(ptp)) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pde %p(%p) to %p(%p)\n",__FILE__,__LINE__,kvtophys((vm_offset_t)pdp),(vm_offset_t) kv_to_ma(pdp), ptp, (vm_offset_t) pa_to_ma(ptp));
+ pte = pmap_pte(kernel_pmap, addr);
+ }
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), ma | INTEL_PTE_VALID | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pte %p(%p) to %p(%p)\n",__FILE__,__LINE__,pte,(vm_offset_t) kv_to_ma(pte), ma, ma_to_pa(ma));
+}
+#endif /* MACH_XEN */
+
/*
* Deallocate a page-table page.
* The page-table page must have all mappings removed,
@@ -859,11 +1121,34 @@ pmap_t pmap_create(size)
panic("pmap_create");
if (kmem_alloc_wired(kernel_map,
- (vm_offset_t *)&p->dirbase, INTEL_PGBYTES)
+ (vm_offset_t *)&p->dirbase, PDPNUM * INTEL_PGBYTES)
!= KERN_SUCCESS)
panic("pmap_create");
- memcpy(p->dirbase, kernel_page_dir, INTEL_PGBYTES);
+ memcpy(p->dirbase, kernel_page_dir, PDPNUM * INTEL_PGBYTES);
+#ifdef MACH_XEN
+ {
+ int i;
+ for (i = 0; i < PDPNUM; i++)
+ pmap_set_page_readonly((void*) p->dirbase + i * INTEL_PGBYTES);
+ }
+#endif /* MACH_XEN */
+
+#if PAE
+ if (kmem_alloc_wired(kernel_map,
+ (vm_offset_t *)&p->pdpbase, INTEL_PGBYTES)
+ != KERN_SUCCESS)
+ panic("pmap_create");
+ {
+ int i;
+ for (i = 0; i < PDPNUM; i++)
+ WRITE_PTE(&p->pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) p->dirbase + i * INTEL_PGBYTES)) | INTEL_PTE_VALID);
+ }
+#ifdef MACH_XEN
+ pmap_set_page_readonly(p->pdpbase);
+#endif /* MACH_XEN */
+#endif /* PAE */
+
p->ref_count = 1;
simple_lock_init(&p->lock);
@@ -921,13 +1206,31 @@ void pmap_destroy(p)
if (m == VM_PAGE_NULL)
panic("pmap_destroy: pte page not in object");
vm_page_lock_queues();
+#ifdef MACH_XEN
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
+ panic("pmap_destroy: couldn't unpin page %p(%p)\n", pa, (vm_offset_t) kv_to_ma(pa));
+ pmap_set_page_readwrite((void*) phystokv(pa));
+#endif /* MACH_XEN */
vm_page_free(m);
inuse_ptepages_count--;
vm_page_unlock_queues();
vm_object_unlock(pmap_object);
}
}
- kmem_free(kernel_map, p->dirbase, INTEL_PGBYTES);
+#ifdef MACH_XEN
+ {
+ int i;
+ for (i = 0; i < PDPNUM; i++)
+ pmap_set_page_readwrite((void*) p->dirbase + i * INTEL_PGBYTES);
+ }
+#endif /* MACH_XEN */
+ kmem_free(kernel_map, (vm_offset_t)p->dirbase, PDPNUM * INTEL_PGBYTES);
+#if PAE
+#ifdef MACH_XEN
+ pmap_set_page_readwrite(p->pdpbase);
+#endif /* MACH_XEN */
+ kmem_free(kernel_map, (vm_offset_t)p->pdpbase, INTEL_PGBYTES);
+#endif /* PAE */
zfree(pmap_zone, (vm_offset_t) p);
}
@@ -971,6 +1274,10 @@ void pmap_remove_range(pmap, va, spte, epte)
int num_removed, num_unwired;
int pai;
vm_offset_t pa;
+#ifdef MACH_XEN
+ int n, ii = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_XEN */
#if DEBUG_PTE_PAGE
if (pmap != kernel_pmap)
@@ -999,7 +1306,19 @@ void pmap_remove_range(pmap, va, spte, epte)
register int i = ptes_per_vm_page;
register pt_entry_t *lpte = cpte;
do {
+#ifdef MACH_XEN
+ update[ii].ptr = kv_to_ma(lpte);
+ update[ii].val = 0;
+ ii++;
+ if (ii == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+ ii = 0;
+ }
+#else /* MACH_XEN */
*lpte = 0;
+#endif /* MACH_XEN */
lpte++;
} while (--i > 0);
continue;
@@ -1020,7 +1339,19 @@ void pmap_remove_range(pmap, va, spte, epte)
do {
pmap_phys_attributes[pai] |=
*lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
+#ifdef MACH_XEN
+ update[ii].ptr = kv_to_ma(lpte);
+ update[ii].val = 0;
+ ii++;
+ if (ii == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+ ii = 0;
+ }
+#else /* MACH_XEN */
*lpte = 0;
+#endif /* MACH_XEN */
lpte++;
} while (--i > 0);
}
@@ -1066,6 +1397,14 @@ void pmap_remove_range(pmap, va, spte, epte)
}
}
+#ifdef MACH_XEN
+ if (ii > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_remove_range");
+ hyp_mmu_update(kvtolin(&update), ii, kvtolin(&n), DOMID_SELF);
+ if (n != ii)
+ panic("couldn't pmap_remove_range\n");
+#endif /* MACH_XEN */
+
/*
* Update the counts
*/
@@ -1089,17 +1428,13 @@ void pmap_remove(map, s, e)
register pt_entry_t *pde;
register pt_entry_t *spte, *epte;
vm_offset_t l;
+ vm_offset_t _s = s;
if (map == PMAP_NULL)
return;
PMAP_READ_LOCK(map, spl);
- /*
- * Invalidate the translation buffer first
- */
- PMAP_UPDATE_TLBS(map, s, e);
-
pde = pmap_pde(map, s);
while (s < e) {
l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
@@ -1114,6 +1449,7 @@ void pmap_remove(map, s, e)
s = l;
pde++;
}
+ PMAP_UPDATE_TLBS(map, _s, e);
PMAP_READ_UNLOCK(map, spl);
}
@@ -1179,29 +1515,22 @@ void pmap_page_protect(phys, prot)
prev = pv_e = pv_h;
do {
+ register vm_offset_t va;
+
pmap = pv_e->pmap;
/*
* Lock the pmap to block pmap_extract and similar routines.
*/
simple_lock(&pmap->lock);
- {
- register vm_offset_t va;
-
- va = pv_e->va;
- pte = pmap_pte(pmap, va);
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
- /*
- * Consistency checks.
- */
- /* assert(*pte & INTEL_PTE_VALID); XXX */
- /* assert(pte_to_phys(*pte) == phys); */
-
- /*
- * Invalidate TLBs for all CPUs using this mapping.
- */
- PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
- }
+ /*
+ * Consistency checks.
+ */
+ /* assert(*pte & INTEL_PTE_VALID); XXX */
+ /* assert(pte_to_phys(*pte) == phys); */
/*
* Remove the mapping if new protection is NONE
@@ -1220,7 +1549,12 @@ void pmap_page_protect(phys, prot)
do {
pmap_phys_attributes[pai] |=
*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte++), 0))
+ panic("%s:%d could not clear pte %p\n",__FILE__,__LINE__,pte-1);
+#else /* MACH_XEN */
*pte++ = 0;
+#endif /* MACH_XEN */
} while (--i > 0);
}
@@ -1250,7 +1584,12 @@ void pmap_page_protect(phys, prot)
register int i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WRITE))
+ panic("%s:%d could not enable write on pte %p\n",__FILE__,__LINE__,pte);
+#else /* MACH_XEN */
*pte &= ~INTEL_PTE_WRITE;
+#endif /* MACH_XEN */
pte++;
} while (--i > 0);
@@ -1259,6 +1598,7 @@ void pmap_page_protect(phys, prot)
*/
prev = pv_e;
}
+ PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
simple_unlock(&pmap->lock);
@@ -1293,6 +1633,7 @@ void pmap_protect(map, s, e, prot)
register pt_entry_t *spte, *epte;
vm_offset_t l;
int spl;
+ vm_offset_t _s = s;
if (map == PMAP_NULL)
return;
@@ -1327,11 +1668,6 @@ void pmap_protect(map, s, e, prot)
SPLVM(spl);
simple_lock(&map->lock);
- /*
- * Invalidate the translation buffer first
- */
- PMAP_UPDATE_TLBS(map, s, e);
-
pde = pmap_pde(map, s);
while (s < e) {
l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
@@ -1342,15 +1678,41 @@ void pmap_protect(map, s, e, prot)
spte = &spte[ptenum(s)];
epte = &spte[intel_btop(l-s)];
+#ifdef MACH_XEN
+ int n, i = 0;
+ struct mmu_update update[HYP_BATCH_MMU_UPDATES];
+#endif /* MACH_XEN */
+
while (spte < epte) {
- if (*spte & INTEL_PTE_VALID)
+ if (*spte & INTEL_PTE_VALID) {
+#ifdef MACH_XEN
+ update[i].ptr = kv_to_ma(spte);
+ update[i].val = *spte & ~INTEL_PTE_WRITE;
+ i++;
+ if (i == HYP_BATCH_MMU_UPDATES) {
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_protect\n");
+ i = 0;
+ }
+#else /* MACH_XEN */
*spte &= ~INTEL_PTE_WRITE;
+#endif /* MACH_XEN */
+ }
spte++;
}
+#ifdef MACH_XEN
+ if (i > HYP_BATCH_MMU_UPDATES)
+ panic("overflowed array in pmap_protect");
+ hyp_mmu_update(kvtolin(&update), i, kvtolin(&n), DOMID_SELF);
+ if (n != i)
+ panic("couldn't pmap_protect\n");
+#endif /* MACH_XEN */
}
s = l;
pde++;
}
+ PMAP_UPDATE_TLBS(map, _s, e);
simple_unlock(&map->lock);
SPLX(spl);
@@ -1388,6 +1750,8 @@ if (pmap_debug) printf("pmap(%x, %x)\n", v, pa);
if (pmap == PMAP_NULL)
return;
+ if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
+ panic("pmap_enter(%p, %p) falls in physical memory area!\n", v, pa);
if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
&& !wired /* hack for io_wire */ ) {
/*
@@ -1405,9 +1769,9 @@ if (pmap_debug) printf("pmap(%x, %x)\n", v, pa);
* Invalidate the translation buffer,
* then remove the mapping.
*/
- PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
pmap_remove_range(pmap, v, pte,
pte + ptes_per_vm_page);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
}
PMAP_READ_UNLOCK(pmap, spl);
return;
@@ -1451,7 +1815,7 @@ Retry:
*/
PMAP_READ_UNLOCK(pmap, spl);
- ptp = pmap_page_table_page_alloc();
+ ptp = phystokv(pmap_page_table_page_alloc());
/*
* Re-lock the pmap and check that another thread has
@@ -1466,7 +1830,7 @@ Retry:
* Oops...
*/
PMAP_READ_UNLOCK(pmap, spl);
- pmap_page_table_page_dealloc(ptp);
+ pmap_page_table_page_dealloc(kvtophys(ptp));
PMAP_READ_LOCK(pmap, spl);
continue;
}
@@ -1478,9 +1842,20 @@ Retry:
/*XX pdp = &pmap->dirbase[pdenum(v) & ~(i-1)];*/
pdp = pmap_pde(pmap, v);
do {
+#ifdef MACH_XEN
+ pmap_set_page_readonly((void *) ptp);
+ if (!hyp_mmuext_op_mfn (MMUEXT_PIN_L1_TABLE, kv_to_mfn(ptp)))
+ panic("couldn't pin page %p(%p)\n",ptp,(vm_offset_t) kv_to_ma(ptp));
+ if (!hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdp)),
+ pa_to_pte(pa_to_ma(kvtophys(ptp))) | INTEL_PTE_VALID
+ | INTEL_PTE_USER
+ | INTEL_PTE_WRITE))
+ panic("%s:%d could not set pde %p(%p,%p) to %p(%p,%p) %p\n",__FILE__,__LINE__, pdp, kvtophys((vm_offset_t)pdp), (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)pdp)), ptp, kvtophys(ptp), (vm_offset_t) pa_to_ma(kvtophys(ptp)), (vm_offset_t) pa_to_pte(kv_to_ma(ptp)));
+#else /* MACH_XEN */
*pdp = pa_to_pte(ptp) | INTEL_PTE_VALID
| INTEL_PTE_USER
| INTEL_PTE_WRITE;
+#endif /* MACH_XEN */
pdp++;
ptp += INTEL_PGBYTES;
} while (--i > 0);
@@ -1516,15 +1891,20 @@ Retry:
template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
if (wired)
template |= INTEL_PTE_WIRED;
- PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
i = ptes_per_vm_page;
do {
if (*pte & INTEL_PTE_MOD)
template |= INTEL_PTE_MOD;
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template)))
+ panic("%s:%d could not set pte %p to %p\n",__FILE__,__LINE__,pte,template);
+#else /* MACH_XEN */
WRITE_PTE(pte, template)
+#endif /* MACH_XEN */
pte++;
pte_increment_pa(template);
} while (--i > 0);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
}
else {
@@ -1533,17 +1913,12 @@ Retry:
*/
if (*pte) {
/*
- * Invalidate the translation buffer,
- * then remove the mapping.
- */
- PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
-
- /*
* Don't free the pte page if removing last
* mapping - we will immediately replace it.
*/
pmap_remove_range(pmap, v, pte,
pte + ptes_per_vm_page);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
}
if (valid_page(pa)) {
@@ -1630,7 +2005,12 @@ Retry:
template |= INTEL_PTE_WIRED;
i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), pa_to_ma(template))))
+ panic("%s:%d could not set pte %p to %p\n",__FILE__,__LINE__,pte,template);
+#else /* MACH_XEN */
WRITE_PTE(pte, template)
+#endif /* MACH_XEN */
pte++;
pte_increment_pa(template);
} while (--i > 0);
@@ -1685,7 +2065,12 @@ void pmap_change_wiring(map, v, wired)
map->stats.wired_count--;
i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~INTEL_PTE_WIRED)))
+ panic("%s:%d could not wire down pte %p\n",__FILE__,__LINE__,pte);
+#else /* MACH_XEN */
*pte &= ~INTEL_PTE_WIRED;
+#endif /* MACH_XEN */
pte++;
} while (--i > 0);
}
@@ -1771,8 +2156,6 @@ void pmap_collect(p)
* Garbage collect map.
*/
PMAP_READ_LOCK(p, spl);
- PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
-
for (pdp = p->dirbase;
pdp < &p->dirbase[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)];
pdp += ptes_per_vm_page)
@@ -1818,7 +2201,17 @@ void pmap_collect(p)
register int i = ptes_per_vm_page;
register pt_entry_t *pdep = pdp;
do {
+#ifdef MACH_XEN
+ unsigned long pte = *pdep;
+ void *ptable = (void*) ptetokv(pte);
+ if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
+ panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
+ panic("couldn't unpin page %p(%p)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
+ pmap_set_page_readwrite(ptable);
+#else /* MACH_XEN */
*pdep++ = 0;
+#endif /* MACH_XEN */
} while (--i > 0);
}
@@ -1845,6 +2238,8 @@ void pmap_collect(p)
}
}
}
+ PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
+
PMAP_READ_UNLOCK(p, spl);
return;
@@ -2008,6 +2403,7 @@ phys_attribute_clear(phys, bits)
* There are some mappings.
*/
for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
+ register vm_offset_t va;
pmap = pv_e->pmap;
/*
@@ -2015,35 +2411,32 @@ phys_attribute_clear(phys, bits)
*/
simple_lock(&pmap->lock);
- {
- register vm_offset_t va;
-
- va = pv_e->va;
- pte = pmap_pte(pmap, va);
+ va = pv_e->va;
+ pte = pmap_pte(pmap, va);
#if 0
- /*
- * Consistency checks.
- */
- assert(*pte & INTEL_PTE_VALID);
- /* assert(pte_to_phys(*pte) == phys); */
+ /*
+ * Consistency checks.
+ */
+ assert(*pte & INTEL_PTE_VALID);
+ /* assert(pte_to_phys(*pte) == phys); */
#endif
- /*
- * Invalidate TLBs for all CPUs using this mapping.
- */
- PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
- }
-
/*
* Clear modify or reference bits.
*/
{
register int i = ptes_per_vm_page;
do {
+#ifdef MACH_XEN
+ if (!(hyp_mmu_update_pte(kv_to_ma(pte), *pte & ~bits)))
+ panic("%s:%d could not clear bits %lx from pte %p\n",__FILE__,__LINE__,bits,pte);
+#else /* MACH_XEN */
*pte &= ~bits;
+#endif /* MACH_XEN */
} while (--i > 0);
}
+ PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
simple_unlock(&pmap->lock);
}
}
@@ -2311,7 +2704,8 @@ void process_pmap_updates(my_pmap)
if (pmap == my_pmap ||
pmap == kernel_pmap) {
- INVALIDATE_TLB(update_list_p->item[j].start,
+ INVALIDATE_TLB(pmap,
+ update_list_p->item[j].start,
update_list_p->item[j].end);
}
}
@@ -2397,8 +2791,15 @@ pmap_unmap_page_zero ()
int *pte;
pte = (int *) pmap_pte (kernel_pmap, 0);
+ if (!pte)
+ return;
assert (pte);
+#ifdef MACH_XEN
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), 0))
+ printf("couldn't unmap page 0\n");
+#else /* MACH_XEN */
*pte = 0;
- asm volatile ("movl %%cr3,%%eax; movl %%eax,%%cr3" ::: "eax");
+ INVALIDATE_TLB(kernel_pmap, 0, PAGE_SIZE);
+#endif /* MACH_XEN */
}
#endif /* i386 */
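
Several of the pmap.c hunks above (pmap_map_bd, pmap_remove_range, pmap_protect) use the same batching pattern: PTE updates are queued into a local mmu_update array and flushed to the hypervisor when the array fills or the loop ends. A schematic, stand-alone reduction of that pattern; the stub hyp_mmu_update() only mimics the hypercall's "number done" out-parameter, and the batch size is assumed:

/* Schematic of the batched hypercall pattern; not kernel code. */
#include <stdio.h>

#define HYP_BATCH_MMU_UPDATES 256              /* assumed batch size */

struct mmu_update { unsigned long long ptr, val; };

static int hyp_mmu_update(struct mmu_update *reqs, int count, int *done)
{
    *done = count;                             /* pretend every update succeeded */
    return 0;
}

static void clear_ptes(unsigned long long *pte_ma, int npte)
{
    struct mmu_update update[HYP_BATCH_MMU_UPDATES];
    int i = 0, n, k;

    for (k = 0; k < npte; k++) {
        update[i].ptr = pte_ma[k];             /* machine address of the PTE */
        update[i].val = 0;                     /* new value: not present */
        if (++i == HYP_BATCH_MMU_UPDATES) {
            hyp_mmu_update(update, i, &n);     /* flush a full batch */
            i = 0;
        }
    }
    if (i)
        hyp_mmu_update(update, i, &n);         /* flush the final partial batch */
}

int main(void)
{
    unsigned long long fake[3] = { 0x1000, 0x2000, 0x3000 };
    clear_ptes(fake, 3);
    puts("queued 3 PTE clears in one batch");
    return 0;
}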
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 0255f5a0..7ba7d2cb 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -42,6 +42,7 @@
#include <mach/machine/vm_param.h>
#include <mach/vm_statistics.h>
#include <mach/kern_return.h>
+#include <mach/vm_prot.h>
#include <i386/proc_reg.h>
/*
@@ -64,21 +65,41 @@
* i386/i486 Page Table Entry
*/
+#if PAE
+typedef unsigned long long pt_entry_t;
+#else /* PAE */
typedef unsigned int pt_entry_t;
+#endif /* PAE */
#define PT_ENTRY_NULL ((pt_entry_t *) 0)
#endif /* __ASSEMBLER__ */
#define INTEL_OFFMASK 0xfff /* offset within page */
+#if PAE
+#define PDPSHIFT 30 /* page directory pointer */
+#define PDPNUM 4 /* number of page directory pointers */
+#define PDPMASK 3 /* mask for page directory pointer index */
+#define PDESHIFT 21 /* page descriptor shift */
+#define PDEMASK 0x1ff /* mask for page descriptor index */
+#define PTESHIFT 12 /* page table shift */
+#define PTEMASK 0x1ff /* mask for page table index */
+#else /* PAE */
+#define PDPNUM 1 /* number of page directory pointers */
#define PDESHIFT 22 /* page descriptor shift */
#define PDEMASK 0x3ff /* mask for page descriptor index */
#define PTESHIFT 12 /* page table shift */
#define PTEMASK 0x3ff /* mask for page table index */
+#endif /* PAE */
/*
* Convert linear offset to page descriptor index
*/
+#if PAE
+/* Making it include the page directory pointer table index too */
+#define lin2pdenum(a) (((a) >> PDESHIFT) & 0x7ff)
+#else
#define lin2pdenum(a) (((a) >> PDESHIFT) & PDEMASK)
+#endif
/*
* Convert page descriptor index to linear address
@@ -91,7 +112,7 @@ typedef unsigned int pt_entry_t;
#define ptenum(a) (((a) >> PTESHIFT) & PTEMASK)
#define NPTES (intel_ptob(1)/sizeof(pt_entry_t))
-#define NPDES (intel_ptob(1)/sizeof(pt_entry_t))
+#define NPDES (PDPNUM * (intel_ptob(1)/sizeof(pt_entry_t)))
/*
* Hardware pte bit definitions (to be used directly on the ptes
@@ -105,12 +126,25 @@ typedef unsigned int pt_entry_t;
#define INTEL_PTE_NCACHE 0x00000010
#define INTEL_PTE_REF 0x00000020
#define INTEL_PTE_MOD 0x00000040
+#ifdef MACH_XEN
+/* Not supported */
+#define INTEL_PTE_GLOBAL 0x00000000
+#else /* MACH_XEN */
#define INTEL_PTE_GLOBAL 0x00000100
+#endif /* MACH_XEN */
#define INTEL_PTE_WIRED 0x00000200
+#ifdef PAE
+#define INTEL_PTE_PFN 0xfffffffffffff000ULL
+#else
#define INTEL_PTE_PFN 0xfffff000
+#endif
#define pa_to_pte(a) ((a) & INTEL_PTE_PFN)
+#ifdef MACH_PSEUDO_PHYS
+#define pte_to_pa(p) ma_to_pa((p) & INTEL_PTE_PFN)
+#else /* MACH_PSEUDO_PHYS */
#define pte_to_pa(p) ((p) & INTEL_PTE_PFN)
+#endif /* MACH_PSEUDO_PHYS */
#define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1)
/*
@@ -123,7 +157,10 @@ typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
/* changed by other processors */
struct pmap {
- pt_entry_t *dirbase; /* page directory pointer register */
+ pt_entry_t *dirbase; /* page directory table */
+#if PAE
+ pt_entry_t *pdpbase; /* page directory pointer table */
+#endif /* PAE */
int ref_count; /* reference count */
decl_simple_lock_data(,lock)
/* lock on map */
@@ -135,7 +172,19 @@ typedef struct pmap *pmap_t;
#define PMAP_NULL ((pmap_t) 0)
-#define set_dirbase(dirbase) set_cr3(dirbase)
+#ifdef MACH_XEN
+extern void pmap_set_page_readwrite(void *addr);
+extern void pmap_set_page_readonly(void *addr);
+extern void pmap_set_page_readonly_init(void *addr);
+extern void pmap_map_mfn(void *addr, unsigned long mfn);
+extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr);
+#endif /* MACH_XEN */
+
+#if PAE
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase))
+#else /* PAE */
+#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->dirbase))
+#endif /* PAE */
#if NCPUS > 1
/*
@@ -233,7 +282,7 @@ pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t addr);
/* \
* If this is the kernel pmap, switch to its page tables. \
*/ \
- set_dirbase(kvtophys(tpmap->dirbase)); \
+ set_pmap(tpmap); \
} \
else { \
/* \
@@ -251,7 +300,7 @@ pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t addr);
* No need to invalidate the TLB - the entire user pmap \
* will be invalidated by reloading dirbase. \
*/ \
- set_dirbase(kvtophys(tpmap->dirbase)); \
+ set_pmap(tpmap); \
\
/* \
* Mark that this cpu is using the pmap. \
@@ -330,23 +379,29 @@ pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t addr);
*/
#define PMAP_ACTIVATE_KERNEL(my_cpu) { \
+ (void) (my_cpu); \
kernel_pmap->cpus_using = TRUE; \
}
#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \
+ (void) (my_cpu); \
kernel_pmap->cpus_using = FALSE; \
}
#define PMAP_ACTIVATE_USER(pmap, th, my_cpu) { \
register pmap_t tpmap = (pmap); \
+ (void) (th); \
+ (void) (my_cpu); \
\
- set_dirbase(kvtophys(tpmap->dirbase)); \
+ set_pmap(tpmap); \
if (tpmap != kernel_pmap) { \
tpmap->cpus_using = TRUE; \
} \
}
#define PMAP_DEACTIVATE_USER(pmap, thread, cpu) { \
+ (void) (thread); \
+ (void) (cpu); \
if ((pmap) != kernel_pmap) \
(pmap)->cpus_using = FALSE; \
}
@@ -363,6 +418,33 @@ pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t addr);
#define pmap_attribute(pmap,addr,size,attr,value) \
(KERN_INVALID_ADDRESS)
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ * Allocate the kernel page directory and page tables,
+ * and direct-map all physical memory.
+ * Called with mapping off.
+ */
+extern void pmap_bootstrap(void);
+
+extern void pmap_unmap_page_zero (void);
+
+/*
+ * pmap_zero_page zeros the specified (machine independent) page.
+ */
+extern void pmap_zero_page (vm_offset_t);
+
+/*
+ * pmap_copy_page copies the specified (machine independent) pages.
+ */
+extern void pmap_copy_page (vm_offset_t, vm_offset_t);
+
+/*
+ * kvtophys(addr)
+ *
+ * Convert a kernel virtual address to a physical address
+ */
+extern vm_offset_t kvtophys (vm_offset_t);
+
#endif /* __ASSEMBLER__ */
#endif /* _PMAP_MACHINE_ */
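
Under PAE the pmap.h hunks above widen pt_entry_t to 64 bits and make lin2pdenum() span both the page-directory-pointer index and the directory index (11 bits in total). A hypothetical userspace check of that address decomposition, reusing the macro values from the header; the sample address is arbitrary:

/* Illustration of the PAE index split used by lin2pdenum()/ptenum() above. */
#include <stdio.h>

#define PDESHIFT 21
#define PTESHIFT 12
#define PTEMASK  0x1ff

/* PAE variant from the patch: includes the PDP index in the result. */
#define lin2pdenum(a) (((a) >> PDESHIFT) & 0x7ff)
#define ptenum(a)     (((a) >> PTESHIFT) & PTEMASK)

int main(void)
{
    unsigned long la = 0xc1234567UL;   /* arbitrary linear address */

    printf("pde index %lu (pdp %lu, dir %lu), pte index %lu\n",
           lin2pdenum(la), lin2pdenum(la) >> 9, lin2pdenum(la) & 0x1ff,
           ptenum(la));
    return 0;
}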
diff --git a/i386/intel/read_fault.h b/i386/intel/read_fault.h
new file mode 100644
index 00000000..8aa3f035
--- /dev/null
+++ b/i386/intel/read_fault.h
@@ -0,0 +1,35 @@
+/*
+ * Kernel read_fault on i386 functions
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Kernel read_fault on i386 functions.
+ *
+ */
+
+#ifndef _READ_FAULT_H_
+#define _READ_FAULT_H_
+
+#include <mach/std_types.h>
+
+extern kern_return_t intel_read_fault(
+ vm_map_t map,
+ vm_offset_t vaddr);
+
+#endif /* _READ_FAULT_H_ */
diff --git a/i386/ldscript b/i386/ldscript
index 55a12841..f2b90fa2 100644
--- a/i386/ldscript
+++ b/i386/ldscript
@@ -3,49 +3,8 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386",
"elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(_start)
-SEARCH_DIR("/usr/i486-linux-gnu/lib"); SEARCH_DIR("/usr/local/lib"); SEARCH_DIR("/lib"); SEARCH_DIR("/usr/lib");
SECTIONS
{
- /* Read-only sections, merged into text segment: */
- PROVIDE (__executable_start = 0x08048000); . = 0x08048000 + SIZEOF_HEADERS;
- .interp : { *(.interp) }
- .hash : { *(.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
- .rel.init : { *(.rel.init) }
- .rela.init : { *(.rela.init) }
- .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
- .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
- .rel.fini : { *(.rel.fini) }
- .rela.fini : { *(.rela.fini) }
- .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
- .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
- .rel.data.rel.ro : { *(.rel.data.rel.ro* .rel.gnu.linkonce.d.rel.ro.*) }
- .rela.data.rel.ro : { *(.rela.data.rel.ro* .rela.gnu.linkonce.d.rel.ro.*) }
- .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
- .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
- .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
- .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
- .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
- .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
- .rel.ctors : { *(.rel.ctors) }
- .rela.ctors : { *(.rela.ctors) }
- .rel.dtors : { *(.rel.dtors) }
- .rela.dtors : { *(.rela.dtors) }
- .rel.got : { *(.rel.got) }
- .rela.got : { *(.rela.got) }
- .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
- .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
- .rel.plt : { *(.rel.plt) }
- .rela.plt : { *(.rela.plt) }
- .init :
- {
- KEEP (*(.init))
- } =0x90909090
- .plt : { *(.plt) }
/*
* There are specific requirements about entry points, so we have it
* configurable via `_START': `.text' will begin there and `.text.start' will
@@ -57,10 +16,15 @@ SECTIONS
{
*(.text.start)
*(.text .stub .text.* .gnu.linkonce.t.*)
+ *(.text.unlikely .text.*_unlikely)
KEEP (*(.text.*personality*))
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
} =0x90909090
+ .init :
+ {
+ KEEP (*(.init))
+ } =0x90909090
.fini :
{
KEEP (*(.fini))
@@ -68,6 +32,39 @@ SECTIONS
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
+
+ /* Read-only sections, merged into text segment: */
+ PROVIDE (__executable_start = .);
+ .interp : { *(.interp) }
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) }
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rel.data.rel.ro : { *(.rel.data.rel.ro* .rel.gnu.linkonce.d.rel.ro.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rel.ifunc : { *(.rel.ifunc) }
+ .rel.plt :
+ {
+ *(.rel.plt)
+ PROVIDE_HIDDEN (__rel_iplt_start = .);
+ *(.rel.iplt)
+ PROVIDE_HIDDEN (__rel_iplt_end = .);
+ }
+ .plt : { *(.plt) *(.iplt) }
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
@@ -75,7 +72,7 @@ SECTIONS
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
- . = ALIGN (0x1000) - ((0x1000 - .) & (0x1000 - 1)); . = DATA_SEGMENT_ALIGN (0x1000, 0x1000);
+ . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
@@ -98,8 +95,8 @@ SECTIONS
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
- KEEP (*(.fini_array))
KEEP (*(SORT(.fini_array.*)))
+ KEEP (*(.fini_array))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
@@ -113,32 +110,33 @@ SECTIONS
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
- KEEP (*crtbegin*.o(.ctors))
+ KEEP (*crtbegin.o(.ctors))
+ KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
- KEEP (*(EXCLUDE_FILE (*crtend*.o ) .ctors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
- KEEP (*crtbegin*.o(.dtors))
- KEEP (*(EXCLUDE_FILE (*crtend*.o ) .dtors))
+ KEEP (*crtbegin.o(.dtors))
+ KEEP (*crtbegin?.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
- .got : { *(.got) }
+ .got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (12, .);
- .got.plt : { *(.got.plt) }
+ .got.plt : { *(.got.plt) *(.igot.plt) }
.data :
{
*(.data .data.* .gnu.linkonce.d.*)
- KEEP (*(.gnu.linkonce.d.*personality*))
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
@@ -193,5 +191,9 @@ SECTIONS
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
- /DISCARD/ : { *(.note.GNU-stack) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+ /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}
diff --git a/i386/linux/dev/include/linux/autoconf.h b/i386/linux/dev/include/linux/autoconf.h
index 037829b4..75ff2aad 100644
--- a/i386/linux/dev/include/linux/autoconf.h
+++ b/i386/linux/dev/include/linux/autoconf.h
@@ -55,8 +55,8 @@
#undef CONFIG_BLK_DEV_IDEFLOPPY
#undef CONFIG_BLK_DEV_IDESCSI
#undef CONFIG_BLK_DEV_IDE_PCMCIA
-#define CONFIG_BLK_DEV_CMD640 1
-#define CONFIG_BLK_DEV_CMD640_ENHANCED 1
+#undef CONFIG_BLK_DEV_CMD640
+#undef CONFIG_BLK_DEV_CMD640_ENHANCED
#define CONFIG_BLK_DEV_RZ1000 1
#define CONFIG_BLK_DEV_TRITON 1
#undef CONFIG_IDE_CHIPSETS
diff --git a/i386/xen/Makefrag.am b/i386/xen/Makefrag.am
new file mode 100644
index 00000000..b15b7db1
--- /dev/null
+++ b/i386/xen/Makefrag.am
@@ -0,0 +1,33 @@
+# Makefile fragment for the ix86 specific part of the Xen platform.
+
+# Copyright (C) 2007 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+#
+# Xen support.
+#
+
+libkernel_a_SOURCES += \
+ i386/xen/xen.c \
+ i386/xen/xen_locore.S \
+ i386/xen/xen_boothdr.S
+
+
+if PLATFORM_xen
+gnumach_LINKFLAGS += \
+ --defsym _START=0x20000000 \
+ -T '$(srcdir)'/i386/ldscript
+endif
diff --git a/i386/xen/xen.c b/i386/xen/xen.c
new file mode 100644
index 00000000..aa3c2cc8
--- /dev/null
+++ b/i386/xen/xen.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <kern/printf.h>
+#include <kern/debug.h>
+
+#include <mach/machine/eflags.h>
+#include <machine/thread.h>
+#include <machine/ipl.h>
+
+#include <machine/model_dep.h>
+
+unsigned long cr3;
+
+struct failsafe_callback_regs {
+ unsigned int ds;
+ unsigned int es;
+ unsigned int fs;
+ unsigned int gs;
+ unsigned int ip;
+ unsigned int cs_and_mask;
+ unsigned int flags;
+};
+
+void hyp_failsafe_c_callback(struct failsafe_callback_regs *regs) {
+ printf("Fail-Safe callback!\n");
+ printf("IP: %08X CS: %4X DS: %4X ES: %4X FS: %4X GS: %4X FLAGS %08X MASK %04X\n", regs->ip, regs->cs_and_mask & 0xffff, regs->ds, regs->es, regs->fs, regs->gs, regs->flags, regs->cs_and_mask >> 16);
+ panic("failsafe");
+}
+
+extern void clock_interrupt();
+extern void return_to_iret;
+
+void hypclock_machine_intr(int old_ipl, void *ret_addr, struct i386_interrupt_state *regs, unsigned64_t delta) {
+ if (ret_addr == &return_to_iret) {
+ clock_interrupt(delta/1000, /* usec per tick */
+ (regs->efl & EFL_VM) || /* user mode */
+ ((regs->cs & 0x02) != 0), /* user mode */
+ old_ipl == SPL0); /* base priority */
+ } else
+ clock_interrupt(delta/1000, FALSE, FALSE);
+}
+
+void hyp_p2m_init(void) {
+ unsigned long nb_pfns = atop(phys_last_addr);
+#ifdef MACH_PSEUDO_PHYS
+#define P2M_PAGE_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+ unsigned long *l3 = (unsigned long *)phystokv(pmap_grab_page()), *l2 = NULL;
+ unsigned long i;
+
+ for (i = 0; i < (nb_pfns + P2M_PAGE_ENTRIES) / P2M_PAGE_ENTRIES; i++) {
+ if (!(i % P2M_PAGE_ENTRIES)) {
+ l2 = (unsigned long *) phystokv(pmap_grab_page());
+ l3[i / P2M_PAGE_ENTRIES] = kv_to_mfn(l2);
+ }
+ l2[i % P2M_PAGE_ENTRIES] = kv_to_mfn(&mfn_list[i * P2M_PAGE_ENTRIES]);
+ }
+
+ hyp_shared_info.arch.pfn_to_mfn_frame_list_list = kv_to_mfn(l3);
+#endif
+ hyp_shared_info.arch.max_pfn = nb_pfns;
+}
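
hyp_p2m_init() above publishes the pfn-to-mfn table through a three-level frame list: each 4 KiB page holds PAGE_SIZE/sizeof(unsigned long) entries (1024 on the 32-bit target), so one l2 page references 1024 mfn_list frames and one l3 page references 1024 l2 pages. A back-of-envelope sizing check; the domain size below is an arbitrary example:

/* Rough sizing check for the p2m frame-list layout (illustrative only). */
#include <stdio.h>

#define PAGE_SIZE        4096
#define P2M_PAGE_ENTRIES (PAGE_SIZE / sizeof(unsigned long))

int main(void)
{
    unsigned long nb_pfns = 256 * 1024;   /* e.g. a 1 GiB domain */
    unsigned long mfn_list_pages =
        (nb_pfns + P2M_PAGE_ENTRIES - 1) / P2M_PAGE_ENTRIES;
    unsigned long l2_pages =
        (mfn_list_pages + P2M_PAGE_ENTRIES - 1) / P2M_PAGE_ENTRIES;

    printf("%lu pfns -> %lu mfn_list frames -> %lu l2 pages (1 l3 page)\n",
           nb_pfns, mfn_list_pages, l2_pages);
    return 0;
}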
diff --git a/i386/xen/xen_boothdr.S b/i386/xen/xen_boothdr.S
new file mode 100644
index 00000000..604e13b6
--- /dev/null
+++ b/i386/xen/xen_boothdr.S
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <xen/public/elfnote.h>
+
+.section __xen_guest
+ .ascii "GUEST_OS=GNU Mach"
+ .ascii ",GUEST_VERSION=1.3"
+ .ascii ",XEN_VER=xen-3.0"
+ .ascii ",VIRT_BASE=0x20000000"
+ .ascii ",ELF_PADDR_OFFSET=0x20000000"
+ .ascii ",HYPERCALL_PAGE=0x2"
+#if PAE
+ .ascii ",PAE=yes"
+#else
+ .ascii ",PAE=no"
+#endif
+ .ascii ",LOADER=generic"
+#ifndef MACH_PSEUDO_PHYS
+ .ascii ",FEATURES=!auto_translated_physmap"
+#endif
+ .byte 0
+
+/* Macro taken from linux/include/linux/elfnote.h */
+#define ELFNOTE(name, type, desctype, descdata) \
+.pushsection .note.name ; \
+ .align 4 ; \
+ .long 2f - 1f /* namesz */ ; \
+ .long 4f - 3f /* descsz */ ; \
+ .long type ; \
+1:.asciz "name" ; \
+2:.align 4 ; \
+3:desctype descdata ; \
+4:.align 4 ; \
+.popsection ;
+
+ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "GNU Mach")
+ ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "1.3")
+ ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
+ ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, _START)
+ ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, _START)
+ ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, start)
+ ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypcalls)
+#if PAE
+ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
+#else
+ ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no")
+#endif
+ ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
+ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, ""
+#ifndef MACH_PSEUDO_PHYS
+ "!auto_translated_physmap"
+#endif
+ )
+
+#include <mach/machine/asm.h>
+
+#include <i386/i386/i386asm.h>
+
+ .text
+ .globl gdt, ldt
+ .globl start, _start, gdt
+start:
+_start:
+
+ /* Switch to our own interrupt stack. */
+ movl $(_intstack+INTSTACK_SIZE),%eax
+ movl %eax,%esp
+
+ /* Reset EFLAGS to a known state. */
+ pushl $0
+ popf
+
+ /* Push the start_info pointer as the argument to c_boot_entry. */
+ subl $KERNELBASE,%esi
+ pushl %esi
+
+ /* Fix ifunc entries */
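+ /* Each .rel.iplt entry is 8 bytes: r_offset, then r_info.  For every
+ * IRELATIVE entry (type 42), call the resolver whose address is stored
+ * at r_offset and write the function address it returns back into that
+ * slot. */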
+ movl $__rel_iplt_start,%esi
+ movl $__rel_iplt_end,%edi
+iplt_cont:
+ cmpl %edi,%esi
+ jae iplt_done
+ movl (%esi),%ebx /* r_offset */
+ movb 4(%esi),%al /* info */
+ cmpb $42,%al /* IRELATIVE */
+ jnz iplt_next
+ call *(%ebx) /* call ifunc */
+ movl %eax,(%ebx) /* fixed address */
+iplt_next:
+ addl $8,%esi
+ jmp iplt_cont
+iplt_done:
+
+ /* Jump into C code. */
+ call EXT(c_boot_entry)
+
+/* These need to be aligned on page boundaries. */
+.global hyp_shared_info, hypcalls
+
+ .org (start + 0x1000)
+hyp_shared_info:
+ .org hyp_shared_info + 0x1000
+
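+/*
+ * The hypervisor fills the page advertised via XEN_ELFNOTE_HYPERCALL_PAGE
+ * with one call stub per hypercall, 32 bytes apart.
+ */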
+/* Labels just for debuggers */
+#define hypcall(name, n) \
+ .org hypcalls + n*32 ; \
+__hyp_##name:
+
+hypcalls:
+ hypcall(set_trap_table, 0)
+ hypcall(mmu_update, 1)
+ hypcall(set_gdt, 2)
+ hypcall(stack_switch, 3)
+ hypcall(set_callbacks, 4)
+ hypcall(fpu_taskswitch, 5)
+ hypcall(sched_op_compat, 6)
+ hypcall(platform_op, 7)
+ hypcall(set_debugreg, 8)
+ hypcall(get_debugreg, 9)
+ hypcall(update_descriptor, 10)
+ hypcall(memory_op, 12)
+ hypcall(multicall, 13)
+ hypcall(update_va_mapping, 14)
+ hypcall(set_timer_op, 15)
+ hypcall(event_channel_op_compat, 16)
+ hypcall(xen_version, 17)
+ hypcall(console_io, 18)
+ hypcall(physdev_op_compat, 19)
+ hypcall(grant_table_op, 20)
+ hypcall(vm_assist, 21)
+ hypcall(update_va_mapping_otherdomain, 22)
+ hypcall(iret, 23)
+ hypcall(vcpu_op, 24)
+ hypcall(set_segment_base, 25)
+ hypcall(mmuext_op, 26)
+ hypcall(acm_op, 27)
+ hypcall(nmi_op, 28)
+ hypcall(sched_op, 29)
+ hypcall(callback_op, 30)
+ hypcall(xenoprof_op, 31)
+ hypcall(event_channel_op, 32)
+ hypcall(physdev_op, 33)
+ hypcall(hvm_op, 34)
+ hypcall(sysctl, 35)
+ hypcall(domctl, 36)
+ hypcall(kexec_op, 37)
+
+ hypcall(arch_0, 48)
+ hypcall(arch_1, 49)
+ hypcall(arch_2, 50)
+ hypcall(arch_3, 51)
+ hypcall(arch_4, 52)
+ hypcall(arch_5, 53)
+ hypcall(arch_6, 54)
+ hypcall(arch_7, 55)
+
+ .org hypcalls + 0x1000
+
+gdt:
+ .org gdt + 0x1000
+
+ldt:
+ .org ldt + 0x1000
+
+stack:
+ .long _intstack+INTSTACK_SIZE,0xe021
+ .comm _intstack,INTSTACK_SIZE
+
diff --git a/i386/xen/xen_locore.S b/i386/xen/xen_locore.S
new file mode 100644
index 00000000..51f823f2
--- /dev/null
+++ b/i386/xen/xen_locore.S
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <mach/machine/asm.h>
+
+#include <i386/i386asm.h>
+#include <i386/cpu_number.h>
+#include <i386/xen.h>
+
+ .data 2
+int_active:
+ .long 0
+
+
+ .text
+ .globl hyp_callback, hyp_failsafe_callback
+ P2ALIGN(TEXT_ALIGN)
+hyp_callback:
+ pushl %eax
+ jmp EXT(all_intrs)
+
+ENTRY(interrupt)
+ incl int_active /* currently handling interrupts */
+ call EXT(hyp_c_callback) /* call generic interrupt routine */
+ decl int_active /* stopped handling interrupts */
+ sti
+ ret
+
+/* FIXME: if we're _very_ unlucky, we may be re-interrupted, filling the stack.
+ *
+ * Far from trivial, see mini-os. That said, maybe we could just, before popping
+ * everything (which is _not_ destructive), save sp into a known place and use
+ * it+jmp back?
+ *
+ * Mmm, there seems to be an iret hypcall that does exactly what we want:
+ * perform iret, and if IF is set, clear the interrupt mask.
+ */
+
+/* Pfff, we have to check pending interrupts ourselves. Some other DomUs just make a hypercall to re-trigger the IRQ. Not sure that's really easier/faster. */
+ENTRY(hyp_sti)
+ pushl %ebp
+ movl %esp, %ebp
+_hyp_sti:
+ movb $0,hyp_shared_info+CPU_CLI /* Enable interrupts */
+ cmpl $0,int_active /* Check whether we were already checking pending interrupts */
+ jz 0f
+ popl %ebp
+ ret /* Already active, just return */
+0:
+ /* Not active, check pending interrupts by hand */
+ /* no memory barrier needed on x86 */
+ cmpb $0,hyp_shared_info+CPU_PENDING
+ jne 0f
+ popl %ebp
+ ret
+0:
+ movb $0xff,hyp_shared_info+CPU_CLI
+1:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ incl int_active /* currently handling interrupts */
+
+ pushl $0
+ pushl $0
+ call EXT(hyp_c_callback)
+ popl %edx
+ popl %edx
+
+ popl %edx
+ popl %ecx
+ popl %eax
+ decl int_active /* stopped handling interrupts */
+ cmpb $0,hyp_shared_info+CPU_PENDING
+ jne 1b
+ jmp _hyp_sti
+
+/* Hypervisor failed to reload segments. Dump them. */
+hyp_failsafe_callback:
+#if 1
+ /* load sane segments */
+ mov %ss, %ax
+ mov %ax, %ds
+ mov %ax, %es
+ mov %ax, %fs
+ mov %ax, %gs
+ push %esp
+ call EXT(hyp_failsafe_c_callback)
+#else
+ popl %ds
+ popl %es
+ popl %fs
+ popl %gs
+ iret
+#endif
diff --git a/include/device/net_status.h b/include/device/net_status.h
index 5131ef53..9ab95b96 100644
--- a/include/device/net_status.h
+++ b/include/device/net_status.h
@@ -72,6 +72,7 @@ struct net_status {
#define NET_DSTADDR (('n'<<16) + 3)
+#define NET_FLAGS (('n'<<16) + 4)
/*
* Input packet filter definition
diff --git a/include/mach/mach4.defs b/include/mach/mach4.defs
index e4f363fc..114edf4e 100644
--- a/include/mach/mach4.defs
+++ b/include/mach/mach4.defs
@@ -79,4 +79,34 @@ skip /* pc_sampling reserved 1*/;
skip /* pc_sampling reserved 2*/;
skip /* pc_sampling reserved 3*/;
skip /* pc_sampling reserved 4*/;
+
+#else
+
+skip; /* task_enable_pc_sampling */
+skip; /* task_disable_pc_sampling */
+skip; /* task_get_sampled_pcs */
+skip; /* thread_enable_pc_sampling */
+skip; /* thread_disable_pc_sampling */
+skip; /* thread_get_sampled_pcs */
+
+skip /* pc_sampling reserved 1*/;
+skip /* pc_sampling reserved 2*/;
+skip /* pc_sampling reserved 3*/;
+skip /* pc_sampling reserved 4*/;
+
#endif
+
+
+/* Create a new proxy memory object from [START; START+LEN) in the
+ given OBJECT at OFFSET in the new object with the maximum
+ protection MAX_PROTECTION and return it in *PROXY. */
+type vm_offset_array_t = array[*:1024] of vm_offset_t;
+routine memory_object_create_proxy(
+ task : ipc_space_t;
+ max_protection : vm_prot_t;
+ object : memory_object_array_t =
+ array[*:1024] of memory_object_t;
+ offset : vm_offset_array_t;
+ start : vm_offset_array_t;
+ len : vm_offset_array_t;
+ out proxy : mach_port_t);
diff --git a/include/mach/memory_object.h b/include/mach/memory_object.h
index b4dd71f7..f281f04d 100644
--- a/include/mach/memory_object.h
+++ b/include/mach/memory_object.h
@@ -46,6 +46,9 @@ typedef mach_port_t memory_object_t;
/* the object to map; used by the */
/* kernel to retrieve or store data */
+typedef mach_port_t * memory_object_array_t;
+ /* should be memory_object_t * */
+
typedef mach_port_t memory_object_control_t;
/* Provided to a memory manager; ... */
/* used to control a memory object */
diff --git a/include/mach/xen.h b/include/mach/xen.h
new file mode 100644
index 00000000..f1d9e418
--- /dev/null
+++ b/include/mach/xen.h
@@ -0,0 +1,85 @@
+
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _MACH_XEN_H
+#define _MACH_XEN_H
+#ifdef MACH_XEN
+#include <sys/types.h>
+#include <xen/public/xen.h>
+#include <i386/vm_param.h>
+
+extern struct start_info boot_info;
+
+extern volatile struct shared_info hyp_shared_info;
+
+/* Memory translations */
+
+/* pa are physical addresses, from 0 to size of memory */
+/* ma are machine addresses, i.e. _real_ hardware addresses */
+/* la are linear addresses, i.e. without segmentation */
+
+/* This might also be useful outside of Xen */
+#if VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+extern unsigned long la_shift;
+#else
+#define la_shift LINEAR_MIN_KERNEL_ADDRESS
+#endif
+#define la_to_pa(a) ((vm_offset_t)(((vm_offset_t)(a)) - la_shift))
+#define pa_to_la(a) ((vm_offset_t)(((vm_offset_t)(a)) + la_shift))
+
+#define kv_to_la(a) pa_to_la(_kvtophys(a))
+#define la_to_kv(a) phystokv(la_to_pa(a))
+
+#ifdef MACH_PSEUDO_PHYS
+#if PAE
+#define PFN_LIST MACH2PHYS_VIRT_START_PAE
+#else
+#define PFN_LIST MACH2PHYS_VIRT_START_NONPAE
+#endif
+#if VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS
+extern unsigned long *pfn_list;
+#else
+#define pfn_list ((unsigned long *) PFN_LIST)
+#endif
+#define mfn_to_pfn(n) (pfn_list[n])
+
+extern unsigned long *mfn_list;
+#define pfn_to_mfn(n) (mfn_list[n])
+#else
+#define mfn_to_pfn(n) (n)
+#define pfn_to_mfn(n) (n)
+#endif /* MACH_PSEUDO_PHYS */
+
+#define pa_to_mfn(a) (pfn_to_mfn(atop(a)))
+#ifdef PAE
+#define pa_to_ma(a) ({ vm_offset_t __a = (vm_offset_t) (a); (((pt_entry_t) pa_to_mfn(__a)) << PAGE_SHIFT) | (__a & PAGE_MASK); })
+#define ma_to_pa(a) ({ pt_entry_t __a = (pt_entry_t) (a); (mfn_to_pfn(__a >> PAGE_SHIFT) << PAGE_SHIFT) | (__a & PAGE_MASK); })
+#else
+#define pa_to_ma(a) ({ vm_offset_t __a = (vm_offset_t) (a); ptoa(pa_to_mfn(__a)) | (__a & PAGE_MASK); })
+#define ma_to_pa(a) ({ vm_offset_t __a = (vm_offset_t) (a); (mfn_to_pfn(atop((__a))) << PAGE_SHIFT) | (__a & PAGE_MASK); })
+#endif
+
+#define kv_to_mfn(a) pa_to_mfn(_kvtophys(a))
+#define kv_to_ma(a) pa_to_ma(_kvtophys(a))
+#define mfn_to_kv(mfn) phystokv(ptoa(mfn_to_pfn(mfn)))
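+/*
+ * For example, kv_to_ma goes kernel virtual -> physical (_kvtophys), then
+ * physical -> machine by replacing the page frame number with its MFN
+ * (pfn_to_mfn) while keeping the offset within the page.
+ */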
+
+#include <machine/xen.h>
+
+#endif /* MACH_XEN */
+#endif /* _MACH_XEN_H */
diff --git a/include/string.h b/include/string.h
index 0d805ea3..34423dff 100644
--- a/include/string.h
+++ b/include/string.h
@@ -30,6 +30,8 @@
extern void *memcpy (void *dest, const void *src, size_t n);
+extern int memcmp (const void *s1, const void *s2, size_t n);
+
extern void *memset (void *s, int c, size_t n);
extern char *strchr (const char *s, int c);
diff --git a/include/sys/types.h b/include/sys/types.h
index 6973f892..d79e077c 100644
--- a/include/sys/types.h
+++ b/include/sys/types.h
@@ -58,6 +58,20 @@ typedef unsigned int time_t;
#define RAND_MAX 0x7fffffff
+/* POSIX types */
+typedef signed8_t int8_t;
+typedef unsigned8_t uint8_t;
+typedef unsigned8_t u_int8_t;
+typedef signed16_t int16_t;
+typedef unsigned16_t uint16_t;
+typedef unsigned16_t u_int16_t;
+typedef signed32_t int32_t;
+typedef unsigned32_t uint32_t;
+typedef unsigned32_t u_int32_t;
+typedef signed64_t int64_t;
+typedef unsigned64_t uint64_t;
+typedef unsigned64_t u_int64_t;
+
/* Symbols not allowed by POSIX */
#ifndef _POSIX_SOURCE
@@ -70,10 +84,6 @@ typedef unsigned short u_short; /* unsigned short */
typedef unsigned int u_int; /* unsigned int */
typedef unsigned long u_long; /* unsigned long */
-typedef struct _quad_ {
- unsigned int val[2]; /* 2 32-bit values make... */
-} quad; /* an 8-byte item */
-
typedef unsigned int daddr_t; /* disk address */
#define major(i) (((i) >> 8) & 0xFF)
diff --git a/ipc/ipc_kmsg.c b/ipc/ipc_kmsg.c
index 5ad8d61a..a12c9476 100644
--- a/ipc/ipc_kmsg.c
+++ b/ipc/ipc_kmsg.c
@@ -41,13 +41,16 @@
#include <mach/kern_return.h>
#include <mach/message.h>
#include <mach/port.h>
+#include <machine/locore.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
+#include <vm/vm_user.h>
#include <ipc/port.h>
#include <ipc/ipc_entry.h>
+#include <ipc/ipc_hash.h>
#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_thread.h>
#include <ipc/ipc_marequest.h>
@@ -59,6 +62,8 @@
#include <ipc/ipc_machdep.h>
+#include <device/net_io.h>
+
#if MACH_KDB
#include <ddb/db_output.h>
#endif
@@ -526,7 +531,7 @@ ipc_kmsg_get(msg, size, kmsgp)
ikm_init(kmsg, size);
}
- if (copyinmsg((char *) msg, (char *) &kmsg->ikm_header, size)) {
+ if (copyinmsg(msg, &kmsg->ikm_header, size)) {
ikm_free(kmsg);
return MACH_SEND_INVALID_DATA;
}
@@ -596,7 +601,7 @@ ipc_kmsg_put(msg, kmsg, size)
ikm_check_initialized(kmsg, kmsg->ikm_size);
- if (copyoutmsg((char *) &kmsg->ikm_header, (char *) msg, size))
+ if (copyoutmsg(&kmsg->ikm_header, msg, size))
mr = MACH_RCV_INVALID_DATA;
else
mr = MACH_MSG_SUCCESS;
diff --git a/ipc/ipc_mqueue.h b/ipc/ipc_mqueue.h
index 6421aefd..ef0f9425 100644
--- a/ipc/ipc_mqueue.h
+++ b/ipc/ipc_mqueue.h
@@ -66,6 +66,9 @@ ipc_mqueue_changed(ipc_mqueue_t, mach_msg_return_t);
extern mach_msg_return_t
ipc_mqueue_send(ipc_kmsg_t, mach_msg_option_t, mach_msg_timeout_t);
+extern mach_msg_return_t
+ipc_mqueue_copyin(ipc_space_t, mach_port_t, ipc_mqueue_t *, ipc_object_t *);
+
#define IMQ_NULL_CONTINUE ((void (*)()) 0)
extern mach_msg_return_t
diff --git a/ipc/ipc_notify.c b/ipc/ipc_notify.c
index 6a3d3818..d06346ea 100644
--- a/ipc/ipc_notify.c
+++ b/ipc/ipc_notify.c
@@ -264,7 +264,7 @@ ipc_notify_port_deleted(port, name)
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped port-deleted (0x%08x, 0x%x)\n", port, name);
+ printf("dropped port-deleted (0x%p, 0x%x)\n", port, name);
ipc_port_release_sonce(port);
return;
}
@@ -298,7 +298,7 @@ ipc_notify_msg_accepted(port, name)
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped msg-accepted (0x%08x, 0x%x)\n", port, name);
+ printf("dropped msg-accepted (0x%p, 0x%x)\n", port, name);
ipc_port_release_sonce(port);
return;
}
@@ -335,7 +335,7 @@ ipc_notify_port_destroyed(port, right)
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped port-destroyed (0x%08x, 0x%08x)\n",
+ printf("dropped port-destroyed (0x%p, 0x%p)\n",
port, right);
ipc_port_release_sonce(port);
ipc_port_release_receive(right);
@@ -371,7 +371,7 @@ ipc_notify_no_senders(port, mscount)
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped no-senders (0x%08x, %u)\n", port, mscount);
+ printf("dropped no-senders (0x%p, %u)\n", port, mscount);
ipc_port_release_sonce(port);
return;
}
@@ -404,7 +404,7 @@ ipc_notify_send_once(port)
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped send-once (0x%08x)\n", port);
+ printf("dropped send-once (0x%p)\n", port);
ipc_port_release_sonce(port);
return;
}
@@ -437,7 +437,7 @@ ipc_notify_dead_name(port, name)
kmsg = ikm_alloc(sizeof *n);
if (kmsg == IKM_NULL) {
- printf("dropped dead-name (0x%08x, 0x%x)\n", port, name);
+ printf("dropped dead-name (0x%p, 0x%x)\n", port, name);
ipc_port_release_sonce(port);
return;
}
diff --git a/ipc/mach_debug.c b/ipc/mach_debug.c
index 9854cec0..28dd6935 100644
--- a/ipc/mach_debug.c
+++ b/ipc/mach_debug.c
@@ -112,7 +112,7 @@ host_ipc_hash_info(
mach_msg_type_number_t *countp)
{
vm_offset_t addr;
- vm_size_t size;
+ vm_size_t size = 0; /* Suppress gcc warning */
hash_info_bucket_t *info;
unsigned int potential, actual;
kern_return_t kr;
@@ -277,11 +277,11 @@ mach_port_space_info(
ipc_info_name_t *table_info;
unsigned int table_potential, table_actual;
vm_offset_t table_addr;
- vm_size_t table_size;
+ vm_size_t table_size = 0; /* Suppress gcc warning */
ipc_info_tree_name_t *tree_info;
unsigned int tree_potential, tree_actual;
vm_offset_t tree_addr;
- vm_size_t tree_size;
+ vm_size_t tree_size = 0; /* Suppress gcc warning */
ipc_tree_entry_t tentry;
ipc_entry_t table;
ipc_entry_num_t tsize;
diff --git a/ipc/mach_msg.c b/ipc/mach_msg.c
index e6bcf613..43ae918a 100644
--- a/ipc/mach_msg.c
+++ b/ipc/mach_msg.c
@@ -222,8 +222,8 @@ mach_msg_receive(msg, option, rcv_size, rcv_name, time_out, notify)
assert(real_size > rcv_size);
- (void) copyout((vm_offset_t) &real_size,
- (vm_offset_t) &msg->msgh_size,
+ (void) copyout(&real_size,
+ &msg->msgh_size,
sizeof(mach_msg_size_t));
}
@@ -313,8 +313,8 @@ mach_msg_receive_continue(void)
assert(real_size > rcv_size);
- (void) copyout((vm_offset_t) &real_size,
- (vm_offset_t) &msg->msgh_size,
+ (void) copyout(&real_size,
+ &msg->msgh_size,
sizeof(mach_msg_size_t));
}
@@ -460,7 +460,7 @@ mach_msg_trap(msg, option, send_size, rcv_size, rcv_name, time_out, notify)
ikm_cache() = IKM_NULL;
ikm_check_initialized(kmsg, IKM_SAVED_KMSG_SIZE);
- if (copyinmsg((vm_offset_t) msg, (vm_offset_t) &kmsg->ikm_header,
+ if (copyinmsg(msg, &kmsg->ikm_header,
send_size)) {
ikm_free(kmsg);
goto slow_get;
@@ -1244,7 +1244,7 @@ mach_msg_trap(msg, option, send_size, rcv_size, rcv_name, time_out, notify)
ikm_check_initialized(kmsg, kmsg->ikm_size);
if ((kmsg->ikm_size != IKM_SAVED_KMSG_SIZE) ||
- copyoutmsg((vm_offset_t) &kmsg->ikm_header, (vm_offset_t) msg,
+ copyoutmsg(&kmsg->ikm_header, msg,
reply_size) ||
(ikm_cache() != IKM_NULL))
goto slow_put;
diff --git a/ipc/mach_port.c b/ipc/mach_port.c
index c9f24c5a..d902e220 100644
--- a/ipc/mach_port.c
+++ b/ipc/mach_port.c
@@ -59,6 +59,7 @@
#include <ipc/ipc_port.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_right.h>
+#include <ipc/mach_port.h>
@@ -589,6 +590,7 @@ mach_port_destroy(
* KERN_INVALID_RIGHT The right isn't correct.
*/
+static volatile int mach_port_deallocate_debug = 0;
kern_return_t
mach_port_deallocate(
ipc_space_t space,
@@ -601,8 +603,14 @@ mach_port_deallocate(
return KERN_INVALID_TASK;
kr = ipc_right_lookup_write(space, name, &entry);
- if (kr != KERN_SUCCESS)
+ if (kr != KERN_SUCCESS) {
+ if (name != MACH_PORT_NULL && name != MACH_PORT_DEAD) {
+ printf("task %p deallocating an invalid port %u, most probably a bug.\n", current_task(), name);
+ if (mach_port_deallocate_debug)
+ SoftDebugger("mach_port_deallocate");
+ }
return kr;
+ }
/* space is write-locked */
kr = ipc_right_dealloc(space, name, entry); /* unlocks space */
@@ -741,6 +749,8 @@ mach_port_mod_refs(
*/
kern_return_t
+mach_port_get_receive_status(ipc_space_t, mach_port_t, mach_port_status_t *);
+kern_return_t
old_mach_port_get_receive_status(space, name, statusp)
ipc_space_t space;
mach_port_t name;
@@ -1284,10 +1294,11 @@ mach_port_insert_right(
!MACH_MSG_TYPE_PORT_ANY_RIGHT(polyPoly))
return KERN_INVALID_VALUE;
- if (!IO_VALID(poly))
+ if (!IO_VALID((ipc_object_t)poly))
return KERN_INVALID_CAPABILITY;
- return ipc_object_copyout_name(space, poly, polyPoly, FALSE, name);
+ return ipc_object_copyout_name(space, (ipc_object_t)poly,
+ polyPoly, FALSE, name);
}
/*
diff --git a/ipc/mach_port.h b/ipc/mach_port.h
new file mode 100644
index 00000000..a82228fe
--- /dev/null
+++ b/ipc/mach_port.h
@@ -0,0 +1,57 @@
+/*
+ * Mach Port Functions.
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Mach port functions.
+ *
+ */
+
+#ifndef _IPC_MACH_PORT_H_
+#define _IPC_MACH_PORT_H_
+
+#include <sys/types.h>
+#include <ipc/ipc_types.h>
+#include <ipc/ipc_entry.h>
+
+extern kern_return_t
+mach_port_allocate_name (
+ ipc_space_t space,
+ mach_port_right_t right,
+ mach_port_t name);
+
+extern kern_return_t
+mach_port_allocate (
+ ipc_space_t space,
+ mach_port_right_t right,
+ mach_port_t *namep);
+
+extern kern_return_t
+mach_port_deallocate(
+ ipc_space_t space,
+ mach_port_t name);
+
+extern kern_return_t
+mach_port_insert_right(
+ ipc_space_t space,
+ mach_port_t name,
+ ipc_port_t poly,
+ mach_msg_type_name_t polyPoly);
+
+#endif /* _IPC_MACH_PORT_H_ */
diff --git a/kern/ast.c b/kern/ast.c
index d46f0d65..97da3abc 100644
--- a/kern/ast.c
+++ b/kern/ast.c
@@ -44,6 +44,7 @@
#include <kern/sched_prim.h>
#include <kern/thread.h>
#include <kern/processor.h>
+#include <device/net_io.h>
#include <machine/machspl.h> /* for splsched */
diff --git a/kern/ast.h b/kern/ast.h
index 6fd509c0..695eaac0 100644
--- a/kern/ast.h
+++ b/kern/ast.h
@@ -127,4 +127,8 @@ MACRO_END
* be followed by ast_propagate().
*/
+extern void ast_init (void);
+
+extern void ast_check (void);
+
#endif /* _KERN_AST_H_ */
diff --git a/kern/bootstrap.c b/kern/bootstrap.c
index 86d896c3..7dd7c140 100644
--- a/kern/bootstrap.c
+++ b/kern/bootstrap.c
@@ -35,8 +35,10 @@
#include <mach/port.h>
#include <mach/message.h>
+#include <machine/locore.h>
#include <machine/vm_param.h>
#include <ipc/ipc_port.h>
+#include <ipc/mach_port.h>
#include <kern/debug.h>
#include <kern/host.h>
#include <kern/printf.h>
@@ -44,6 +46,7 @@
#include <kern/thread.h>
#include <kern/lock.h>
#include <vm/vm_kern.h>
+#include <vm/vm_user.h>
#include <device/device_port.h>
#if MACH_KDB
@@ -65,7 +68,12 @@ extern void breakpoint();
#else
#include <mach/machine/multiboot.h>
#include <mach/exec/exec.h>
+#ifdef MACH_XEN
+#include <mach/xen.h>
+extern struct start_info boot_info; /* XXX put this in a header! */
+#else /* MACH_XEN */
extern struct multiboot_info boot_info; /* XXX put this in a header! */
+#endif /* MACH_XEN */
#endif
#include "boot_script.h"
@@ -92,7 +100,7 @@ task_insert_send_right(
kern_return_t kr;
kr = mach_port_insert_right(task->itk_space, name,
- (ipc_object_t)port, MACH_MSG_TYPE_PORT_SEND);
+ port, MACH_MSG_TYPE_PORT_SEND);
if (kr == KERN_SUCCESS)
break;
assert(kr == KERN_NAME_EXISTS);
@@ -103,9 +111,23 @@ task_insert_send_right(
void bootstrap_create()
{
+ int compat;
+#ifdef MACH_XEN
+ struct multiboot_module *bmods = ((struct multiboot_module *)
+ boot_info.mod_start);
+ int n = 0;
+ if (bmods)
+ for (n = 0; bmods[n].mod_start; n++) {
+ bmods[n].mod_start = kvtophys(bmods[n].mod_start + (vm_offset_t) bmods);
+ bmods[n].mod_end = kvtophys(bmods[n].mod_end + (vm_offset_t) bmods);
+ bmods[n].string = kvtophys(bmods[n].string + (vm_offset_t) bmods);
+ }
+ boot_info.mods_count = n;
+ boot_info.flags |= MULTIBOOT_MODS;
+#else /* MACH_XEN */
struct multiboot_module *bmods = ((struct multiboot_module *)
phystokv(boot_info.mods_addr));
- int compat;
+#endif /* MACH_XEN */
#ifdef MACH_GDB_STUB
/*
@@ -370,6 +392,7 @@ static void get_compat_strings(char *flags_str, char *root_str)
*cp = '\0';
}
+#if 0
/*
* Copy boot_data (executable) to the user portion of this task.
*/
@@ -395,6 +418,7 @@ static boolean_t load_bootstrap_symbols = TRUE;
#else
static boolean_t load_bootstrap_symbols = FALSE;
#endif
+#endif
@@ -448,7 +472,7 @@ read_exec(void *handle, vm_offset_t file_ofs, vm_size_t file_size,
if (file_size > 0)
{
err = copyout((char *)phystokv (mod->mod_start) + file_ofs,
- mem_addr, file_size);
+ (void *)mem_addr, file_size);
assert(err == 0);
}
@@ -463,7 +487,7 @@ read_exec(void *handle, vm_offset_t file_ofs, vm_size_t file_size,
static void copy_bootstrap(void *e, exec_info_t *boot_exec_info)
{
- register vm_map_t user_map = current_task()->map;
+ //register vm_map_t user_map = current_task()->map;
int err;
if ((err = exec_load(boot_read, read_exec, e, boot_exec_info)))
diff --git a/kern/debug.c b/kern/debug.c
index 6d760e5e..178d0788 100644
--- a/kern/debug.c
+++ b/kern/debug.c
@@ -24,6 +24,8 @@
* the rights to redistribute these changes.
*/
+#include <mach/xen.h>
+
#include <kern/printf.h>
#include <stdarg.h>
@@ -33,8 +35,10 @@
#include <kern/debug.h>
+#include <machine/loose_ends.h>
+#include <machine/model_dep.h>
+
extern void cnputc();
-void Debugger() __attribute__ ((noreturn));
#if MACH_KDB
extern int db_breakpoints_inserted;
@@ -69,17 +73,16 @@ Assert(char *exp, char *file, int line)
Debugger("assertion failure");
}
-void Debugger(message)
+void SoftDebugger(message)
char * message;
{
+ printf("Debugger invoked: %s\n", message);
+
#if !MACH_KDB
- panic("Debugger invoked, but there isn't one!");
+ printf("But no debugger, continuing.\n");
+ return;
#endif
-#ifdef lint
- message++;
-#endif /* lint */
-
#if defined(vax) || defined(PC532)
asm("bpt");
#endif /* vax */
@@ -100,6 +103,16 @@ void Debugger(message)
#ifdef i386
asm("int3");
#endif
+}
+
+void Debugger(message)
+ char * message;
+{
+#if !MACH_KDB
+ panic("Debugger invoked, but there isn't one!");
+#endif
+
+ SoftDebugger(message);
panic("Debugger returned!");
}
@@ -124,6 +137,10 @@ panic_init(void)
}
}
+#if ! MACH_KBD
+extern boolean_t reboot_on_panic;
+#endif
+
/*VARARGS1*/
void
panic(const char *s, ...)
@@ -155,6 +172,14 @@ panic(const char *s, ...)
va_end(listp);
printf("\n");
+#if MACH_KDB
+ Debugger("panic");
+#elif MACH_GDB_STUB
+ breakpoint();
+#else
+# ifdef MACH_HYP
+ hyp_crash();
+# else
/* Give the user time to see the message */
{
int i = 1000; /* seconds */
@@ -162,12 +187,8 @@ panic(const char *s, ...)
delay (1000000); /* microseconds */
}
-#if MACH_KDB
- Debugger("panic");
-#elif MACH_GDB_STUB
- breakpoint();
-#else
- halt_all_cpus (1);
+ halt_all_cpus (reboot_on_panic);
+# endif /* MACH_HYP */
#endif
}
@@ -187,3 +208,16 @@ log(int level, const char *fmt, ...)
_doprnt(fmt, &listp, do_cnputc, 0, 0);
va_end(listp);
}
+
+unsigned char __stack_chk_guard [ sizeof (vm_offset_t) ] =
+{
+ [ sizeof (vm_offset_t) - 3 ] = '\r',
+ [ sizeof (vm_offset_t) - 2 ] = '\n',
+ [ sizeof (vm_offset_t) - 1 ] = 0xff,
+};
+
+void
+__stack_chk_fail (void)
+{
+ panic("stack smashing detected");
+}
diff --git a/kern/debug.h b/kern/debug.h
index 64fbf8ab..f4e8200d 100644
--- a/kern/debug.h
+++ b/kern/debug.h
@@ -60,4 +60,7 @@
extern void panic_init(void);
extern void panic (const char *s, ...) __attribute__ ((noreturn));
+extern void SoftDebugger (char *message);
+extern void Debugger (char *message) __attribute__ ((noreturn));
+
#endif /* _mach_debug__debug_ */
diff --git a/kern/eventcount.c b/kern/eventcount.c
index fe7626da..6fcebff5 100644
--- a/kern/eventcount.c
+++ b/kern/eventcount.c
@@ -190,7 +190,6 @@ kern_return_t evc_wait(natural_t ev_id)
kern_return_t evc_wait_clear(natural_t ev_id)
{
spl_t s;
- kern_return_t ret;
evc_t ev;
if ((ev_id >= MAX_EVCS) ||
@@ -227,7 +226,7 @@ kern_return_t evc_wait_clear(natural_t ev_id)
simple_unlock(&ev->lock);
splx(s);
- ret = KERN_NO_SPACE; /* XX */
+ return KERN_NO_SPACE; /* XX */
}
/*
diff --git a/kern/eventcount.h b/kern/eventcount.h
index e2001de1..6872a347 100644
--- a/kern/eventcount.h
+++ b/kern/eventcount.h
@@ -54,4 +54,6 @@ extern void evc_init(evc_t ev),
extern kern_return_t evc_wait(natural_t ev_id);
+extern void evc_notify_abort (thread_t thread);
+
#endif /* _KERN_EVENTCOUNT_H_ */
diff --git a/kern/exception.c b/kern/exception.c
index 2727d569..453a0758 100644
--- a/kern/exception.c
+++ b/kern/exception.c
@@ -29,8 +29,10 @@
#include <mach/message.h>
#include <mach/port.h>
#include <mach/mig_errors.h>
+#include <machine/locore.h>
#include <ipc/port.h>
#include <ipc/ipc_entry.h>
+#include <ipc/ipc_notify.h>
#include <ipc/ipc_object.h>
#include <ipc/ipc_space.h>
#include <ipc/ipc_port.h>
@@ -49,15 +51,15 @@
-extern void exception();
-extern void exception_try_task();
-extern void exception_no_server();
+extern void exception() __attribute__ ((noreturn));
+extern void exception_try_task() __attribute__ ((noreturn));
+extern void exception_no_server() __attribute__ ((noreturn));
-extern void exception_raise();
+extern void exception_raise() __attribute__ ((noreturn));
extern kern_return_t exception_parse_reply();
-extern void exception_raise_continue();
-extern void exception_raise_continue_slow();
-extern void exception_raise_continue_fast();
+extern void exception_raise_continue() __attribute__ ((noreturn));
+extern void exception_raise_continue_slow() __attribute__ ((noreturn));
+extern void exception_raise_continue_fast() __attribute__ ((noreturn));
#if MACH_KDB
extern void thread_kdb_return();
@@ -111,7 +113,6 @@ exception(_exception, code, subcode)
ith_unlock(self);
exception_try_task(_exception, code, subcode);
/*NOTREACHED*/
- return;
}
ip_lock(exc_port);
@@ -120,7 +121,6 @@ exception(_exception, code, subcode)
ip_unlock(exc_port);
exception_try_task(_exception, code, subcode);
/*NOTREACHED*/
- return;
}
/*
@@ -181,7 +181,6 @@ exception_try_task(_exception, code, subcode)
itk_unlock(task);
exception_no_server();
/*NOTREACHED*/
- return;
}
ip_lock(exc_port);
@@ -190,7 +189,6 @@ exception_try_task(_exception, code, subcode)
ip_unlock(exc_port);
exception_no_server();
/*NOTREACHED*/
- return;
}
/*
@@ -266,6 +264,7 @@ exception_no_server()
(void) task_terminate(self->task);
thread_halt_self();
+ panic("terminating the task didn't kill us");
/*NOTREACHED*/
}
@@ -694,7 +693,7 @@ exception_raise(dest_port, thread_port, task_port,
ikm_check_initialized(kmsg, kmsg->ikm_size);
assert(kmsg->ikm_size == IKM_SAVED_KMSG_SIZE);
- if (copyoutmsg((vm_offset_t) &kmsg->ikm_header, (vm_offset_t)receiver->ith_msg,
+ if (copyoutmsg(&kmsg->ikm_header, receiver->ith_msg,
sizeof(struct mach_exception)) ||
(ikm_cache() != IKM_NULL)) {
mr = ipc_kmsg_put(receiver->ith_msg, kmsg,
@@ -756,7 +755,6 @@ exception_raise(dest_port, thread_port, task_port,
ip_unlock(reply_port);
exception_raise_continue_slow(MACH_RCV_PORT_DIED, IKM_NULL, /*dummy*/0);
/*NOTREACHED*/
- return;
}
imq_lock(reply_mqueue);
@@ -931,7 +929,6 @@ exception_raise_continue_slow(mr, kmsg, seqno)
(mr == MACH_RCV_PORT_DIED)) {
thread_exception_return();
/*NOTREACHED*/
- return;
}
if (self->ith_exc != KERN_SUCCESS) {
@@ -939,7 +936,6 @@ exception_raise_continue_slow(mr, kmsg, seqno)
self->ith_exc_code,
self->ith_exc_subcode);
/*NOTREACHED*/
- return;
}
exception_no_server();
@@ -989,7 +985,6 @@ exception_raise_continue_fast(reply_port, kmsg)
if (kr == KERN_SUCCESS) {
thread_exception_return();
/*NOTREACHED*/
- return; /* help for the compiler */
}
if (self->ith_exc != KERN_SUCCESS) {
diff --git a/kern/ipc_kobject.c b/kern/ipc_kobject.c
index 473b1df4..3d8775b4 100644
--- a/kern/ipc_kobject.c
+++ b/kern/ipc_kobject.c
@@ -45,6 +45,8 @@
#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_thread.h>
+#include <vm/vm_object.h>
+#include <device/ds_routines.h>
#if MACH_MACHINE_ROUTINES
#include <machine/machine_routines.h>
@@ -316,7 +318,7 @@ ipc_kobject_destroy(
default:
#if MACH_ASSERT
- printf("ipc_kobject_destroy: port 0x%x, kobj 0x%x, type %d\n",
+ printf("ipc_kobject_destroy: port 0x%p, kobj 0x%x, type %d\n",
port, port->ip_kobject, ip_kotype(port));
#endif /* MACH_ASSERT */
break;
@@ -353,6 +355,9 @@ ipc_kobject_notify(request_header, reply_header)
case IKOT_DEVICE:
return ds_notify(request_header);
+ case IKOT_PAGER_PROXY:
+ return memory_object_proxy_notify(request_header);
+
default:
return FALSE;
}
diff --git a/kern/ipc_kobject.h b/kern/ipc_kobject.h
index 91eb30f6..cb795741 100644
--- a/kern/ipc_kobject.h
+++ b/kern/ipc_kobject.h
@@ -77,9 +77,10 @@ typedef unsigned int ipc_kobject_type_t;
#define IKOT_LOCK_SET 24
#define IKOT_CLOCK 25
#define IKOT_CLOCK_CTRL 26
+#define IKOT_PAGER_PROXY 27
/* << new entries here */
-#define IKOT_UNKNOWN 27 /* magic catchall */
-#define IKOT_MAX_TYPE 28 /* # of IKOT_ types */
+#define IKOT_UNKNOWN 28 /* magic catchall */
+#define IKOT_MAX_TYPE 29 /* # of IKOT_ types */
/* Please keep ipc/ipc_object.c:ikot_print_array up to date */
#define is_ipc_kobject(ikot) (ikot != IKOT_NONE)
@@ -113,6 +114,11 @@ extern void ipc_kobject_set(
extern void ipc_kobject_destroy(
ipc_port_t port);
+/* Deliver notifications to kobjects that care about them */
+extern boolean_t ipc_kobject_notify (
+ mach_msg_header_t *request_header,
+ mach_msg_header_t *reply_header);
+
#define null_conversion(port) (port)
#endif /* _KERN_IPC_KOBJECT_H_ */
diff --git a/kern/ipc_mig.c b/kern/ipc_mig.c
index 3adfdebd..3f55da7c 100644
--- a/kern/ipc_mig.c
+++ b/kern/ipc_mig.c
@@ -28,12 +28,15 @@
#include <mach/port.h>
#include <mach/message.h>
#include <mach/thread_status.h>
+#include <machine/locore.h>
#include <kern/ast.h>
#include <kern/debug.h>
#include <kern/ipc_tt.h>
+#include <kern/syscall_subr.h>
#include <kern/thread.h>
#include <kern/task.h>
#include <kern/ipc_kobject.h>
+#include <kern/ipc_tt.h>
#include <vm/vm_map.h>
#include <vm/vm_user.h>
#include <ipc/port.h>
@@ -45,8 +48,10 @@
#include <ipc/ipc_port.h>
#include <ipc/ipc_pset.h>
#include <ipc/ipc_thread.h>
+#include <ipc/mach_port.h>
+#include <device/dev_hdr.h>
#include <device/device_types.h>
-
+#include <device/ds_routines.h>
/*
* Routine: mach_msg_send_from_kernel
diff --git a/kern/ipc_mig.h b/kern/ipc_mig.h
new file mode 100644
index 00000000..f352bdc6
--- /dev/null
+++ b/kern/ipc_mig.h
@@ -0,0 +1,65 @@
+/*
+ * MIG IPC functions
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * MIG IPC functions.
+ *
+ */
+
+#ifndef _IPC_MIG_H_
+#define _IPC_MIG_H_
+
+#include <mach/std_types.h>
+
+/*
+ * Routine: mach_msg_send_from_kernel
+ * Purpose:
+ * Send a message from the kernel.
+ *
+ * This is used by the client side of KernelUser interfaces
+ * to implement SimpleRoutines. Currently, this includes
+ * device_reply and memory_object messages.
+ * Conditions:
+ * Nothing locked.
+ * Returns:
+ * MACH_MSG_SUCCESS Sent the message.
+ * MACH_SEND_INVALID_DATA Bad destination port.
+ */
+extern mach_msg_return_t mach_msg_send_from_kernel(
+ mach_msg_header_t *msg,
+ mach_msg_size_t send_size);
+
+/*
+ * Routine: mach_msg_abort_rpc
+ * Purpose:
+ * Destroy the thread's ith_rpc_reply port.
+ * This will interrupt a mach_msg_rpc_from_kernel
+ * with a MACH_RCV_PORT_DIED return code.
+ * Conditions:
+ * Nothing locked.
+ */
+extern void mach_msg_abort_rpc (ipc_thread_t);
+
+extern mach_msg_return_t mach_msg_rpc_from_kernel(
+ mach_msg_header_t *msg,
+ mach_msg_size_t send_size,
+ mach_msg_size_t reply_size);
+
+#endif /* _IPC_MIG_H_ */
diff --git a/kern/ipc_tt.c b/kern/ipc_tt.c
index e14ebacf..de4edc65 100644
--- a/kern/ipc_tt.c
+++ b/kern/ipc_tt.c
@@ -833,7 +833,7 @@ mach_ports_register(
*/
for (i = 0; i < portsCnt; i++)
- ports[i] = memory[i];
+ ports[i] = (ipc_port_t)memory[i];
for (; i < TASK_PORT_REGISTER_MAX; i++)
ports[i] = IP_NULL;
diff --git a/kern/ipc_tt.h b/kern/ipc_tt.h
index e2009b94..78cb43ad 100644
--- a/kern/ipc_tt.h
+++ b/kern/ipc_tt.h
@@ -86,4 +86,7 @@ convert_port_to_space(struct ipc_port *);
extern void
space_deallocate(ipc_space_t);
+mach_port_t
+mach_reply_port (void);
+
#endif /* _KERN_IPC_TT_H_ */
diff --git a/kern/lock.c b/kern/lock.c
index 909aa463..0c61227a 100644
--- a/kern/lock.c
+++ b/kern/lock.c
@@ -104,7 +104,7 @@ boolean_t simple_lock_try(simple_lock_t l)
#endif /* NCPUS > 1 */
#if NCPUS > 1
-int lock_wait_time = 100;
+static int lock_wait_time = 100;
#else /* NCPUS > 1 */
/*
@@ -112,7 +112,7 @@ int lock_wait_time = 100;
* thought something magical would happen to the
* want_write bit while we are executing.
*/
-int lock_wait_time = 0;
+static int lock_wait_time = 0;
#endif /* NCPUS > 1 */
#if MACH_SLOCKS && NCPUS == 1
diff --git a/kern/lock_mon.c b/kern/lock_mon.c
index a9a39896..14504281 100644
--- a/kern/lock_mon.c
+++ b/kern/lock_mon.c
@@ -281,7 +281,7 @@ struct lock_info *li;
li->masked, (li->masked*100)/sum,
li->stack, li->stack/sum,
li->time, li->time/sum);
- db_search_symbol(li->lock, 0, &off);
+ db_free_symbol(db_search_symbol(li->lock, 0, &off));
if (off < 1024)
db_printsym(li->lock, 0);
else {
@@ -348,7 +348,7 @@ decl_simple_lock_data(, *lock)
return;
db_printf("cpu %d looping on simple_lock(%x) called by %x\n",
cpu_number(), lock, *(((int *)&lock) -1));
- Debugger();
+ SoftDebugger("simple_lock timeout");
count = 0;
}
}
@@ -362,7 +362,7 @@ retry_bit_lock(index, addr)
if (count++ > 1000000) {
db_printf("cpu %d looping on bit_lock(%x, %x) called by %x\n",
cpu_number(), index, addr, *(((int *)&index) -1));
- Debugger();
+ SoftDebugger("bit_lock timeout");
count = 0;
}
}
diff --git a/kern/mach_clock.c b/kern/mach_clock.c
index 22c4a74a..04a31153 100644
--- a/kern/mach_clock.c
+++ b/kern/mach_clock.c
@@ -59,6 +59,7 @@
#include <sys/time.h>
#include <machine/mach_param.h> /* HZ */
#include <machine/machspl.h>
+#include <machine/model_dep.h>
#if MACH_PCSAMPLE
#include <kern/pc_sample.h>
@@ -277,8 +278,8 @@ void softclock()
*/
spl_t s;
register timer_elt_t telt;
- register int (*fcn)();
- register char *param;
+ register void (*fcn)( void * param );
+ register void *param;
while (TRUE) {
s = splsched();
@@ -527,8 +528,8 @@ timer_elt_data_t timeout_timers[NTIMERS];
* interval: timeout interval, in hz.
*/
void timeout(fcn, param, interval)
- int (*fcn)(/* char * param */);
- char * param;
+ void (*fcn)( void * param );
+ void * param;
int interval;
{
spl_t s;
@@ -555,8 +556,8 @@ void timeout(fcn, param, interval)
* and removed.
*/
boolean_t untimeout(fcn, param)
- register int (*fcn)();
- register char * param;
+ register void (*fcn)( void * param );
+ register void * param;
{
spl_t s;
register timer_elt_t elt;
diff --git a/kern/mach_clock.h b/kern/mach_clock.h
index 93237aaf..2009c709 100644
--- a/kern/mach_clock.h
+++ b/kern/mach_clock.h
@@ -40,8 +40,8 @@ extern int tick; /* number of usec per tick */
/* Time-out element. */
struct timer_elt {
queue_chain_t chain; /* chain in order of expiration */
- int (*fcn)(); /* function to call */
- char * param; /* with this parameter */
+ void (*fcn)(); /* function to call */
+ void * param; /* with this parameter */
unsigned long ticks; /* expiration time, in ticks */
int set; /* unset | set | allocated */
};
@@ -99,7 +99,7 @@ extern kern_return_t host_adjust_time(
extern void mapable_time_init (void);
/* For public timer elements. */
-extern void timeout(int (*fcn)(), char *param, int interval);
-extern boolean_t untimeout(int (*fcn)(), char *param);
+extern void timeout(void (*fcn)(void *), void *param, int interval);
+extern boolean_t untimeout(void (*fcn)(void *), void *param);
#endif /* _KERN_MACH_CLOCK_H_ */
diff --git a/kern/machine.c b/kern/machine.c
index 871801e0..bcf394c3 100644
--- a/kern/machine.c
+++ b/kern/machine.c
@@ -51,6 +51,7 @@
#include <kern/task.h>
#include <kern/thread.h>
#include <machine/machspl.h> /* for splsched */
+#include <machine/model_dep.h>
#include <sys/reboot.h>
@@ -186,7 +187,6 @@ host_reboot(host, options)
return (KERN_INVALID_HOST);
if (options & RB_DEBUGGER) {
- extern void Debugger();
Debugger("Debugger");
} else {
#ifdef parisc
@@ -688,10 +688,6 @@ Restart_pset:
*/
#ifdef __GNUC__
-extern __volatile__ void halt_cpu();
-#endif
-
-#ifdef __GNUC__
__volatile__
#endif
void processor_doshutdown(processor)
diff --git a/kern/machine.h b/kern/machine.h
new file mode 100644
index 00000000..af2b7e91
--- /dev/null
+++ b/kern/machine.h
@@ -0,0 +1,58 @@
+/*
+ * Machine abstraction functions
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Machine abstraction functions.
+ *
+ */
+
+#ifndef _MACHINE_H_
+#define _MACHINE_H_
+
+#include <mach/std_types.h>
+
+/*
+ * cpu_up:
+ *
+ * Flag the specified cpu as up and running.  Called when a processor comes
+ * online.
+ */
+extern void cpu_up (int);
+
+/*
+ * processor_assign() changes the processor set that a processor is
+ * assigned to. Any previous assignment in progress is overridden.
+ * Synchronizes with assignment completion if wait is TRUE.
+ */
+extern kern_return_t processor_assign (processor_t, processor_set_t, boolean_t);
+
+/*
+ * processor_shutdown() queues a processor up for shutdown.
+ * Any assignment in progress is overridden. It does not synchronize
+ * with the shutdown (can be called from interrupt level).
+ */
+extern kern_return_t processor_shutdown (processor_t);
+
+/*
+ * action_thread() shuts down processors or changes their assignment.
+ */
+extern void action_thread_continue (void);
+
+#endif /* _MACHINE_H_ */
diff --git a/kern/pc_sample.c b/kern/pc_sample.c
index be28ca04..c82707b2 100644
--- a/kern/pc_sample.c
+++ b/kern/pc_sample.c
@@ -30,6 +30,7 @@
#include <mach/mach_types.h> /* vm_address_t */
#include <mach/std_types.h> /* pointer_t */
#include <mach/pc_sample.h>
+#include <machine/trap.h>
#include <kern/host.h>
#include <kern/thread.h>
#include <kern/pc_sample.h>
diff --git a/kern/printf.c b/kern/printf.c
index 99dffbdf..88a527ba 100644
--- a/kern/printf.c
+++ b/kern/printf.c
@@ -114,6 +114,7 @@
*/
#include <string.h>
+#include <device/cons.h>
#include <kern/printf.h>
#include <mach/boolean.h>
#include <kern/lock.h>
@@ -510,11 +511,10 @@ void _doprnt(
/*
* Printing (to console)
*/
-extern void cnputc( char, /*not really*/vm_offset_t);
int vprintf(const char *fmt, va_list listp)
{
- _doprnt(fmt, &listp, cnputc, 16, 0);
+ _doprnt(fmt, &listp, (void (*)( char, vm_offset_t)) cnputc, 16, 0);
return 0;
}
@@ -550,7 +550,7 @@ void iprintf(const char *fmt, ...)
}
}
va_start(listp, fmt);
- _doprnt(fmt, &listp, cnputc, 16, 0);
+ _doprnt(fmt, &listp, (void (*)( char, vm_offset_t)) cnputc, 16, 0);
va_end(listp);
}
diff --git a/kern/printf.h b/kern/printf.h
index 6d41eb8d..13831986 100644
--- a/kern/printf.h
+++ b/kern/printf.h
@@ -48,5 +48,7 @@ extern void iprintf (const char *fmt, ...);
extern int vprintf(const char *fmt, va_list listp);
+extern void safe_gets (char *str, int maxlen);
+
#endif /* _MACH_SA_SYS_PRINTF_H_ */
diff --git a/kern/priority.c b/kern/priority.c
index 43d61010..feddd8ee 100644
--- a/kern/priority.c
+++ b/kern/priority.c
@@ -41,6 +41,7 @@
#include <kern/mach_clock.h>
#include <kern/mach_param.h>
#include <kern/sched.h>
+#include <kern/sched_prim.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/timer.h>
diff --git a/kern/processor.c b/kern/processor.c
index d645051c..718ff3ad 100644
--- a/kern/processor.c
+++ b/kern/processor.c
@@ -37,6 +37,7 @@
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/host.h>
+#include <kern/ipc_tt.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/task.h>
diff --git a/kern/processor.h b/kern/processor.h
index 7ff7124e..9a6c944b 100644
--- a/kern/processor.h
+++ b/kern/processor.h
@@ -237,12 +237,12 @@ typedef mach_port_t *processor_set_name_array_t;
#ifdef KERNEL
#if MACH_HOST
-extern void pset_sys_bootstrap(void);
extern void pset_sys_init(void);
#endif /* MACH_HOST */
/* Pset internal functions */
+extern void pset_sys_bootstrap(void);
extern void pset_reference(processor_set_t);
extern void pset_deallocate(processor_set_t);
extern void pset_remove_processor(processor_set_t, processor_t);
diff --git a/kern/queue.h b/kern/queue.h
index 97a0a766..1846922a 100644
--- a/kern/queue.h
+++ b/kern/queue.h
@@ -84,6 +84,7 @@ void enqueue_tail(queue_t, queue_entry_t);
queue_entry_t dequeue_head(queue_t);
queue_entry_t dequeue_tail(queue_t);
void remqueue(queue_t, queue_entry_t);
+void insque(queue_entry_t, queue_entry_t);
/*
* Macro: queue_init
diff --git a/kern/sched_prim.c b/kern/sched_prim.c
index 2596fa1b..ff942aee 100644
--- a/kern/sched_prim.c
+++ b/kern/sched_prim.c
@@ -34,6 +34,9 @@
#include <kern/printf.h>
#include <mach/machine.h>
+#include <machine/locore.h>
+#include <machine/machspl.h> /* For def'n of splsched() */
+#include <machine/model_dep.h>
#include <kern/ast.h>
#include <kern/counters.h>
#include <kern/cpu_number.h>
@@ -52,7 +55,6 @@
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
-#include <machine/machspl.h> /* For def'n of splsched() */
#if MACH_FIXPRI
#include <mach/policy.h>
@@ -68,8 +70,6 @@ int sched_usec;
thread_t sched_thread_id;
-void recompute_priorities(void); /* forward */
-void update_priority(thread_t);
void set_pri(thread_t, int, boolean_t);
void do_thread_scan(void);
@@ -160,8 +160,8 @@ void wait_queue_init(void)
void sched_init(void)
{
- recompute_priorities_timer.fcn = (int (*)())recompute_priorities;
- recompute_priorities_timer.param = (char *)0;
+ recompute_priorities_timer.fcn = recompute_priorities;
+ recompute_priorities_timer.param = NULL;
min_quantum = hz / 10; /* context switch 10 times/second */
wait_queue_init();
@@ -180,8 +180,9 @@ void sched_init(void)
* Called at splsoftclock.
*/
void thread_timeout(
- thread_t thread)
+ void *_thread)
{
+ thread_t thread = _thread;
assert(thread->timer.set == TELT_UNSET);
clear_wait(thread, THREAD_TIMED_OUT, FALSE);
@@ -216,10 +217,10 @@ void thread_set_timeout(
void thread_timeout_setup(
register thread_t thread)
{
- thread->timer.fcn = (int (*)())thread_timeout;
- thread->timer.param = (char *)thread;
- thread->depress_timer.fcn = (int (*)())thread_depress_timeout;
- thread->depress_timer.param = (char *)thread;
+ thread->timer.fcn = thread_timeout;
+ thread->timer.param = thread;
+ thread->depress_timer.fcn = (void (*)(void*))thread_depress_timeout;
+ thread->depress_timer.param = thread;
}
/*
@@ -1094,7 +1095,7 @@ void compute_my_priority(
*
* Update the priorities of all threads periodically.
*/
-void recompute_priorities(void)
+void recompute_priorities(void *param)
{
#if SIMPLE_CLOCK
int new_usec;
@@ -1944,7 +1945,7 @@ do_runq_scan(
stuck_threads[stuck_count++] = thread;
if (do_thread_scan_debug)
- printf("do_runq_scan: adding thread %#x\n", thread);
+ printf("do_runq_scan: adding thread %p\n", thread);
}
count--;
thread = next;
diff --git a/kern/sched_prim.h b/kern/sched_prim.h
index 6a4c32de..5311d160 100644
--- a/kern/sched_prim.h
+++ b/kern/sched_prim.h
@@ -104,6 +104,18 @@ extern boolean_t thread_handoff(
continuation_t continuation,
thread_t new_thread);
extern void recompute_priorities();
+extern void update_priority(
+ thread_t thread);
+extern void compute_my_priority(
+ thread_t thread);
+extern void thread_bind(
+ thread_t thread,
+ processor_t processor);
+extern void compute_priority(
+ thread_t thread,
+ boolean_t resched);
+extern void thread_timeout_setup(
+ register thread_t thread);
/*
* Routines defined as macros
diff --git a/kern/startup.c b/kern/startup.c
index d73ef904..81626dbf 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -34,6 +34,8 @@
#include <ipc/ipc_init.h>
#include <kern/cpu_number.h>
#include <kern/debug.h>
+#include <kern/machine.h>
+#include <kern/mach_factor.h>
#include <kern/mach_clock.h>
#include <kern/printf.h>
#include <kern/processor.h>
@@ -42,13 +44,17 @@
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/timer.h>
+#include <kern/xpr.h>
+#include <kern/time_stamp.h>
#include <kern/zalloc.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <machine/machspl.h>
+#include <machine/pcb.h>
#include <machine/pmap.h>
+#include <machine/model_dep.h>
#include <mach/version.h>
@@ -282,7 +288,6 @@ void cpu_launch_first_thread(th)
if (th == THREAD_NULL)
panic("cpu_launch_first_thread");
- startrtclock(); /* needs an active thread */
PMAP_ACTIVATE_KERNEL(mycpu);
active_threads[mycpu] = th;
@@ -294,6 +299,8 @@ void cpu_launch_first_thread(th)
PMAP_ACTIVATE_USER(vm_map_pmap(th->task->map), th, mycpu);
+ startrtclock(); /* needs an active thread */
+
load_context(th);
/*NOTREACHED*/
}
diff --git a/kern/syscall_subr.c b/kern/syscall_subr.c
index 4db1f168..395b9b8f 100644
--- a/kern/syscall_subr.c
+++ b/kern/syscall_subr.c
@@ -37,6 +37,7 @@
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
+#include <kern/syscall_subr.h>
#include <kern/ipc_sched.h>
#include <kern/task.h>
#include <kern/thread.h>
@@ -62,8 +63,7 @@
* lock and then be a good citizen and really suspend.
*/
-extern void thread_depress_priority(thread_t, mach_msg_timeout_t);
-extern kern_return_t thread_depress_abort(thread_t);
+void thread_depress_priority(thread_t, mach_msg_timeout_t);
void swtch_continue(void)
{
diff --git a/kern/syscall_subr.h b/kern/syscall_subr.h
index 2d2da14a..2b8bcd36 100644
--- a/kern/syscall_subr.h
+++ b/kern/syscall_subr.h
@@ -24,6 +24,8 @@
* the rights to redistribute these changes.
*/
+#include <sys/types.h>
+#include <mach/mach_types.h>
#include <kern/kern_types.h>
#ifndef _KERN_SYSCALL_SUBR_H_
@@ -33,5 +35,6 @@ extern int swtch(void);
extern int swtch_pri(int);
extern int thread_switch(mach_port_t, int, mach_msg_timeout_t);
extern void thread_depress_timeout(thread_t);
+extern kern_return_t thread_depress_abort(thread_t);
#endif /* _KERN_SYSCALL_SUBR_H_ */
diff --git a/kern/syscall_sw.c b/kern/syscall_sw.c
index 9536a63d..b2e20e66 100644
--- a/kern/syscall_sw.c
+++ b/kern/syscall_sw.c
@@ -29,12 +29,14 @@
#include <mach/port.h>
#include <mach/kern_return.h>
+#include <kern/debug.h>
#include <kern/syscall_sw.h>
/* Include declarations of the trap functions. */
#include <mach/mach_traps.h>
#include <mach/message.h>
#include <kern/syscall_subr.h>
+#include <ipc/mach_port.h>
/*
@@ -58,13 +60,13 @@ int kern_invalid_debug = 0;
mach_port_t null_port()
{
- if (kern_invalid_debug) Debugger("null_port mach trap");
+ if (kern_invalid_debug) SoftDebugger("null_port mach trap");
return(MACH_PORT_NULL);
}
kern_return_t kern_invalid()
{
- if (kern_invalid_debug) Debugger("kern_invalid mach trap");
+ if (kern_invalid_debug) SoftDebugger("kern_invalid mach trap");
return(KERN_INVALID_ARGUMENT);
}
diff --git a/kern/task.c b/kern/task.c
index 45c59c59..88da16e8 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -116,7 +116,9 @@ kern_return_t task_create(
{
register task_t new_task;
register processor_set_t pset;
+#if FAST_TAS
int i;
+#endif
new_task = (task_t) zalloc(task_zone);
if (new_task == TASK_NULL) {
diff --git a/kern/task.h b/kern/task.h
index 1337a98d..9d902435 100644
--- a/kern/task.h
+++ b/kern/task.h
@@ -149,6 +149,7 @@ extern kern_return_t task_assign(
extern kern_return_t task_assign_default(
task_t task,
boolean_t assign_threads);
+extern void consider_task_collect(void);
/*
* Internal only routines
diff --git a/kern/thread.c b/kern/thread.c
index 71f222ce..1548e143 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -42,12 +42,15 @@
#include <kern/ast.h>
#include <kern/counters.h>
#include <kern/debug.h>
+#include <kern/eventcount.h>
+#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
+#include <kern/syscall_subr.h>
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/host.h>
@@ -696,7 +699,7 @@ void thread_deallocate(
* Clean up any machine-dependent resources.
*/
if ((thread->state & TH_SWAPPED) == 0) {
- spl_t _s_ = splsched();
+ splsched();
stack_free(thread);
(void) splx(s);
thread_deallocate_stack++;
diff --git a/kern/thread.h b/kern/thread.h
index 18905f5b..3959dfce 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -265,6 +265,31 @@ extern kern_return_t thread_resume(
thread_t thread);
extern kern_return_t thread_abort(
thread_t thread);
+extern void thread_start(
+ thread_t thread,
+ continuation_t start);
+extern thread_t kernel_thread(
+ task_t task,
+ continuation_t start,
+ void *arg);
+extern kern_return_t thread_priority(
+ thread_t thread,
+ int priority,
+ boolean_t set_max);
+extern void thread_set_own_priority(
+ int priority);
+extern kern_return_t thread_max_priority(
+ thread_t thread,
+ processor_set_t pset,
+ int max_priority);
+extern kern_return_t thread_policy(
+ thread_t thread,
+ int policy,
+ int data);
+extern void consider_thread_collect(
+ void);
+extern void stack_privilege(
+ thread_t thread);
extern kern_return_t thread_get_state(
thread_t thread,
int flavor,
@@ -293,6 +318,7 @@ extern kern_return_t thread_assign(
processor_set_t new_pset);
extern kern_return_t thread_assign_default(
thread_t thread);
+extern void stack_collect(void);
#endif
/*
diff --git a/kern/time_stamp.c b/kern/time_stamp.c
index fdb02fca..22885b18 100644
--- a/kern/time_stamp.c
+++ b/kern/time_stamp.c
@@ -25,6 +25,7 @@
*/
#include <mach/std_types.h>
+#include <machine/locore.h>
#include <sys/time.h>
#include <kern/time_stamp.h>
diff --git a/kern/time_stamp.h b/kern/time_stamp.h
index 480d2f1d..becaae1b 100644
--- a/kern/time_stamp.h
+++ b/kern/time_stamp.h
@@ -62,4 +62,7 @@ unsigned ts_tick_count;
#define TS_FORMAT_DEFAULT 1
#define TS_FORMAT_MMAX 2
+
+extern void timestamp_init(void);
+
#endif /* _KERN_TIME_STAMP_H_ */
diff --git a/kern/timer.h b/kern/timer.h
index 07e46fc4..817fa356 100644
--- a/kern/timer.h
+++ b/kern/timer.h
@@ -131,6 +131,8 @@ extern void timer_switch(timer_t);
extern void timer_read(timer_t, time_value_t *);
extern void thread_read_times(thread_t, time_value_t *, time_value_t *);
extern unsigned timer_delta(timer_t, timer_save_t);
+extern void timer_normalize(timer_t);
+extern void timer_init(timer_t);
#if STAT_TIME
/*
@@ -178,4 +180,6 @@ MACRO_BEGIN \
} \
MACRO_END
+extern void init_timers(void);
+
#endif /* _KERN_TIMER_H_ */
diff --git a/kern/zalloc.c b/kern/zalloc.c
index 839e40f3..a95c7f6b 100644
--- a/kern/zalloc.c
+++ b/kern/zalloc.c
@@ -105,7 +105,7 @@ zone_t zone_zone; /* this is the zone containing other zones */
boolean_t zone_ignore_overflow = TRUE;
vm_map_t zone_map = VM_MAP_NULL;
-vm_size_t zone_map_size = 12 * 1024 * 1024;
+vm_size_t zone_map_size = 64 * 1024 * 1024;
/*
* The VM system gives us an initial chunk of memory.
@@ -214,7 +214,7 @@ zone_t zinit(size, align, max, alloc, memtype, name)
max = alloc;
if (align > 0) {
- if (align >= PAGE_SIZE)
+ if (PAGE_SIZE % align || align % sizeof(z->free_elements))
panic("zinit");
ALIGN_SIZE_UP(size, align);
}
@@ -828,6 +828,18 @@ static void zone_gc(void)
free_addr = zone_map_min_address +
PAGE_SIZE * (freep - zone_page_table);
+
+ /* Hack Hack */
+ /* Needed so that vm_map_delete's vm_map_clip_end can always
+ * get an element without having to call zget_space, which would
+ * hang because zone_map is already locked by vm_map_delete. */
+
+ extern zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
+ vm_offset_t entry1 = zalloc(vm_map_kentry_zone),
+ entry2 = zalloc(vm_map_kentry_zone);
+ zfree(vm_map_kentry_zone, entry1);
+ zfree(vm_map_kentry_zone, entry2);
+
kmem_free(zone_map, free_addr, PAGE_SIZE);
}
}
diff --git a/linux/Makefrag.am b/linux/Makefrag.am
index 8fd3cc01..fccb8070 100644
--- a/linux/Makefrag.am
+++ b/linux/Makefrag.am
@@ -526,6 +526,12 @@ liblinux_a_SOURCES += \
linux/src/drivers/net/seeq8005.h
endif
+if device_driver_sis900
+liblinux_a_SOURCES += \
+ linux/src/drivers/net/sis900.c \
+ linux/src/drivers/net/sis900.h
+endif
+
if device_driver_sk_g16
liblinux_a_SOURCES += \
linux/src/drivers/net/sk_g16.c \
diff --git a/linux/configfrag.ac b/linux/configfrag.ac
index ff465b2b..cf671fcc 100644
--- a/linux/configfrag.ac
+++ b/linux/configfrag.ac
@@ -431,6 +431,10 @@ AC_Linux_DRIVER([seeq8005],
[Ethernet controller Seeq8005],
[CONFIG_SEEQ8005],
[net])
+AC_Linux_DRIVER([sis900],
+ [Ethernet controller SiS 900],
+ [CONFIG_SIS900],
+ [net])
AC_Linux_DRIVER([sk_g16],
[Ethernet controller Schneider & Koch G16],
[CONFIG_SK_G16],
diff --git a/linux/dev/arch/i386/kernel/irq.c b/linux/dev/arch/i386/kernel/irq.c
index 4bed353d..8966dea1 100644
--- a/linux/dev/arch/i386/kernel/irq.c
+++ b/linux/dev/arch/i386/kernel/irq.c
@@ -47,7 +47,7 @@
#include <asm/io.h>
#include <asm/hardirq.h>
-extern int linux_timer_intr (void);
+extern void linux_timer_intr (void);
extern spl_t splhigh (void);
extern spl_t spl0 (void);
extern void form_pic_mask (void);
@@ -99,13 +99,13 @@ extern spl_t curr_ipl;
extern int curr_pic_mask;
extern int pic_mask[];
-extern int intnull (), prtnull ();
+extern void intnull (), prtnull ();
/*
* Generic interrupt handler for Linux devices.
* Set up a fake `struct pt_regs' then call the real handler.
*/
-static int
+static void
linux_intr (int irq)
{
struct pt_regs regs;
@@ -128,9 +128,6 @@ linux_intr (int irq)
restore_flags (flags);
intr_count--;
-
- /* Not used. by OKUJI Yoshinori. */
- return 0;
}
/*
@@ -208,11 +205,10 @@ enable_irq (unsigned int irq_nr)
/*
* Default interrupt handler for Linux.
*/
-int
+void
linux_bad_intr (int irq)
{
mask_irq (irq);
- return 0;
}
static int
@@ -685,7 +681,7 @@ void __global_restore_flags(unsigned long flags)
#endif
-static int (*old_clock_handler) ();
+static void (*old_clock_handler) ();
static int old_clock_pri;
void
diff --git a/linux/dev/drivers/net/Space.c b/linux/dev/drivers/net/Space.c
index db40d0ce..213fa9b5 100644
--- a/linux/dev/drivers/net/Space.c
+++ b/linux/dev/drivers/net/Space.c
@@ -93,6 +93,7 @@ extern int yellowfin_probe(struct device *);
extern int eepro100_probe(struct device *);
extern int epic100_probe(struct device *);
extern int rtl8139_probe(struct device *);
+extern int sis900_probe(struct device *);
extern int tlan_probe(struct device *);
extern int isa515_probe(struct device *);
extern int pcnet32_probe(struct device *);
@@ -137,6 +138,9 @@ ethif_probe(struct device *dev)
#ifdef CONFIG_RTL8139
&& rtl8139_probe(dev)
#endif
+#ifdef CONFIG_SIS900
+ && sis900_probe(dev)
+#endif
#ifdef CONFIG_VIA_RHINE
&& via_rhine_probe(dev)
#endif
diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c
index c06b5d27..b920fb62 100644
--- a/linux/dev/glue/block.c
+++ b/linux/dev/glue/block.c
@@ -572,7 +572,7 @@ out:
}
#define BH_Bounce 16
-#define MAX_BUF VM_MAP_COPY_PAGE_LIST_MAX
+#define MAX_BUF 8
/* Perform read/write operation RW on device DEV
starting at *off to/from buffer *BUF of size *RESID.
@@ -627,7 +627,7 @@ rdwr_full (int rw, kdev_t dev, loff_t *off, char **buf, int *resid, int bshift)
bh->b_size = cc;
bhp[i] = bh;
nb += cc >> bshift;
- blk += nb;
+ blk += cc >> bshift;
if (++i == MAX_BUF)
break;
}
@@ -1704,6 +1704,25 @@ device_get_status (void *d, dev_flavor_t flavor, dev_status_t status,
return D_SUCCESS;
}
+static io_return_t
+device_set_status (void *d, dev_flavor_t flavor, dev_status_t status,
+ mach_msg_type_number_t *status_count)
+{
+ struct block_data *bd = d;
+
+ switch (flavor)
+ {
+ case BLKRRPART:
+ {
+ DECL_DATA;
+ INIT_DATA();
+ return (*bd->ds->fops->ioctl) (&td.inode, &td.file, flavor, 0);
+ }
+ }
+
+ return D_INVALID_OPERATION;
+}
+
struct device_emulation_ops linux_block_emulation_ops =
{
NULL,
@@ -1715,7 +1734,7 @@ struct device_emulation_ops linux_block_emulation_ops =
NULL,
device_read,
NULL,
- NULL,
+ device_set_status,
device_get_status,
NULL,
NULL,
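With device_set_status now wired into linux_block_emulation_ops, a task holding a disk device port can ask the glue layer to forward BLKRRPART to the Linux driver's ioctl handler and have the partition table re-read. A hedged client-side sketch follows; the header locations, the use of the MIG-generated device_set_status stub from device.defs, and the exposure of BLKRRPART to clients are assumptions of the example.

/* Hypothetical caller of the new set_status path. */
#include <mach.h>
#include <device/device.h>      /* device_set_status() user stub (assumed location) */

#ifndef BLKRRPART
#define BLKRRPART 0x125f        /* Linux "re-read partition table" ioctl number */
#endif

kern_return_t
reread_partition_table (mach_port_t disk_port)
{
  int dummy = 0;                /* BLKRRPART carries no payload */
  return device_set_status (disk_port, BLKRRPART, &dummy, 0);
}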
diff --git a/linux/dev/glue/kmem.c b/linux/dev/glue/kmem.c
index 908d0869..8c21ce7d 100644
--- a/linux/dev/glue/kmem.c
+++ b/linux/dev/glue/kmem.c
@@ -48,6 +48,7 @@ extern int printf (const char *, ...);
Increase MEM_CHUNKS if the kernel is running out of memory. */
#define MEM_CHUNK_SIZE (64 * 1024)
#define MEM_CHUNKS 7
+#define MEM_DMA_LIMIT (16 * 1024 * 1024)
/* Mininum amount that linux_kmalloc will allocate. */
#define MIN_ALLOC 12
@@ -100,7 +101,7 @@ linux_kmem_init ()
{
/* Allocate memory. */
pages_free[i].start = (unsigned long) alloc_contig_mem (MEM_CHUNK_SIZE,
- 16 * 1024 * 1024,
+ MEM_DMA_LIMIT,
0xffff, &pages);
assert (pages_free[i].start);
@@ -109,7 +110,7 @@ linux_kmem_init ()
/* Sanity check: ensure pages are contiguous and within DMA limits. */
for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE)
{
- assert (p->phys_addr < 16 * 1024 * 1024);
+ assert (p->phys_addr < MEM_DMA_LIMIT);
assert (p->phys_addr + PAGE_SIZE
== ((vm_page_t) p->pageq.next)->phys_addr);
diff --git a/linux/dev/glue/net.c b/linux/dev/glue/net.c
index b76e0986..095428d3 100644
--- a/linux/dev/glue/net.c
+++ b/linux/dev/glue/net.c
@@ -533,6 +533,17 @@ static io_return_t
device_get_status (void *d, dev_flavor_t flavor, dev_status_t status,
mach_msg_type_number_t *count)
{
+ if (flavor == NET_FLAGS)
+ {
+ struct net_data *net = (struct net_data *) d;
+
+ if (*count != 1)
+ return D_INVALID_SIZE;
+
+ status[0] = net->dev->flags;
+ return D_SUCCESS;
+ }
+
if(flavor >= SIOCIWFIRST && flavor <= SIOCIWLAST)
{
/* handle wireless ioctl */
@@ -592,6 +603,21 @@ static io_return_t
device_set_status(void *d, dev_flavor_t flavor, dev_status_t status,
mach_msg_type_number_t count)
{
+ if (flavor == NET_FLAGS)
+ {
+ if (count != 1)
+ return D_INVALID_SIZE;
+
+ short flags = status[0];
+ struct net_data *net = (struct net_data *) d;
+
+ dev_change_flags (net->dev, flags);
+
+ /* Change the flags of the Mach device, too. */
+ net->ifnet.if_flags = net->dev->flags;
+ return D_SUCCESS;
+ }
+
if(flavor < SIOCIWFIRST || flavor > SIOCIWLAST)
return D_INVALID_OPERATION;
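The two NET_FLAGS branches above let clients read and change interface flags through the ordinary device RPCs. Below is a hedged sketch of toggling promiscuous mode from user space; the NET_FLAGS flavor is assumed to be exported by device/net_status.h as part of this change, and the IFF_PROMISC value is assumed to match the one the kernel glue uses.

/* Hypothetical client of the new NET_FLAGS flavor. */
#include <mach.h>
#include <device/device.h>      /* device_get_status()/device_set_status() stubs */
#include <device/net_status.h>  /* NET_FLAGS, assumed to be added by this change */

#ifndef IFF_PROMISC
#define IFF_PROMISC 0x100       /* assumed to match the kernel's flag value */
#endif

kern_return_t
net_set_promisc (mach_port_t netdev, boolean_t enable)
{
  int flags;
  mach_msg_type_number_t count = 1;   /* the kernel side expects exactly one word */
  kern_return_t kr;

  kr = device_get_status (netdev, NET_FLAGS, &flags, &count);
  if (kr != KERN_SUCCESS)
    return kr;

  if (enable)
    flags |= IFF_PROMISC;
  else
    flags &= ~IFF_PROMISC;

  return device_set_status (netdev, NET_FLAGS, &flags, 1);
}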
diff --git a/linux/dev/include/asm-i386/segment.h b/linux/dev/include/asm-i386/segment.h
deleted file mode 100644
index d3f6d27b..00000000
--- a/linux/dev/include/asm-i386/segment.h
+++ /dev/null
@@ -1,375 +0,0 @@
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
-
-#ifdef MACH
-
-#define KERNEL_CS 0x08
-#define KERNEL_DS 0x10
-
-#define USER_CS 0x17
-#define USER_DS 0x1F
-
-#else /* !MACH */
-
-#define KERNEL_CS 0x10
-#define KERNEL_DS 0x18
-
-#define USER_CS 0x23
-#define USER_DS 0x2B
-
-#endif /* !MACH */
-
-#ifndef __ASSEMBLY__
-
-/*
- * Uh, these should become the main single-value transfer routines..
- * They automatically use the right size if we just have the right
- * pointer type..
- */
-#define put_user(x,ptr) __put_user((unsigned long)(x),(ptr),sizeof(*(ptr)))
-#define get_user(ptr) ((__typeof__(*(ptr)))__get_user((ptr),sizeof(*(ptr))))
-
-/*
- * This is a silly but good way to make sure that
- * the __put_user function is indeed always optimized,
- * and that we use the correct sizes..
- */
-extern int bad_user_access_length(void);
-
-/*
- * dummy pointer type structure.. gcc won't try to do something strange
- * this way..
- */
-struct __segment_dummy { unsigned long a[100]; };
-#define __sd(x) ((struct __segment_dummy *) (x))
-#define __const_sd(x) ((const struct __segment_dummy *) (x))
-
-static inline void __put_user(unsigned long x, void * y, int size)
-{
- switch (size) {
- case 1:
- __asm__ ("movb %b1,%%fs:%0"
- :"=m" (*__sd(y))
- :"iq" ((unsigned char) x), "m" (*__sd(y)));
- break;
- case 2:
- __asm__ ("movw %w1,%%fs:%0"
- :"=m" (*__sd(y))
- :"ir" ((unsigned short) x), "m" (*__sd(y)));
- break;
- case 4:
- __asm__ ("movl %1,%%fs:%0"
- :"=m" (*__sd(y))
- :"ir" (x), "m" (*__sd(y)));
- break;
- default:
- bad_user_access_length();
- }
-}
-
-static inline unsigned long __get_user(const void * y, int size)
-{
- unsigned long result;
-
- switch (size) {
- case 1:
- __asm__ ("movb %%fs:%1,%b0"
- :"=q" (result)
- :"m" (*__const_sd(y)));
- return (unsigned char) result;
- case 2:
- __asm__ ("movw %%fs:%1,%w0"
- :"=r" (result)
- :"m" (*__const_sd(y)));
- return (unsigned short) result;
- case 4:
- __asm__ ("movl %%fs:%1,%0"
- :"=r" (result)
- :"m" (*__const_sd(y)));
- return result;
- default:
- return bad_user_access_length();
- }
-}
-
-#if defined(__GNUC__) && (__GNUC__ == 2) && (__GNUC_MINOR__ < 95)
-static inline void __generic_memcpy_tofs(void * to, const void * from, unsigned long n)
-{
- __asm__ volatile
- ("cld\n"
- "push %%es\n"
- "push %%fs\n"
- "cmpl $3,%0\n"
- "pop %%es\n"
- "jbe 1f\n"
- "movl %%edi,%%ecx\n"
- "negl %%ecx\n"
- "andl $3,%%ecx\n"
- "subl %%ecx,%0\n"
- "rep; movsb\n"
- "movl %0,%%ecx\n"
- "shrl $2,%%ecx\n"
- "rep; movsl\n"
- "andl $3,%0\n"
- "1: movl %0,%%ecx\n"
- "rep; movsb\n"
- "pop %%es\n"
- :"=abd" (n)
- :"0" (n),"D" ((long) to),"S" ((long) from)
- :"cx","di","si");
-}
-
-static inline void __constant_memcpy_tofs(void * to, const void * from, unsigned long n)
-{
- switch (n) {
- case 0:
- return;
- case 1:
- __put_user(*(const char *) from, (char *) to, 1);
- return;
- case 2:
- __put_user(*(const short *) from, (short *) to, 2);
- return;
- case 3:
- __put_user(*(const short *) from, (short *) to, 2);
- __put_user(*(2+(const char *) from), 2+(char *) to, 1);
- return;
- case 4:
- __put_user(*(const int *) from, (int *) to, 4);
- return;
- case 8:
- __put_user(*(const int *) from, (int *) to, 4);
- __put_user(*(1+(const int *) from), 1+(int *) to, 4);
- return;
- case 12:
- __put_user(*(const int *) from, (int *) to, 4);
- __put_user(*(1+(const int *) from), 1+(int *) to, 4);
- __put_user(*(2+(const int *) from), 2+(int *) to, 4);
- return;
- case 16:
- __put_user(*(const int *) from, (int *) to, 4);
- __put_user(*(1+(const int *) from), 1+(int *) to, 4);
- __put_user(*(2+(const int *) from), 2+(int *) to, 4);
- __put_user(*(3+(const int *) from), 3+(int *) to, 4);
- return;
- }
-#define COMMON(x) \
-__asm__("cld\n\t" \
- "push %%es\n\t" \
- "push %%fs\n\t" \
- "pop %%es\n\t" \
- "rep ; movsl\n\t" \
- x \
- "pop %%es" \
- : /* no outputs */ \
- :"c" (n/4),"D" ((long) to),"S" ((long) from) \
- :"cx","di","si")
-
- switch (n % 4) {
- case 0:
- COMMON("");
- return;
- case 1:
- COMMON("movsb\n\t");
- return;
- case 2:
- COMMON("movsw\n\t");
- return;
- case 3:
- COMMON("movsw\n\tmovsb\n\t");
- return;
- }
-#undef COMMON
-}
-
-static inline void __generic_memcpy_fromfs(void * to, const void * from, unsigned long n)
-{
- __asm__ volatile
- ("cld\n"
- "cmpl $3,%0\n"
- "jbe 1f\n"
- "movl %%edi,%%ecx\n"
- "negl %%ecx\n"
- "andl $3,%%ecx\n"
- "subl %%ecx,%0\n"
- "fs; rep; movsb\n"
- "movl %0,%%ecx\n"
- "shrl $2,%%ecx\n"
- "fs; rep; movsl\n"
- "andl $3,%0\n"
- "1:movl %0,%%ecx\n"
- "fs; rep; movsb\n"
- :"=abd" (n)
- :"0" (n),"D" ((long) to),"S" ((long) from)
- :"cx","di","si", "memory");
-}
-
-static inline void __constant_memcpy_fromfs(void * to, const void * from, unsigned long n)
-{
- switch (n) {
- case 0:
- return;
- case 1:
- *(char *)to = __get_user((const char *) from, 1);
- return;
- case 2:
- *(short *)to = __get_user((const short *) from, 2);
- return;
- case 3:
- *(short *) to = __get_user((const short *) from, 2);
- *((char *) to + 2) = __get_user(2+(const char *) from, 1);
- return;
- case 4:
- *(int *) to = __get_user((const int *) from, 4);
- return;
- case 8:
- *(int *) to = __get_user((const int *) from, 4);
- *(1+(int *) to) = __get_user(1+(const int *) from, 4);
- return;
- case 12:
- *(int *) to = __get_user((const int *) from, 4);
- *(1+(int *) to) = __get_user(1+(const int *) from, 4);
- *(2+(int *) to) = __get_user(2+(const int *) from, 4);
- return;
- case 16:
- *(int *) to = __get_user((const int *) from, 4);
- *(1+(int *) to) = __get_user(1+(const int *) from, 4);
- *(2+(int *) to) = __get_user(2+(const int *) from, 4);
- *(3+(int *) to) = __get_user(3+(const int *) from, 4);
- return;
- }
-#define COMMON(x) \
-__asm__("cld\n\t" \
- "rep ; fs ; movsl\n\t" \
- x \
- : /* no outputs */ \
- :"c" (n/4),"D" ((long) to),"S" ((long) from) \
- :"cx","di","si","memory")
-
- switch (n % 4) {
- case 0:
- COMMON("");
- return;
- case 1:
- COMMON("fs ; movsb");
- return;
- case 2:
- COMMON("fs ; movsw");
- return;
- case 3:
- COMMON("fs ; movsw\n\tfs ; movsb");
- return;
- }
-#undef COMMON
-}
-
-#define memcpy_fromfs(to, from, n) \
-(__builtin_constant_p(n) ? \
- __constant_memcpy_fromfs((to),(from),(n)) : \
- __generic_memcpy_fromfs((to),(from),(n)))
-
-#define memcpy_tofs(to, from, n) \
-(__builtin_constant_p(n) ? \
- __constant_memcpy_tofs((to),(from),(n)) : \
- __generic_memcpy_tofs((to),(from),(n)))
-
-
-#else /* code for gcc-2.95.x and newer follows */
-
-static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
-{
- char *d = (char *)to;
- const char *s = (const char *)from;
- while (n-- > 0) {
- *d++ = __get_user(s++, 1);
- }
-}
-
-static inline void memcpy_tofs(void * to, const void * from, unsigned long n)
-{
- char *d = (char *)to;
- const char *s = (const char *)from;
- while (n-- > 0) {
- __put_user(*s++, d++, 1);
- }
-}
-
-#endif /* not gcc-2.95 */
-
-/*
- * These are deprecated..
- *
- * Use "put_user()" and "get_user()" with the proper pointer types instead.
- */
-
-#define get_fs_byte(addr) __get_user((const unsigned char *)(addr),1)
-#define get_fs_word(addr) __get_user((const unsigned short *)(addr),2)
-#define get_fs_long(addr) __get_user((const unsigned int *)(addr),4)
-
-#define put_fs_byte(x,addr) __put_user((x),(unsigned char *)(addr),1)
-#define put_fs_word(x,addr) __put_user((x),(unsigned short *)(addr),2)
-#define put_fs_long(x,addr) __put_user((x),(unsigned int *)(addr),4)
-
-#ifdef WE_REALLY_WANT_TO_USE_A_BROKEN_INTERFACE
-
-static inline unsigned short get_user_word(const short *addr)
-{
- return __get_user(addr, 2);
-}
-
-static inline unsigned char get_user_byte(const char * addr)
-{
- return __get_user(addr,1);
-}
-
-static inline unsigned long get_user_long(const int *addr)
-{
- return __get_user(addr, 4);
-}
-
-static inline void put_user_byte(char val,char *addr)
-{
- __put_user(val, addr, 1);
-}
-
-static inline void put_user_word(short val,short * addr)
-{
- __put_user(val, addr, 2);
-}
-
-static inline void put_user_long(unsigned long val,int * addr)
-{
- __put_user(val, addr, 4);
-}
-
-#endif
-
-/*
- * Someone who knows GNU asm better than I should double check the following.
- * It seems to work, but I don't know if I'm doing something subtly wrong.
- * --- TYT, 11/24/91
- * [ nothing wrong here, Linus: I just changed the ax to be any reg ]
- */
-
-static inline unsigned long get_fs(void)
-{
- unsigned long _v;
- __asm__("mov %%fs,%w0":"=r" (_v):"0" (0));
- return _v;
-}
-
-static inline unsigned long get_ds(void)
-{
- unsigned long _v;
- __asm__("mov %%ds,%w0":"=r" (_v):"0" (0));
- return _v;
-}
-
-static inline void set_fs(unsigned long val)
-{
- __asm__ __volatile__("mov %w0,%%fs": /* no output */ :"r" (val));
-}
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_SEGMENT_H */
diff --git a/linux/dev/include/linux/netdevice.h b/linux/dev/include/linux/netdevice.h
index ff25df16..e1a9a34d 100644
--- a/linux/dev/include/linux/netdevice.h
+++ b/linux/dev/include/linux/netdevice.h
@@ -271,6 +271,7 @@ extern void net_bh(void);
extern void dev_tint(struct linux_device *dev);
#endif
+extern int dev_change_flags(struct linux_device *dev, short flags);
extern int dev_get_info(char *buffer, char **start, off_t offset, int length, int dummy);
extern int dev_ioctl(unsigned int cmd, void *);
diff --git a/linux/dev/kernel/printk.c b/linux/dev/kernel/printk.c
index 9dc86cb3..1c45b245 100644
--- a/linux/dev/kernel/printk.c
+++ b/linux/dev/kernel/printk.c
@@ -27,6 +27,7 @@
#include <stdarg.h>
#include <asm/system.h>
#include <kern/assert.h>
+#include <device/cons.h>
static char buf[2048];
@@ -40,7 +41,6 @@ printk (char *fmt, ...)
{
va_list args;
int n, flags;
- extern void cnputc ();
char *p, *msg, *buf_end;
static int msg_level = -1;
diff --git a/linux/dev/net/core/dev.c b/linux/dev/net/core/dev.c
index efe02467..cbdf8ccf 100644
--- a/linux/dev/net/core/dev.c
+++ b/linux/dev/net/core/dev.c
@@ -1618,3 +1618,31 @@ int net_dev_init(void)
init_bh(NET_BH, net_bh);
return 0;
}
+
+/*
+ * Change the flags of device DEV to FLAGS.
+ */
+int dev_change_flags (struct device *dev, short flags)
+{
+ if (securelevel > 0)
+ flags &= ~IFF_PROMISC;
+
+ /*
+ * Set the flags on our device.
+ */
+
+ dev->flags = (flags &
+ (IFF_BROADCAST | IFF_DEBUG | IFF_LOOPBACK |
+ IFF_POINTOPOINT | IFF_NOTRAILERS | IFF_RUNNING |
+ IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI | IFF_SLAVE
+ | IFF_MASTER | IFF_MULTICAST))
+ | (dev->flags & (IFF_SOFTHEADERS|IFF_UP));
+
+ /* The flags are taken into account (multicast, promiscuous, ...)
+ in the set_multicast_list handler. */
+ if ((dev->flags & IFF_UP) && dev->set_multicast_list != NULL)
+ dev->set_multicast_list (dev);
+
+ return 0;
+}
+
diff --git a/linux/pcmcia-cs/clients/smc91c92_cs.c b/linux/pcmcia-cs/clients/smc91c92_cs.c
index 782d504d..69215156 100644
--- a/linux/pcmcia-cs/clients/smc91c92_cs.c
+++ b/linux/pcmcia-cs/clients/smc91c92_cs.c
@@ -1472,7 +1472,7 @@ static int smc_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx_timeout_check(dev, smc_tx_timeout);
skb_tx_check(dev, skb);
- DEBUG(2, "%s: smc_start_xmit(length = %d) called,"
+ DEBUG(2, "%s: smc_start_xmit(length = %ld) called,"
" status %4.4x.\n", dev->name, skb->len, inw(ioaddr + 2));
if (smc->saved_skb) {
diff --git a/linux/pcmcia-cs/glue/ds.c b/linux/pcmcia-cs/glue/ds.c
index 733e4b63..8f88b553 100644
--- a/linux/pcmcia-cs/glue/ds.c
+++ b/linux/pcmcia-cs/glue/ds.c
@@ -91,7 +91,7 @@ struct mach_socket_device {
static void
-device_deallocate(void *p)
+ds_device_deallocate(void *p)
{
mach_device_t device = (mach_device_t) p;
@@ -146,7 +146,7 @@ dev_to_port(void *d)
ipc_port_t port = ipc_port_make_send(dev->port);
- device_deallocate(dev);
+ ds_device_deallocate(dev);
return port;
}
@@ -441,8 +441,8 @@ device_get_status(void *d, dev_flavor_t req, dev_status_t arg,
struct device_emulation_ops linux_pcmcia_emulation_ops =
{
- mach_device_reference,
- device_deallocate,
+ (void*) mach_device_reference,
+ ds_device_deallocate,
dev_to_port,
device_open,
device_close,
diff --git a/linux/pcmcia-cs/glue/pcmcia_glue.h b/linux/pcmcia-cs/glue/pcmcia_glue.h
index fc308ee8..691c1b9b 100644
--- a/linux/pcmcia-cs/glue/pcmcia_glue.h
+++ b/linux/pcmcia-cs/glue/pcmcia_glue.h
@@ -255,12 +255,6 @@ init_dev_name(struct net_device *dev, dev_node_t node)
#define module_exit(a)
/*
- * Debugging convenience.
- */
-extern void Debugger(void);
-
-
-/*
* TODO: We don't have `disable_irq_nosync', do we need it? This is used
* by the axnet_cs client driver only.
*/
diff --git a/linux/pcmcia-cs/glue/wireless_glue.h b/linux/pcmcia-cs/glue/wireless_glue.h
index 7883d357..05eef4ba 100644
--- a/linux/pcmcia-cs/glue/wireless_glue.h
+++ b/linux/pcmcia-cs/glue/wireless_glue.h
@@ -65,7 +65,7 @@ static inline int
schedule_task(struct tq_struct *task)
{
printk(KERN_INFO "schedule_task: not implemented, task=%p\n", task);
- Debugger();
+ Debugger("schedule_task");
return 0; /* fail */
}
diff --git a/linux/src/arch/i386/kernel/bios32.c b/linux/src/arch/i386/kernel/bios32.c
index 5a0fc385..0b357be0 100644
--- a/linux/src/arch/i386/kernel/bios32.c
+++ b/linux/src/arch/i386/kernel/bios32.c
@@ -166,7 +166,7 @@ static unsigned long bios32_service(unsigned long service)
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%edi)"
+ __asm__("lcall *(%%edi)"
: "=a" (return_code),
"=b" (address),
"=c" (length),
@@ -209,7 +209,7 @@ static int check_pcibios(void)
pci_indirect.address = pcibios_entry;
save_flags(flags); cli();
- __asm__("lcall (%%edi)\n\t"
+ __asm__("lcall *(%%edi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:\tshl $8, %%eax\n\t"
@@ -254,7 +254,7 @@ static int pci_bios_find_class (unsigned int class_code, unsigned short index,
unsigned long flags;
save_flags(flags); cli();
- __asm__ ("lcall (%%edi)\n\t"
+ __asm__ ("lcall *(%%edi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
@@ -279,7 +279,7 @@ static int pci_bios_find_device (unsigned short vendor, unsigned short device_id
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%edi)\n\t"
+ __asm__("lcall *(%%edi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
@@ -304,7 +304,7 @@ static int pci_bios_read_config_byte(unsigned char bus,
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%esi)\n\t"
+ __asm__("lcall *(%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
@@ -326,7 +326,7 @@ static int pci_bios_read_config_word (unsigned char bus,
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%esi)\n\t"
+ __asm__("lcall *(%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
@@ -348,7 +348,7 @@ static int pci_bios_read_config_dword (unsigned char bus,
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%esi)\n\t"
+ __asm__("lcall *(%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
@@ -370,7 +370,7 @@ static int pci_bios_write_config_byte (unsigned char bus,
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%esi)\n\t"
+ __asm__("lcall *(%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
@@ -392,7 +392,7 @@ static int pci_bios_write_config_word (unsigned char bus,
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%esi)\n\t"
+ __asm__("lcall *(%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
@@ -414,7 +414,7 @@ static int pci_bios_write_config_dword (unsigned char bus,
unsigned long flags;
save_flags(flags); cli();
- __asm__("lcall (%%esi)\n\t"
+ __asm__("lcall *(%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
diff --git a/linux/src/drivers/block/ide.c b/linux/src/drivers/block/ide.c
index 5dcd0aa9..18f2e763 100644
--- a/linux/src/drivers/block/ide.c
+++ b/linux/src/drivers/block/ide.c
@@ -1495,6 +1495,10 @@ static inline void do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned
if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
return;
#endif /* CONFIG_BLK_DEV_TRITON */
+ if (drive->mult_count)
+ ide_set_handler (drive, &multwrite_intr, WAIT_CMD);
+ else
+ ide_set_handler (drive, &write_intr, WAIT_CMD);
OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, io_base+IDE_COMMAND_OFFSET);
if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
printk("%s: no DRQ after issuing %s\n", drive->name,
@@ -1505,10 +1509,8 @@ static inline void do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned
cli();
if (drive->mult_count) {
HWGROUP(drive)->wrq = *rq; /* scratchpad */
- ide_set_handler (drive, &multwrite_intr, WAIT_CMD);
ide_multwrite(drive, drive->mult_count);
} else {
- ide_set_handler (drive, &write_intr, WAIT_CMD);
ide_output_data(drive, rq->buffer, SECTOR_WORDS);
}
return;
@@ -2914,11 +2916,23 @@ static void probe_cmos_for_drives (ide_hwif_t *hwif)
for (unit = 0; unit < MAX_DRIVES; ++unit) {
ide_drive_t *drive = &hwif->drives[unit];
if ((cmos_disks & (0xf0 >> (unit*4))) && !drive->present && !drive->nobios) {
- drive->cyl = drive->bios_cyl = *(unsigned short *)BIOS;
- drive->head = drive->bios_head = *(BIOS+2);
- drive->sect = drive->bios_sect = *(BIOS+14);
- drive->ctl = *(BIOS+8);
- drive->present = 1;
+ unsigned short cyl = *(unsigned short *)BIOS;
+ unsigned char head = *(BIOS+2);
+ unsigned char sect = *(BIOS+14);
+ unsigned char ctl = *(BIOS+8);
+ if (cyl > 0 && head > 0 && sect > 0 && sect < 64) {
+ drive->cyl = drive->bios_cyl = cyl;
+ drive->head = drive->bios_head = head;
+ drive->sect = drive->bios_sect = sect;
+ drive->ctl = ctl;
+ drive->present = 1;
+ printk("hd%d: got CHS=%d/%d/%d CTL=%x from BIOS\n",
+ unit, cyl, head, sect, ctl);
+
+ } else {
+ printk("hd%d: CHS=%d/%d/%d CTL=%x from BIOS ignored\n",
+ unit, cyl, head, sect, ctl);
+ }
}
BIOS += 16;
}
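Two fixes are folded into the ide.c hunk above: the completion handler is installed before the write command is issued, so an interrupt that fires immediately can no longer arrive with no handler set, and CMOS-reported drive geometry is only trusted after a plausibility check. A minimal restatement of that check, factored out purely for illustration:

/* Illustration only: the geometry test applied to the BIOS/CMOS values above. */
static int
chs_from_bios_ok (unsigned short cyl, unsigned char head, unsigned char sect)
{
  /* CHS sector numbers are 1-based and at most 63; a zero cylinder or
   * head count means the CMOS entry is unusable. */
  return cyl > 0 && head > 0 && sect > 0 && sect < 64;
}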
diff --git a/linux/src/drivers/block/ide.h b/linux/src/drivers/block/ide.h
index 5310ac8f..44331607 100644
--- a/linux/src/drivers/block/ide.h
+++ b/linux/src/drivers/block/ide.h
@@ -23,7 +23,7 @@
* REALLY_SLOW_IO can be defined in ide.c and ide-cd.c, if necessary
*/
#undef REALLY_FAST_IO /* define if ide ports are perfect */
-#define INITIAL_MULT_COUNT 0 /* off=0; on=2,4,8,16,32, etc.. */
+#define INITIAL_MULT_COUNT 16 /* off=0; on=2,4,8,16,32, etc.. */
#ifndef SUPPORT_SLOW_DATA_PORTS /* 1 to support slow data ports */
#define SUPPORT_SLOW_DATA_PORTS 1 /* 0 to reduce kernel size */
diff --git a/linux/src/drivers/block/triton.c b/linux/src/drivers/block/triton.c
index 739b1b94..2e3d1878 100644
--- a/linux/src/drivers/block/triton.c
+++ b/linux/src/drivers/block/triton.c
@@ -129,6 +129,13 @@
const char *good_dma_drives[] = {"Micropolis 2112A",
"CONNER CTMA 4000",
"CONNER CTT8000-A",
+ // Should work, but kvm/qemu seem to produce
+ // issues:
+ // hd1 irq timeout: status=0xd8 { Busy }
+ // hd0: disabled DMA
+ // hd1: disabled DMA
+ // ide0: reset: success
+ //"QEMU HARDDISK",
NULL};
/*
@@ -409,7 +416,17 @@ void ide_init_triton (byte bus, byte fn)
goto quit;
}
if ((pcicmd & 4) == 0) {
- printk("ide: BM-DMA feature is not enabled (BIOS)\n");
+ printk("ide: BM-DMA feature is not enabled (BIOS), enabling\n");
+ pcicmd |= 4;
+ pcibios_write_config_word(bus, fn, 0x04, pcicmd);
+ if ((rc = pcibios_read_config_word(bus, fn, 0x04, &pcicmd))) {
+ printk("ide: Couldn't read back PCI command\n");
+ goto quit;
+ }
+ }
+
+ if ((pcicmd & 4) == 0) {
+ printk("ide: BM-DMA feature couldn't be enabled\n");
} else {
/*
* Get the bmiba base address
diff --git a/linux/src/drivers/net/apricot.c b/linux/src/drivers/net/apricot.c
index 0799f620..d106e50d 100644
--- a/linux/src/drivers/net/apricot.c
+++ b/linux/src/drivers/net/apricot.c
@@ -98,7 +98,7 @@ struct i596_tbd {
unsigned short size;
unsigned short pad;
struct i596_tbd *next;
- char *data;
+ unsigned char *data;
};
struct tx_cmd {
@@ -115,7 +115,7 @@ struct i596_rfd {
long rbd;
unsigned short count;
unsigned short size;
- char data[1532];
+ unsigned char data[1532];
};
#define RX_RING_SIZE 8
@@ -187,7 +187,7 @@ static void i596_interrupt(int irq, void *dev_id, struct pt_regs *regs);
static int i596_close(struct device *dev);
static struct enet_statistics *i596_get_stats(struct device *dev);
static void i596_add_cmd(struct device *dev, struct i596_cmd *cmd);
-static void print_eth(char *);
+static void print_eth(unsigned char *);
static void set_multicast_list(struct device *dev);
@@ -659,20 +659,20 @@ i596_start_xmit(struct sk_buff *skb, struct device *dev)
}
-static void print_eth(char *add)
+static void print_eth(unsigned char *add)
{
int i;
printk ("Dest ");
for (i = 0; i < 6; i++)
- printk(" %2.2X", (unsigned char)add[i]);
+ printk(" %2.2X", add[i]);
printk ("\n");
printk ("Source");
for (i = 0; i < 6; i++)
- printk(" %2.2X", (unsigned char)add[i+6]);
+ printk(" %2.2X", add[i+6]);
printk ("\n");
- printk ("type %2.2X%2.2X\n", (unsigned char)add[12], (unsigned char)add[13]);
+ printk ("type %2.2X%2.2X\n", add[12], add[13]);
}
int apricot_probe(struct device *dev)
@@ -980,7 +980,7 @@ static void set_multicast_list(struct device *dev)
memcpy(cp, dmi,6);
cp+=6;
}
- print_eth (((char *)(cmd + 1)) + 2);
+ print_eth (((unsigned char *)(cmd + 1)) + 2);
i596_add_cmd(dev, cmd);
}
else
diff --git a/linux/src/drivers/net/rtl8139.c b/linux/src/drivers/net/rtl8139.c
index 1d00f686..e97c905d 100644
--- a/linux/src/drivers/net/rtl8139.c
+++ b/linux/src/drivers/net/rtl8139.c
@@ -705,10 +705,6 @@ static int rtl8129_open(struct net_device *dev)
int rx_buf_len_idx;
MOD_INC_USE_COUNT;
- if (request_irq(dev->irq, &rtl8129_interrupt, SA_SHIRQ, dev->name, dev)) {
- MOD_DEC_USE_COUNT;
- return -EAGAIN;
- }
/* The Rx ring allocation size is 2^N + delta, which is worst-case for
the kernel binary-buddy allocation. We allocate the Tx bounce buffers
@@ -737,6 +733,11 @@ static int rtl8129_open(struct net_device *dev)
tp->rx_config =
(RX_FIFO_THRESH << 13) | (rx_buf_len_idx << 11) | (RX_DMA_BURST<<8);
+ if (request_irq(dev->irq, &rtl8129_interrupt, SA_SHIRQ, dev->name, dev)) {
+ MOD_DEC_USE_COUNT;
+ return -EAGAIN;
+ }
+
rtl_hw_start(dev);
netif_start_tx_queue(dev);
diff --git a/linux/src/drivers/net/sis900.c b/linux/src/drivers/net/sis900.c
new file mode 100644
index 00000000..d9e5f63a
--- /dev/null
+++ b/linux/src/drivers/net/sis900.c
@@ -0,0 +1,1803 @@
+/* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux.
+ Copyright 1999 Silicon Integrated System Corporation
+ Revision: 1.06.11 Apr. 30 2002
+
+ Modified from the driver which is originally written by Donald Becker.
+
+ This software may be used and distributed according to the terms
+ of the GNU Public License (GPL), incorporated herein by reference.
+ Drivers based on this skeleton fall under the GPL and must retain
+ the authorship (implicit copyright) notice.
+
+ References:
+ SiS 7016 Fast Ethernet PCI Bus 10/100 Mbps LAN Controller with OnNow Support,
+ preliminary Rev. 1.0 Jan. 14, 1998
+ SiS 900 Fast Ethernet PCI Bus 10/100 Mbps LAN Single Chip with OnNow Support,
+ preliminary Rev. 1.0 Nov. 10, 1998
+ SiS 7014 Single Chip 100BASE-TX/10BASE-T Physical Layer Solution,
+ preliminary Rev. 1.0 Jan. 18, 1998
+ http://www.sis.com.tw/support/databook.htm
+
+ Rev 1.06.11 Apr. 25 2002 Mufasa Yang (mufasa@sis.com.tw) added SiS962 support
+ Rev 1.06.10 Dec. 18 2001 Hui-Fen Hsu workaround for EDB & RTL8201 PHY
+ Rev 1.06.09 Sep. 28 2001 Hui-Fen Hsu update for 630ET & workaround for ICS1893 PHY
+ Rev 1.06.08 Mar. 2 2001 Hui-Fen Hsu (hfhsu@sis.com.tw) some bug fix & 635M/B support
+ Rev 1.06.07 Jan. 8 2001 Lei-Chun Chang added RTL8201 PHY support
+ Rev 1.06.06 Sep. 6 2000 Lei-Chun Chang added ICS1893 PHY support
+ Rev 1.06.05 Aug. 22 2000 Lei-Chun Chang (lcchang@sis.com.tw) modified 630E equalier workaroung rule
+ Rev 1.06.03 Dec. 23 1999 Ollie Lho Third release
+ Rev 1.06.02 Nov. 23 1999 Ollie Lho bug in mac probing fixed
+ Rev 1.06.01 Nov. 16 1999 Ollie Lho CRC calculation provide by Joseph Zbiciak (im14u2c@primenet.com)
+ Rev 1.06 Nov. 4 1999 Ollie Lho (ollie@sis.com.tw) Second release
+ Rev 1.05.05 Oct. 29 1999 Ollie Lho (ollie@sis.com.tw) Single buffer Tx/Rx
+ Chin-Shan Li (lcs@sis.com.tw) Added AMD Am79c901 HomePNA PHY support
+ Rev 1.05 Aug. 7 1999 Jim Huang (cmhuang@sis.com.tw) Initial release
+*/
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/malloc.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/bios32.h>
+#include <linux/compatmac.h>
+
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <asm/processor.h> /* Processor type for cache alignment. */
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <linux/delay.h>
+#include <asm/types.h>
+#include "sis900.h"
+
+
+#if LINUX_VERSION_CODE < 0x20159
+#define dev_free_skb(skb) dev_kfree_skb (skb, FREE_WRITE);
+#else /* Grrr, incompatible changes should change the name. */
+#define dev_free_skb(skb) dev_kfree_skb(skb);
+#endif
+
+static const char *version =
+"sis900.c: modified v1.06.11 4/30/2002";
+
+static int max_interrupt_work = 20;
+static int multicast_filter_limit = 128;
+
+#define sis900_debug debug
+static int sis900_debug = 0;
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT (4*HZ)
+
+enum pci_flags_bit {
+ PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4,
+ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3,
+};
+
+struct mac_chip_info {
+ const char *name;
+ u16 vendor_id, device_id, flags;
+ int io_size;
+ struct device *(*probe) (struct mac_chip_info *mac, long ioaddr, int irq,
+ int pci_index, unsigned char pci_device_fn, unsigned char pci_bus, struct device * net_dev);
+};
+static struct device * sis900_mac_probe (struct mac_chip_info * mac, long ioaddr, int irq,
+ int pci_index, unsigned char pci_device_fn,
+ unsigned char pci_bus, struct device * net_dev);
+static struct mac_chip_info mac_chip_table[] = {
+ { "SiS 900 PCI Fast Ethernet", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_900,
+ PCI_COMMAND_IO|PCI_COMMAND_MASTER, SIS900_TOTAL_SIZE, sis900_mac_probe},
+ { "SiS 7016 PCI Fast Ethernet",PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_7016,
+ PCI_COMMAND_IO|PCI_COMMAND_MASTER, SIS900_TOTAL_SIZE, sis900_mac_probe},
+ {0,}, /* 0-terminated list. */
+};
+
+static void sis900_read_mode(struct device *net_dev, int *speed, int *duplex);
+
+static struct mii_chip_info {
+ const char * name;
+ u16 phy_id0;
+ u16 phy_id1;
+ u8 phy_types;
+#define HOME 0x0001
+#define LAN 0x0002
+#define MIX 0x0003
+} mii_chip_table[] = {
+ { "SiS 900 Internal MII PHY", 0x001d, 0x8000, LAN },
+ { "SiS 7014 Physical Layer Solution", 0x0016, 0xf830, LAN },
+ { "AMD 79C901 10BASE-T PHY", 0x0000, 0x6B70, LAN },
+ { "AMD 79C901 HomePNA PHY", 0x0000, 0x6B90, HOME},
+ { "ICS LAN PHY", 0x0015, 0xF440, LAN },
+ { "NS 83851 PHY", 0x2000, 0x5C20, MIX },
+ { "Realtek RTL8201 PHY", 0x0000, 0x8200, LAN },
+ {0,},
+};
+
+struct mii_phy {
+ struct mii_phy * next;
+ int phy_addr;
+ u16 phy_id0;
+ u16 phy_id1;
+ u16 status;
+ u8 phy_types;
+};
+
+typedef struct _BufferDesc {
+ u32 link;
+ u32 cmdsts;
+ u32 bufptr;
+} BufferDesc;
+
+struct sis900_private {
+ struct device *next_module;
+ struct enet_statistics stats;
+
+ /* struct pci_dev * pci_dev;*/
+ unsigned char pci_bus;
+ unsigned char pci_device_fn;
+ int pci_index;
+
+ struct mac_chip_info * mac;
+ struct mii_phy * mii;
+ struct mii_phy * first_mii; /* record the first mii structure */
+ unsigned int cur_phy;
+
+ struct timer_list timer; /* Link status detection timer. */
+ u8 autong_complete; /* 1: auto-negotiate complete */
+
+ unsigned int cur_rx, dirty_rx; /* producer/consumer pointers for Tx/Rx ring */
+ unsigned int cur_tx, dirty_tx;
+
+ /* The saved address of a sent/receive-in-place packet buffer */
+ struct sk_buff *tx_skbuff[NUM_TX_DESC];
+ struct sk_buff *rx_skbuff[NUM_RX_DESC];
+ BufferDesc tx_ring[NUM_TX_DESC];
+ BufferDesc rx_ring[NUM_RX_DESC];
+
+ unsigned int tx_full; /* The Tx queue is full. */
+ int LinkOn;
+};
+
+#ifdef MODULE
+#if LINUX_VERSION_CODE > 0x20115
+MODULE_AUTHOR("Jim Huang <cmhuang@sis.com.tw>, Ollie Lho <ollie@sis.com.tw>");
+MODULE_DESCRIPTION("SiS 900 PCI Fast Ethernet driver");
+MODULE_PARM(multicast_filter_limit, "i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM(debug, "i");
+#endif
+#endif
+
+static int sis900_open(struct device *net_dev);
+static int sis900_mii_probe (unsigned char pci_bus, unsigned char pci_device_fn, struct device * net_dev);
+static void sis900_init_rxfilter (struct device * net_dev);
+static u16 read_eeprom(long ioaddr, int location);
+static u16 mdio_read(struct device *net_dev, int phy_id, int location);
+static void mdio_write(struct device *net_dev, int phy_id, int location, int val);
+static void sis900_timer(unsigned long data);
+static void sis900_check_mode (struct device *net_dev, struct mii_phy *mii_phy);
+static void sis900_tx_timeout(struct device *net_dev);
+static void sis900_init_tx_ring(struct device *net_dev);
+static void sis900_init_rx_ring(struct device *net_dev);
+static int sis900_start_xmit(struct sk_buff *skb, struct device *net_dev);
+static int sis900_rx(struct device *net_dev);
+static void sis900_finish_xmit (struct device *net_dev);
+static void sis900_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
+static int sis900_close(struct device *net_dev);
+static int mii_ioctl(struct device *net_dev, struct ifreq *rq, int cmd);
+static struct enet_statistics *sis900_get_stats(struct device *net_dev);
+static u16 sis900_compute_hashtable_index(u8 *addr, u8 revision);
+static void set_rx_mode(struct device *net_dev);
+static void sis900_reset(struct device *net_dev);
+static void sis630_set_eq(struct device *net_dev, u8 revision);
+static u16 sis900_default_phy(struct device * net_dev);
+static void sis900_set_capability( struct device *net_dev ,struct mii_phy *phy);
+static u16 sis900_reset_phy(struct device *net_dev, int phy_addr);
+static void sis900_auto_negotiate(struct device *net_dev, int phy_addr);
+static void sis900_set_mode (long ioaddr, int speed, int duplex);
+
+/* A list of all installed SiS900 devices, for removing the driver module. */
+static struct device *root_sis900_dev = NULL;
+
+#ifdef HAVE_DEVLIST
+struct netdev_entry netcard_drv =
+ {"sis900", sis900_probe, SIS900_TOTAL_SIZE, NULL};
+#endif
+
+/* walk through every Ethernet PCI device to see whether it matches one of the cards in our list */
+int sis900_probe (struct device * net_dev)
+{
+ int found = 0;
+ int pci_index = 0;
+ unsigned char pci_bus, pci_device_fn;
+ long ioaddr;
+ int irq;
+
+ if (!pcibios_present())
+ return -ENODEV;
+
+ for (; pci_index < 0xff; pci_index++)
+ {
+ u16 vendor, device, pci_command;
+ struct mac_chip_info *mac;
+
+ if (pcibios_find_class (PCI_CLASS_NETWORK_ETHERNET << 8, pci_index,
+ &pci_bus, &pci_device_fn) != PCIBIOS_SUCCESSFUL)
+ break;
+
+ pcibios_read_config_word(pci_bus, pci_device_fn, PCI_VENDOR_ID, &vendor);
+ pcibios_read_config_word(pci_bus, pci_device_fn, PCI_DEVICE_ID, &device);
+
+ for (mac = mac_chip_table; mac->vendor_id; mac++)
+ {
+ if (vendor == mac->vendor_id && device == mac->device_id) break;
+ }
+
+ /* pci_dev does not match any of our cards */
+ if (mac->vendor_id == 0)
+ continue;
+
+ {
+ u32 pci_ioaddr;
+ u8 pci_irq_line;
+
+ pcibios_read_config_byte(pci_bus, pci_device_fn,
+ PCI_INTERRUPT_LINE, &pci_irq_line);
+ pcibios_read_config_dword(pci_bus, pci_device_fn,
+ PCI_BASE_ADDRESS_0, &pci_ioaddr);
+ ioaddr = pci_ioaddr & ~3;
+ irq = pci_irq_line;
+
+ if ((mac->flags & PCI_USES_IO) &&
+ check_region (pci_ioaddr, mac->io_size))
+ continue;
+
+ pcibios_read_config_word(pci_bus, pci_device_fn,
+ PCI_COMMAND, &pci_command);
+
+ {
+ u8 lat;
+
+ pcibios_read_config_byte(pci_bus, pci_device_fn, PCI_LATENCY_TIMER, &lat);
+ if (lat < 16) {
+ printk("PCI: Increasing latency timer of device %02x:%02x to 64\n",
+ pci_bus, pci_device_fn);
+ pcibios_write_config_byte(pci_bus, pci_device_fn, PCI_LATENCY_TIMER, 64);
+ }
+ }
+ net_dev = mac->probe (mac, ioaddr, irq, pci_index, pci_device_fn, pci_bus, net_dev);
+ if (net_dev != NULL)
+ {
+ found++;
+ }
+ net_dev = NULL;
+ }
+ }
+ return found ? 0 : -ENODEV;
+
+}
+
+/* older SiS900 and friends, use EEPROM to store MAC address */
+static int
+sis900_get_mac_addr(long ioaddr, struct device *net_dev)
+{
+ u16 signature;
+ int i;
+
+ /* check to see if we have sane EEPROM */
+ signature = (u16) read_eeprom(ioaddr, EEPROMSignature);
+ if (signature == 0xffff || signature == 0x0000) {
+ printk (KERN_INFO "%s: Error EERPOM read %x\n",
+ net_dev->name, signature);
+ return 0;
+ }
+
+ /* get MAC address from EEPROM */
+ for (i = 0; i < 3; i++)
+ ((u16 *)(net_dev->dev_addr))[i] = read_eeprom(ioaddr, i+EEPROMMACAddr);
+ return 1;
+}
+
+/* SiS630E model, use APC CMOS RAM to store MAC address */
+static int sis630e_get_mac_addr(long ioaddr, int pci_index, struct device *net_dev)
+{
+ u8 reg;
+ int i;
+ u8 pci_bus, pci_dfn;
+ int not_found;
+
+ not_found = pcibios_find_device(0x1039, 0x0008,
+ pci_index,
+ &pci_bus,
+ &pci_dfn);
+ if (not_found) {
+ printk("%s: Can not find ISA bridge\n", net_dev->name);
+ return 0;
+ }
+ pcibios_read_config_byte(pci_bus, pci_dfn, 0x48, &reg);
+ pcibios_write_config_byte(pci_bus, pci_dfn, 0x48, reg | 0x40);
+
+ for (i = 0; i < 6; i++) {
+ outb(0x09 + i, 0x70);
+ ((u8 *)(net_dev->dev_addr))[i] = inb(0x71);
+ }
+ pcibios_write_config_byte(pci_bus, pci_dfn, 0x48, reg & ~0x40);
+
+ return 1;
+}
+
+/* 635 model : set Mac reload bit and get mac address from rfdr */
+static int sis635_get_mac_addr(struct device *net_dev)
+{
+ long ioaddr = net_dev->base_addr;
+ u32 rfcrSave;
+ u32 i;
+
+ rfcrSave = inl(rfcr + ioaddr);
+
+ outl(rfcrSave | RELOAD, ioaddr + cr);
+ outl(0, ioaddr + cr);
+
+ /* disable packet filtering before setting filter */
+ outl(rfcrSave & ~RFEN, rfcr + ioaddr);
+
+ /* load MAC addr to filter data register */
+ for (i = 0 ; i < 3 ; i++) {
+ outl((i << RFADDR_shift), ioaddr + rfcr);
+ *( ((u16 *)net_dev->dev_addr) + i) = inw(ioaddr + rfdr);
+ }
+
+ /* enable packet filtering */
+ outl(rfcrSave | RFEN, rfcr + ioaddr);
+
+ return 1;
+}
+
+
+/**
+ * sis962_get_mac_addr: - Get MAC address for SiS962 model
+ * @pci_dev: the sis900 pci device
+ * @net_dev: the net device to get address for
+ *
+ * The SiS962 model uses an EEPROM, shared by the LAN and 1394 parts, to
+ * store the MAC address. Before accessing the EEPROM, send the EEREQ
+ * signal to the hardware and wait for EEGNT; only while EEGNT is ON is
+ * the LAN part permitted to access the EEPROM. After the MAC address has
+ * been read, send EEDONE to release the EEPROM again.
+ * MAC address is read into @net_dev->dev_addr.
+ */
+
+static int sis962_get_mac_addr(struct device *net_dev)
+{
+ long ioaddr = net_dev->base_addr;
+ long ee_addr = ioaddr + mear;
+ u32 waittime = 0;
+ int i;
+
+ outl(EEREQ, ee_addr);
+ while(waittime < 2000) {
+ if(inl(ee_addr) & EEGNT) {
+ /* get MAC address from EEPROM */
+ for (i = 0; i < 3; i++)
+ ((u16 *)(net_dev->dev_addr))[i] = read_eeprom(ioaddr, i+EEPROMMACAddr);
+ outl(EEDONE, ee_addr);
+ return 1;
+ } else {
+ udelay(1);
+ waittime ++;
+ }
+ }
+ outl(EEDONE, ee_addr);
+ return 0;
+}
+
+struct device *
+sis900_mac_probe (struct mac_chip_info *mac, long ioaddr, int irq, int pci_index,
+ unsigned char pci_device_fn, unsigned char pci_bus, struct device * net_dev)
+{
+ struct sis900_private *sis_priv;
+ static int did_version = 0;
+
+ u8 revision;
+ int i, ret = 0;
+
+ if (did_version++ == 0)
+ printk(KERN_INFO "%s\n", version);
+
+ if ((net_dev = init_etherdev(net_dev, 0)) == NULL)
+ return NULL;
+
+ if ((net_dev->priv = kmalloc(sizeof(struct sis900_private), GFP_KERNEL)) == NULL) {
+ unregister_netdev(net_dev);
+ return NULL;
+ }
+
+ sis_priv = net_dev->priv;
+ memset(sis_priv, 0, sizeof(struct sis900_private));
+
+ /* We do a request_region() to register /proc/ioports info. */
+ request_region(ioaddr, mac->io_size, net_dev->name);
+ net_dev->base_addr = ioaddr;
+ net_dev->irq = irq;
+
+ sis_priv->mac = mac;
+ sis_priv->pci_bus = pci_bus;
+ sis_priv->pci_device_fn = pci_device_fn;
+ sis_priv->pci_index = pci_index;
+
+ pcibios_read_config_byte(pci_bus, pci_device_fn, PCI_CLASS_REVISION, &revision);
+
+ if ( revision == SIS630E_900_REV )
+ ret = sis630e_get_mac_addr(ioaddr, pci_index, net_dev);
+ else if ((revision > 0x81) && (revision <= 0x90))
+ ret = sis635_get_mac_addr(net_dev);
+ else if (revision == SIS962_900_REV)
+ ret = sis962_get_mac_addr(net_dev);
+ else
+ ret = sis900_get_mac_addr(ioaddr, net_dev);
+
+ if (ret == 0) {
+ unregister_netdev(net_dev);
+ return NULL;
+ }
+
+ /* print some information about our NIC */
+ printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ", net_dev->name, mac->name,
+ ioaddr, irq);
+ for (i = 0; i < 5; i++)
+ printk("%2.2x:", (u8)net_dev->dev_addr[i]);
+ printk("%2.2x.\n", net_dev->dev_addr[i]);
+
+ /* 630ET : set the mii access mode as software-mode */
+ if (revision == SIS630ET_900_REV)
+ outl(ACCESSMODE | inl(ioaddr + cr), ioaddr + cr);
+
+ /* probe for mii transceiver */
+ if (sis900_mii_probe(pci_bus, pci_device_fn, net_dev) == 0) {
+ unregister_netdev(net_dev);
+ kfree(sis_priv);
+ release_region(ioaddr, mac->io_size);
+ return NULL;
+ }
+
+ sis_priv->next_module = root_sis900_dev;
+ root_sis900_dev = net_dev;
+
+ /* The SiS900-specific entries in the device structure. */
+ net_dev->open = &sis900_open;
+ net_dev->hard_start_xmit = &sis900_start_xmit;
+ net_dev->stop = &sis900_close;
+ net_dev->get_stats = &sis900_get_stats;
+ net_dev->set_multicast_list = &set_rx_mode;
+ net_dev->do_ioctl = &mii_ioctl;
+
+ return net_dev;
+}
+
+/* sis900_mii_probe: - Probe MII PHY for sis900 */
+static int sis900_mii_probe (unsigned char pci_bus, unsigned char pci_device_fn, struct device * net_dev)
+{
+ struct sis900_private * sis_priv = (struct sis900_private *)net_dev->priv;
+ u16 poll_bit = MII_STAT_LINK, status = 0;
+ unsigned int timeout = jiffies + 5 * HZ;
+ int phy_addr;
+ u8 revision;
+
+ sis_priv->mii = NULL;
+
+ /* search for total of 32 possible mii phy addresses */
+ for (phy_addr = 0; phy_addr < 32; phy_addr++) {
+ struct mii_phy * mii_phy = NULL;
+ u16 mii_status;
+ int i;
+
+ for(i=0; i<2; i++)
+ mii_status = mdio_read(net_dev, phy_addr, MII_STATUS);
+
+ if (mii_status == 0xffff || mii_status == 0x0000)
+ /* the mii is not accessible, try next one */
+ continue;
+
+ if ((mii_phy = kmalloc(sizeof(struct mii_phy), GFP_KERNEL)) == NULL) {
+ printk(KERN_INFO "Cannot allocate mem for struct mii_phy\n");
+ return 0;
+ }
+
+ mii_phy->phy_id0 = mdio_read(net_dev, phy_addr, MII_PHY_ID0);
+ mii_phy->phy_id1 = mdio_read(net_dev, phy_addr, MII_PHY_ID1);
+ mii_phy->phy_addr = phy_addr;
+ mii_phy->status = mii_status;
+ mii_phy->next = sis_priv->mii;
+ sis_priv->mii = mii_phy;
+ sis_priv->first_mii = mii_phy;
+
+ for (i=0; mii_chip_table[i].phy_id1; i++)
+ if ( ( mii_phy->phy_id0 == mii_chip_table[i].phy_id0 ) &&
+ ( (mii_phy->phy_id1 & 0xFFF0) == mii_chip_table[i].phy_id1 )){
+
+ mii_phy->phy_types = mii_chip_table[i].phy_types;
+ if(mii_chip_table[i].phy_types == MIX)
+ mii_phy->phy_types =
+ (mii_status & (MII_STAT_CAN_TX_FDX | MII_STAT_CAN_TX))?LAN:HOME;
+ printk(KERN_INFO "%s: %s transceiver found at address %d.\n",
+ net_dev->name, mii_chip_table[i].name, phy_addr);
+ break;
+ }
+
+ if( !mii_chip_table[i].phy_id1 )
+ printk(KERN_INFO "%s: Unknown PHY transceiver found at address %d.\n",
+ net_dev->name, phy_addr);
+ }
+
+ if (sis_priv->mii == NULL) {
+ printk(KERN_INFO "%s: No MII transceivers found!\n",
+ net_dev->name);
+ return 0;
+ }
+
+ /* Select default PHY to put in sis_priv->mii & sis_priv->cur_phy */
+ sis_priv->mii = NULL;
+ sis900_default_phy( net_dev );
+
+ /* Reset PHY if default PHY is internal sis900 */
+ if( (sis_priv->mii->phy_id0 == 0x001D) &&
+ ( (sis_priv->mii->phy_id1&0xFFF0) == 0x8000) )
+ status = sis900_reset_phy( net_dev, sis_priv->cur_phy );
+
+ /* workaround for ICS1893 PHY */
+ if ((sis_priv->mii->phy_id0 == 0x0015) &&
+ ((sis_priv->mii->phy_id1&0xFFF0) == 0xF440))
+ mdio_write(net_dev, sis_priv->cur_phy, 0x0018, 0xD200);
+
+ if( status & MII_STAT_LINK ){
+ while (poll_bit)
+ {
+ poll_bit ^= (mdio_read(net_dev, sis_priv->cur_phy, MII_STATUS) & poll_bit);
+ if (jiffies >= timeout)
+ {
+ printk(KERN_WARNING "%s: reset phy and link down now\n", net_dev->name);
+ return -ETIME;
+ }
+ }
+ }
+
+ pcibios_read_config_byte(pci_bus, pci_device_fn, PCI_CLASS_REVISION, &revision);
+ if (revision == SIS630E_900_REV) {
+ /* SiS 630E has some bugs on default value of PHY registers */
+ mdio_write(net_dev, sis_priv->cur_phy, MII_ANADV, 0x05e1);
+ mdio_write(net_dev, sis_priv->cur_phy, MII_CONFIG1, 0x22);
+ mdio_write(net_dev, sis_priv->cur_phy, MII_CONFIG2, 0xff00);
+ mdio_write(net_dev, sis_priv->cur_phy, MII_MASK, 0xffc0);
+ //mdio_write(net_dev, sis_priv->cur_phy, MII_CONTROL, 0x1000);
+ }
+
+ if (sis_priv->mii->status & MII_STAT_LINK)
+ sis_priv->LinkOn = TRUE;
+ else
+ sis_priv->LinkOn = FALSE;
+
+ return 1;
+}
+
+
+/* sis900_default_phy : Select one default PHY for sis900 mac */
+static u16 sis900_default_phy(struct device * net_dev)
+{
+ struct sis900_private * sis_priv = (struct sis900_private *)net_dev->priv;
+ struct mii_phy *phy = NULL, *phy_home = NULL, *default_phy = NULL;
+ u16 status;
+
+ for( phy=sis_priv->first_mii; phy; phy=phy->next ){
+ status = mdio_read(net_dev, phy->phy_addr, MII_STATUS);
+ status = mdio_read(net_dev, phy->phy_addr, MII_STATUS);
+
+ /* Link is ON and no default PHY selected yet */
+ if ( (status & MII_STAT_LINK) && !(default_phy) )
+ default_phy = phy;
+ else{
+ status = mdio_read(net_dev, phy->phy_addr, MII_CONTROL);
+ mdio_write(net_dev, phy->phy_addr, MII_CONTROL,
+ status | MII_CNTL_AUTO | MII_CNTL_ISOLATE);
+ if( phy->phy_types == HOME )
+ phy_home = phy;
+ }
+ }
+
+ if( (!default_phy) && phy_home )
+ default_phy = phy_home;
+ else if(!default_phy)
+ default_phy = sis_priv->first_mii;
+
+ if( sis_priv->mii != default_phy ){
+ sis_priv->mii = default_phy;
+ sis_priv->cur_phy = default_phy->phy_addr;
+ printk(KERN_INFO "%s: Using transceiver found at address %d as default\n", net_dev->name,sis_priv->cur_phy);
+ }
+
+ status = mdio_read(net_dev, sis_priv->cur_phy, MII_CONTROL);
+ status &= (~MII_CNTL_ISOLATE);
+
+ mdio_write(net_dev, sis_priv->cur_phy, MII_CONTROL, status);
+ status = mdio_read(net_dev, sis_priv->cur_phy, MII_STATUS);
+ status = mdio_read(net_dev, sis_priv->cur_phy, MII_STATUS);
+
+ return status;
+}
+
+
+/* sis900_set_capability : set the media capability of network adapter */
+static void sis900_set_capability( struct device *net_dev , struct mii_phy *phy )
+{
+ u16 cap;
+ u16 status;
+
+ status = mdio_read(net_dev, phy->phy_addr, MII_STATUS);
+ status = mdio_read(net_dev, phy->phy_addr, MII_STATUS);
+
+ cap = MII_NWAY_CSMA_CD |
+ ((phy->status & MII_STAT_CAN_TX_FDX)? MII_NWAY_TX_FDX:0) |
+ ((phy->status & MII_STAT_CAN_TX) ? MII_NWAY_TX:0) |
+ ((phy->status & MII_STAT_CAN_T_FDX) ? MII_NWAY_T_FDX:0)|
+ ((phy->status & MII_STAT_CAN_T) ? MII_NWAY_T:0);
+
+ mdio_write( net_dev, phy->phy_addr, MII_ANADV, cap );
+}
+
+
+/* Delay between EEPROM clock transitions. */
+#define eeprom_delay() inl(ee_addr)
+
+/* Read the serial EEPROM through the EEPROM Access Register. Note that the
+ location is in 16-bit word units */
+static u16 read_eeprom(long ioaddr, int location)
+{
+ int i;
+ u16 retval = 0;
+ long ee_addr = ioaddr + mear;
+ u32 read_cmd = location | EEread;
+
+ outl(0, ee_addr);
+ eeprom_delay();
+ outl(EECS, ee_addr);
+ eeprom_delay();
+
+ /* Shift the read command (9) bits out. */
+ for (i = 8; i >= 0; i--) {
+ u32 dataval = (read_cmd & (1 << i)) ? EEDI | EECS : EECS;
+ outl(dataval, ee_addr);
+ eeprom_delay();
+ outl(dataval | EECLK, ee_addr);
+ eeprom_delay();
+ }
+ outb(EECS, ee_addr);
+ eeprom_delay();
+
+ /* read the 16-bits data in */
+ for (i = 16; i > 0; i--) {
+ outl(EECS, ee_addr);
+ eeprom_delay();
+ outl(EECS | EECLK, ee_addr);
+ eeprom_delay();
+ retval = (retval << 1) | ((inl(ee_addr) & EEDO) ? 1 : 0);
+ eeprom_delay();
+ }
+
+ /* Terminate the EEPROM access. */
+ outl(0, ee_addr);
+ eeprom_delay();
+// outl(EECLK, ee_addr);
+
+ return (retval);
+}
+
+/* Read and write the MII management registers using software-generated
+ serial MDIO protocol. Note that the command bits and data bits are
+ sent out separately */
+#define mdio_delay() inl(mdio_addr)
+
+static void mdio_idle(long mdio_addr)
+{
+ outl(MDIO | MDDIR, mdio_addr);
+ mdio_delay();
+ outl(MDIO | MDDIR | MDC, mdio_addr);
+}
+
+/* Synchronize the MII management interface by shifting 32 one bits out. */
+static void mdio_reset(long mdio_addr)
+{
+ int i;
+
+ for (i = 31; i >= 0; i--) {
+ outl(MDDIR | MDIO, mdio_addr);
+ mdio_delay();
+ outl(MDDIR | MDIO | MDC, mdio_addr);
+ mdio_delay();
+ }
+ return;
+}
+
+static u16 mdio_read(struct device *net_dev, int phy_id, int location)
+{
+ long mdio_addr = net_dev->base_addr + mear;
+ int mii_cmd = MIIread|(phy_id<<MIIpmdShift)|(location<<MIIregShift);
+ u16 retval = 0;
+ int i;
+
+ mdio_reset(mdio_addr);
+ mdio_idle(mdio_addr);
+
+ for (i = 15; i >= 0; i--) {
+ int dataval = (mii_cmd & (1 << i)) ? MDDIR | MDIO : MDDIR;
+ outl(dataval, mdio_addr);
+ mdio_delay();
+ outl(dataval | MDC, mdio_addr);
+ mdio_delay();
+ }
+
+ /* Read the 16 data bits. */
+ for (i = 16; i > 0; i--) {
+ outl(0, mdio_addr);
+ mdio_delay();
+ retval = (retval << 1) | ((inl(mdio_addr) & MDIO) ? 1 : 0);
+ outl(MDC, mdio_addr);
+ mdio_delay();
+ }
+ outl(0x00, mdio_addr);
+
+ return retval;
+}
+
+static void mdio_write(struct device *net_dev, int phy_id, int location, int value)
+{
+ long mdio_addr = net_dev->base_addr + mear;
+ int mii_cmd = MIIwrite|(phy_id<<MIIpmdShift)|(location<<MIIregShift);
+ int i;
+
+ mdio_reset(mdio_addr);
+ mdio_idle(mdio_addr);
+
+ /* Shift the command bits out. */
+ for (i = 15; i >= 0; i--) {
+ int dataval = (mii_cmd & (1 << i)) ? MDDIR | MDIO : MDDIR;
+ outb(dataval, mdio_addr);
+ mdio_delay();
+ outb(dataval | MDC, mdio_addr);
+ mdio_delay();
+ }
+ mdio_delay();
+
+ /* Shift the value bits out. */
+ for (i = 15; i >= 0; i--) {
+ int dataval = (value & (1 << i)) ? MDDIR | MDIO : MDDIR;
+ outl(dataval, mdio_addr);
+ mdio_delay();
+ outl(dataval | MDC, mdio_addr);
+ mdio_delay();
+ }
+ mdio_delay();
+
+ /* Clear out extra bits. */
+ for (i = 2; i > 0; i--) {
+ outb(0, mdio_addr);
+ mdio_delay();
+ outb(MDC, mdio_addr);
+ mdio_delay();
+ }
+ outl(0x00, mdio_addr);
+
+ return;
+}
+
+static u16 sis900_reset_phy(struct device *net_dev, int phy_addr)
+{
+ int i = 0;
+ u16 status;
+
+ while (i++ < 2)
+ status = mdio_read(net_dev, phy_addr, MII_STATUS);
+
+ mdio_write( net_dev, phy_addr, MII_CONTROL, MII_CNTL_RESET );
+
+ return status;
+}
+
+static int
+sis900_open(struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ long ioaddr = net_dev->base_addr;
+ u8 revision;
+
+ /* Soft reset the chip. */
+ sis900_reset(net_dev);
+
+ /* Equalizer workaround rule */
+ pcibios_read_config_byte(sis_priv->pci_bus, sis_priv->pci_device_fn, PCI_CLASS_REVISION, &revision);
+ sis630_set_eq(net_dev, revision);
+
+ if (request_irq(net_dev->irq, &sis900_interrupt, SA_SHIRQ, net_dev->name, net_dev)) {
+ return -EAGAIN;
+ }
+
+ MOD_INC_USE_COUNT;
+
+ sis900_init_rxfilter(net_dev);
+
+ sis900_init_tx_ring(net_dev);
+ sis900_init_rx_ring(net_dev);
+
+ set_rx_mode(net_dev);
+
+ net_dev->tbusy = 0;
+ net_dev->interrupt = 0;
+ net_dev->start = 1;
+
+ /* Workaround for EDB */
+ sis900_set_mode(ioaddr, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
+
+ /* Enable all known interrupts by setting the interrupt mask. */
+ outl((RxSOVR|RxORN|RxERR|RxOK|TxURN|TxERR|TxIDLE), ioaddr + imr);
+ outl(RxENA | inl(ioaddr + cr), ioaddr + cr);
+ outl(IE, ioaddr + ier);
+
+ sis900_check_mode(net_dev, sis_priv->mii);
+
+ /* Set the timer to check for link beat and perhaps switch
+ to an alternate media type. */
+ init_timer(&sis_priv->timer);
+ sis_priv->timer.expires = jiffies + HZ;
+ sis_priv->timer.data = (unsigned long)net_dev;
+ sis_priv->timer.function = &sis900_timer;
+ add_timer(&sis_priv->timer);
+
+ return 0;
+}
+
+/* set receive filter address to our MAC address */
+static void
+sis900_init_rxfilter (struct device * net_dev)
+{
+ long ioaddr = net_dev->base_addr;
+ u32 rfcrSave;
+ u32 i;
+
+ rfcrSave = inl(rfcr + ioaddr);
+
+ /* disable packet filtering before setting filter */
+ outl(rfcrSave & ~RFEN, rfcr + ioaddr);
+
+ /* load MAC addr to filter data register */
+ for (i = 0 ; i < 3 ; i++) {
+ u32 w;
+
+ w = (u32) *((u16 *)(net_dev->dev_addr)+i);
+ outl((i << RFADDR_shift), ioaddr + rfcr);
+ outl(w, ioaddr + rfdr);
+
+ if (sis900_debug > 2) {
+ printk(KERN_INFO "%s: Receive Filter Addrss[%d]=%x\n",
+ net_dev->name, i, inl(ioaddr + rfdr));
+ }
+ }
+
+ /* enable packet filtering */
+ outl(rfcrSave | RFEN, rfcr + ioaddr);
+}
+
+/* Initialize the Tx ring. */
+static void
+sis900_init_tx_ring(struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ long ioaddr = net_dev->base_addr;
+ int i;
+
+ sis_priv->tx_full = 0;
+ sis_priv->dirty_tx = sis_priv->cur_tx = 0;
+
+ for (i = 0; i < NUM_TX_DESC; i++) {
+ sis_priv->tx_skbuff[i] = NULL;
+
+ sis_priv->tx_ring[i].link = (u32) virt_to_bus(&sis_priv->tx_ring[i+1]);
+ sis_priv->tx_ring[i].cmdsts = 0;
+ sis_priv->tx_ring[i].bufptr = 0;
+ }
+ sis_priv->tx_ring[i-1].link = (u32) virt_to_bus(&sis_priv->tx_ring[0]);
+
+ /* load Transmit Descriptor Register */
+ outl(virt_to_bus(&sis_priv->tx_ring[0]), ioaddr + txdp);
+ if (sis900_debug > 2)
+ printk(KERN_INFO "%s: TX descriptor register loaded with: %8.8x\n",
+ net_dev->name, inl(ioaddr + txdp));
+}
+
+/* Initialize the Rx descriptor ring, pre-allocate receive buffers */
+static void
+sis900_init_rx_ring(struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ long ioaddr = net_dev->base_addr;
+ int i;
+
+ sis_priv->cur_rx = 0;
+ sis_priv->dirty_rx = 0;
+
+ /* init RX descriptor */
+ for (i = 0; i < NUM_RX_DESC; i++) {
+ sis_priv->rx_skbuff[i] = NULL;
+
+ sis_priv->rx_ring[i].link = (u32) virt_to_bus(&sis_priv->rx_ring[i+1]);
+ sis_priv->rx_ring[i].cmdsts = 0;
+ sis_priv->rx_ring[i].bufptr = 0;
+ }
+ sis_priv->rx_ring[i-1].link = (u32) virt_to_bus(&sis_priv->rx_ring[0]);
+
+ /* allocate sock buffers */
+ for (i = 0; i < NUM_RX_DESC; i++) {
+ struct sk_buff *skb;
+
+ if ((skb = dev_alloc_skb(RX_BUF_SIZE)) == NULL) {
+ /* not enough memory for an skbuff; this leaves a "hole"
+ in the buffer ring, and it is not clear how the
+ hardware will react to this kind of degenerate
+ buffer */
+ break;
+ }
+ skb->dev = net_dev;
+ sis_priv->rx_skbuff[i] = skb;
+ sis_priv->rx_ring[i].cmdsts = RX_BUF_SIZE;
+ sis_priv->rx_ring[i].bufptr = virt_to_bus(skb->tail);
+ }
+ sis_priv->dirty_rx = (unsigned int) (i - NUM_RX_DESC);
+
+ /* load Receive Descriptor Register */
+ outl(virt_to_bus(&sis_priv->rx_ring[0]), ioaddr + rxdp);
+ if (sis900_debug > 2)
+ printk(KERN_INFO "%s: RX descriptor register loaded with: %8.8x\n",
+ net_dev->name, inl(ioaddr + rxdp));
+}
+
+/**
+ * sis630_set_eq: - set phy equalizer value for 630 LAN
+ * @net_dev: the net device to set equalizer value
+ * @revision: 630 LAN revision number
+ *
+ * 630E equalizer workaround rule(Cyrus Huang 08/15)
+ * PHY register 14h(Test)
+ * Bit 14: 0 -- Automatically detect (default)
+ * 1 -- Manually set Equalizer filter
+ * Bit 13: 0 -- (Default)
+ * 1 -- Speed up convergence of equalizer setting
+ * Bit 9 : 0 -- (Default)
+ * 1 -- Disable Baseline Wander
+ * Bit 3~7 -- Equalizer filter setting
+ * Link ON: Set Bit 9, 13 to 1, Bit 14 to 0
+ * Then calculate equalizer value
+ * Then set equalizer value, and set Bit 14 to 1, Bit 9 to 0
+ * Link Off: Set Bit 13 to 1, Bit 14 to 0
+ * Calculating the equalizer value:
+ * When Link is ON and Bit 14 is 0, the SiS 900 PHY will auto-detect the proper equalizer value.
+ * When the equalizer is stable, this value is not fixed; it stays within
+ * a small range (e.g. 7~9). Then we take the minimum and maximum values (e.g. min=7, max=9)
+ * 0 <= max <= 4 --> set equalizer to max
+ * 5 <= max <= 14 --> set equalizer to max+1 or set equalizer to max+2 if max == min
+ * max >= 15 --> set equalizer to max+5 or set equalizer to max+6 if max == min
+ */
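+
+/* A worked illustration of the rule above (illustration only, derived from
+ * the arithmetic performed by the code below, not from the datasheet):
+ * suppose the ten reads of PHY register 14h yield equalizer samples between
+ * min=7 and max=9.  Since 5 <= max <= 14 and max != min, the value
+ * programmed is max+1 = 10, and the register is then rewritten as
+ *
+ *	reg14h = (reg14h & 0xFF07) | ((10 << 3) & 0x00F8);   bits 3..7 = 0x50
+ *	reg14h = (reg14h | 0x6000) & 0xFDFF;                  sets bits 13 and 14, clears bit 9
+ *
+ * which matches the "Link ON" sequence described above.
+ */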
+
+static void sis630_set_eq(struct device *net_dev, u8 revision)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ u16 reg14h, eq_value=0, max_value=0, min_value=0;
+ u8 host_bridge_rev = 0;
+ int i, maxcount=10;
+ int not_found;
+ u8 pci_bus, pci_device_fn;
+
+ if ( !(revision == SIS630E_900_REV || revision == SIS630EA1_900_REV ||
+ revision == SIS630A_900_REV || revision == SIS630ET_900_REV) )
+ return;
+ not_found = pcibios_find_device(SIS630_VENDOR_ID, SIS630_DEVICE_ID,
+ sis_priv->pci_index,
+ &pci_bus,
+ &pci_device_fn);
+ if (!not_found)
+ pcibios_read_config_byte(pci_bus, pci_device_fn, PCI_CLASS_REVISION, &host_bridge_rev);
+
+ if (sis_priv->LinkOn) {
+ reg14h=mdio_read(net_dev, sis_priv->cur_phy, MII_RESV);
+ mdio_write(net_dev, sis_priv->cur_phy, MII_RESV, (0x2200 | reg14h) & 0xBFFF);
+ for (i=0; i < maxcount; i++) {
+ eq_value=(0x00F8 & mdio_read(net_dev, sis_priv->cur_phy, MII_RESV)) >> 3;
+ if (i == 0)
+ max_value=min_value=eq_value;
+ max_value=(eq_value > max_value) ? eq_value : max_value;
+ min_value=(eq_value < min_value) ? eq_value : min_value;
+ }
+ /* 630E rule to determine the equalizer value */
+ if (revision == SIS630E_900_REV || revision == SIS630EA1_900_REV ||
+ revision == SIS630ET_900_REV) {
+ if (max_value < 5)
+ eq_value=max_value;
+ else if (max_value >= 5 && max_value < 15)
+ eq_value=(max_value == min_value) ? max_value+2 : max_value+1;
+ else if (max_value >= 15)
+ eq_value=(max_value == min_value) ? max_value+6 : max_value+5;
+ }
+ /* 630B0&B1 rule to determine the equalizer value */
+ if (revision == SIS630A_900_REV &&
+ (host_bridge_rev == SIS630B0 || host_bridge_rev == SIS630B1)) {
+ if (max_value == 0)
+ eq_value=3;
+ else
+ eq_value=(max_value+min_value+1)/2;
+ }
+ /* write equalizer value and setting */
+ reg14h=mdio_read(net_dev, sis_priv->cur_phy, MII_RESV);
+ reg14h=(reg14h & 0xFF07) | ((eq_value << 3) & 0x00F8);
+ reg14h=(reg14h | 0x6000) & 0xFDFF;
+ mdio_write(net_dev, sis_priv->cur_phy, MII_RESV, reg14h);
+ }
+ else {
+ reg14h=mdio_read(net_dev, sis_priv->cur_phy, MII_RESV);
+ if (revision == SIS630A_900_REV &&
+ (host_bridge_rev == SIS630B0 || host_bridge_rev == SIS630B1))
+ mdio_write(net_dev, sis_priv->cur_phy, MII_RESV, (reg14h | 0x2200) & 0xBFFF);
+ else
+ mdio_write(net_dev, sis_priv->cur_phy, MII_RESV, (reg14h | 0x2000) & 0xBFFF);
+ }
+ return;
+}
+
+
+/* On each timer tick we check two things: Link Status (ON/OFF) and
+ Link Mode (10/100, Full/Half duplex)
+*/
+static void sis900_timer(unsigned long data)
+{
+ struct device *net_dev = (struct device *)data;
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ struct mii_phy *mii_phy = sis_priv->mii;
+ static int next_tick = 5*HZ;
+ u16 status;
+ u8 revision;
+
+ if(!sis_priv->autong_complete){
+ int speed, duplex = 0;
+
+ sis900_read_mode(net_dev, &speed, &duplex);
+ if(duplex){
+ sis900_set_mode(net_dev->base_addr, speed, duplex);
+ pcibios_read_config_byte(sis_priv->pci_bus, sis_priv->pci_device_fn, PCI_CLASS_REVISION, &revision);
+ sis630_set_eq(net_dev, revision);
+ }
+
+ sis_priv->timer.expires = jiffies + HZ;
+ add_timer(&sis_priv->timer);
+ return;
+ }
+
+ status = mdio_read(net_dev, sis_priv->cur_phy, MII_STATUS);
+ status = mdio_read(net_dev, sis_priv->cur_phy, MII_STATUS);
+
+ /* Link OFF -> ON */
+ if ( !sis_priv->LinkOn ) {
+LookForLink:
+ /* Search for new PHY */
+ status = sis900_default_phy( net_dev );
+ mii_phy = sis_priv->mii;
+
+ if( status & MII_STAT_LINK ){
+ sis900_check_mode(net_dev, mii_phy);
+ sis_priv->LinkOn = TRUE;
+ }
+ }
+ /* Link ON -> OFF */
+ else{
+ if( !(status & MII_STAT_LINK) ){
+ sis_priv->LinkOn = FALSE;
+ printk(KERN_INFO "%s: Media Link Off\n", net_dev->name);
+
+ /* Change mode issue */
+ if( (mii_phy->phy_id0 == 0x001D) &&
+ ( (mii_phy->phy_id1 & 0xFFF0) == 0x8000 ))
+ sis900_reset_phy( net_dev, sis_priv->cur_phy );
+
+ pcibios_read_config_byte(sis_priv->pci_bus, sis_priv->pci_device_fn, PCI_CLASS_REVISION, &revision);
+ sis630_set_eq(net_dev, revision);
+
+ goto LookForLink;
+ }
+ }
+
+ sis_priv->timer.expires = jiffies + next_tick;
+ add_timer(&sis_priv->timer);
+}
+
+static void sis900_check_mode (struct device *net_dev, struct mii_phy *mii_phy)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ long ioaddr = net_dev->base_addr;
+ int speed, duplex;
+
+ if( mii_phy->phy_types == LAN ){
+ outl( ~EXD & inl( ioaddr + cfg ), ioaddr + cfg);
+ sis900_set_capability(net_dev , mii_phy);
+ sis900_auto_negotiate(net_dev, sis_priv->cur_phy);
+ }else{
+ outl(EXD | inl( ioaddr + cfg ), ioaddr + cfg);
+ speed = HW_SPEED_HOME;
+ duplex = FDX_CAPABLE_HALF_SELECTED;
+ sis900_set_mode(net_dev->base_addr, speed, duplex);
+ sis_priv->autong_complete = 1;
+ }
+}
+
+static void sis900_set_mode (long ioaddr, int speed, int duplex)
+{
+ u32 tx_flags = 0, rx_flags = 0;
+
+ if( inl(ioaddr + cfg) & EDB_MASTER_EN ){
+ tx_flags = TxATP | (DMA_BURST_64 << TxMXDMA_shift) | (TX_FILL_THRESH << TxFILLT_shift);
+ rx_flags = DMA_BURST_64 << RxMXDMA_shift;
+ }
+ else{
+ tx_flags = TxATP | (DMA_BURST_512 << TxMXDMA_shift) | (TX_FILL_THRESH << TxFILLT_shift);
+ rx_flags = DMA_BURST_512 << RxMXDMA_shift;
+ }
+
+ if (speed == HW_SPEED_HOME || speed == HW_SPEED_10_MBPS ) {
+ rx_flags |= (RxDRNT_10 << RxDRNT_shift);
+ tx_flags |= (TxDRNT_10 << TxDRNT_shift);
+ }
+ else {
+ rx_flags |= (RxDRNT_100 << RxDRNT_shift);
+ tx_flags |= (TxDRNT_100 << TxDRNT_shift);
+ }
+
+ if (duplex == FDX_CAPABLE_FULL_SELECTED) {
+ tx_flags |= (TxCSI | TxHBI);
+ rx_flags |= RxATX;
+ }
+
+ outl (tx_flags, ioaddr + txcfg);
+ outl (rx_flags, ioaddr + rxcfg);
+}
+
+
+static void sis900_auto_negotiate(struct device *net_dev, int phy_addr)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ int i = 0;
+ u32 status;
+
+ while (i++ < 2)
+ status = mdio_read(net_dev, phy_addr, MII_STATUS);
+
+ if (!(status & MII_STAT_LINK)){
+ printk(KERN_INFO "%s: Media Link Off\n", net_dev->name);
+ sis_priv->autong_complete = 1;
+ sis_priv->LinkOn = FALSE;
+ return;
+ }
+
+ /* (Re)start AutoNegotiate */
+ mdio_write(net_dev, phy_addr, MII_CONTROL,
+ MII_CNTL_AUTO | MII_CNTL_RST_AUTO);
+ sis_priv->autong_complete = 0;
+}
+
+
+static void sis900_read_mode(struct device *net_dev, int *speed, int *duplex)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ struct mii_phy *phy = sis_priv->mii;
+ int phy_addr = sis_priv->cur_phy;
+ u32 status;
+ u16 autoadv, autorec;
+ int i = 0;
+
+ while (i++ < 2)
+ status = mdio_read(net_dev, phy_addr, MII_STATUS);
+
+ if (!(status & MII_STAT_LINK)) return;
+
+ /* AutoNegotiate completed */
+ autoadv = mdio_read(net_dev, phy_addr, MII_ANADV);
+ autorec = mdio_read(net_dev, phy_addr, MII_ANLPAR);
+ status = autoadv & autorec;
+
+ *speed = HW_SPEED_10_MBPS;
+ *duplex = FDX_CAPABLE_HALF_SELECTED;
+
+ if (status & (MII_NWAY_TX | MII_NWAY_TX_FDX))
+ *speed = HW_SPEED_100_MBPS;
+ if (status & ( MII_NWAY_TX_FDX | MII_NWAY_T_FDX))
+ *duplex = FDX_CAPABLE_FULL_SELECTED;
+
+ sis_priv->autong_complete = 1;
+
+ /* Workaround for Realtek RTL8201 PHY issue */
+ if((phy->phy_id0 == 0x0000) && ((phy->phy_id1 & 0xFFF0) == 0x8200)){
+ if(mdio_read(net_dev, phy_addr, MII_CONTROL) & MII_CNTL_FDX)
+ *duplex = FDX_CAPABLE_FULL_SELECTED;
+ if(mdio_read(net_dev, phy_addr, 0x0019) & 0x01)
+ *speed = HW_SPEED_100_MBPS;
+ }
+
+ printk(KERN_INFO "%s: Media Link On %s %s-duplex \n",
+ net_dev->name,
+ *speed == HW_SPEED_100_MBPS ?
+ "100mbps" : "10mbps",
+ *duplex == FDX_CAPABLE_FULL_SELECTED ?
+ "full" : "half");
+}
+
+
+static void sis900_tx_timeout(struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ long ioaddr = net_dev->base_addr;
+ int i;
+
+ printk(KERN_INFO "%s: Transmit timeout, status %8.8x %8.8x \n",
+ net_dev->name, inl(ioaddr + cr), inl(ioaddr + isr));
+
+ /* Disable interrupts by clearing the interrupt mask. */
+ outl(0x0000, ioaddr + imr);
+
+ /* discard unsent packets; should this code section be protected by
+ cli()/sti() ?? */
+ sis_priv->dirty_tx = sis_priv->cur_tx = 0;
+ for (i = 0; i < NUM_TX_DESC; i++) {
+ if (sis_priv->tx_skbuff[i] != NULL) {
+ dev_free_skb(sis_priv->tx_skbuff[i]);
+ sis_priv->tx_skbuff[i] = 0;
+ sis_priv->tx_ring[i].cmdsts = 0;
+ sis_priv->tx_ring[i].bufptr = 0;
+ sis_priv->stats.tx_dropped++;
+ }
+ }
+ net_dev->trans_start = jiffies;
+ net_dev->tbusy = sis_priv->tx_full = 0;
+
+ /* FIXME: Should we restart the transmission thread here ?? */
+ outl(TxENA | inl(ioaddr + cr), ioaddr + cr);
+
+ /* Enable all known interrupts by setting the interrupt mask. */
+ outl((RxSOVR|RxORN|RxERR|RxOK|TxURN|TxERR|TxIDLE), ioaddr + imr);
+ return;
+}
+
+static int
+sis900_start_xmit(struct sk_buff *skb, struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ long ioaddr = net_dev->base_addr;
+ unsigned int entry;
+
+ /* test tbusy to see if we have a timeout situation, then set it */
+ if (test_and_set_bit(0, (void*)&net_dev->tbusy) != 0) {
+ if (jiffies - net_dev->trans_start > TX_TIMEOUT)
+ sis900_tx_timeout(net_dev);
+ return 1;
+ }
+
+ /* Calculate the next Tx descriptor entry. */
+ entry = sis_priv->cur_tx % NUM_TX_DESC;
+ sis_priv->tx_skbuff[entry] = skb;
+
+ /* set the transmit buffer descriptor and enable Transmit State Machine */
+ sis_priv->tx_ring[entry].bufptr = virt_to_bus(skb->data);
+ sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len);
+ outl(TxENA | inl(ioaddr + cr), ioaddr + cr);
+
+ if (++sis_priv->cur_tx - sis_priv->dirty_tx < NUM_TX_DESC) {
+ /* Typical path, clear tbusy to indicate more
+ transmission is possible */
+ clear_bit(0, (void*)&net_dev->tbusy);
+ } else {
+ /* no more transmit descriptors available, tbusy remains set */
+ sis_priv->tx_full = 1;
+ }
+
+ net_dev->trans_start = jiffies;
+
+ {
+ int i;
+ for (i = 0; i < 100000; i++); /* GRUIIIIIK */
+ }
+
+ if (sis900_debug > 3)
+ printk(KERN_INFO "%s: Queued Tx packet at %p size %d "
+ "to slot %d.\n",
+ net_dev->name, skb->data, (int)skb->len, entry);
+
+ return 0;
+}
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+ after the Tx thread. */
+static void sis900_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
+{
+ struct device *net_dev = (struct device *)dev_instance;
+ int boguscnt = max_interrupt_work;
+ long ioaddr = net_dev->base_addr;
+ u32 status;
+
+#if defined(__i386__)
+ /* A lock to prevent simultaneous entry bug on Intel SMP machines. */
+ if (test_and_set_bit(0, (void*)&net_dev->interrupt)) {
+ printk(KERN_INFO "%s: SMP simultaneous entry of "
+ "an interrupt handler.\n", net_dev->name);
+ net_dev->interrupt = 0; /* Avoid halting machine. */
+ return;
+ }
+#else
+ if (net_dev->interrupt) {
+ printk(KERN_INFO "%s: Re-entering the interrupt handler.\n",
+ net_dev->name);
+ return;
+ }
+ net_dev->interrupt = 1;
+#endif
+
+ do {
+ status = inl(ioaddr + isr);
+
+ if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0)
+ /* nothing interesting happened */
+ break;
+
+ /* why don't we break after the Tx/Rx case? keyword: full-duplex */
+ if (status & (RxORN | RxERR | RxOK))
+ /* Rx interrupt */
+ sis900_rx(net_dev);
+
+ if (status & (TxURN | TxERR | TxIDLE))
+ /* Tx interrupt */
+ sis900_finish_xmit(net_dev);
+
+ /* something strange happened !!! */
+ if (status & HIBERR) {
+ printk(KERN_INFO "%s: Abnormal interrupt,"
+ "status %#8.8x.\n", net_dev->name, status);
+ break;
+ }
+ if (--boguscnt < 0) {
+ printk(KERN_INFO "%s: Too much work at interrupt, "
+ "interrupt status = %#8.8x.\n",
+ net_dev->name, status);
+ break;
+ }
+ } while (1);
+
+ if (sis900_debug > 4)
+ printk(KERN_INFO "%s: exiting interrupt, "
+ "interrupt status = 0x%#8.8x.\n",
+ net_dev->name, inl(ioaddr + isr));
+
+#if defined(__i386__)
+ clear_bit(0, (void*)&net_dev->interrupt);
+#else
+ net_dev->interrupt = 0;
+#endif
+ return;
+}
+
+/* Process receive interrupt events, hand buffers to the higher layer and refill the buffer pool.
+ Note: this function is called by the interrupt handler, don't do "too much" work here */
+static int sis900_rx(struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ long ioaddr = net_dev->base_addr;
+ unsigned int entry = sis_priv->cur_rx % NUM_RX_DESC;
+ u32 rx_status = sis_priv->rx_ring[entry].cmdsts;
+
+ if (sis900_debug > 4)
+ printk(KERN_INFO "sis900_rx, cur_rx:%4.4d, dirty_rx:%4.4d "
+ "status:0x%8.8x\n",
+ sis_priv->cur_rx, sis_priv->dirty_rx, rx_status);
+
+ while (rx_status & OWN) {
+ unsigned int rx_size;
+
+ rx_size = (rx_status & DSIZE) - CRC_SIZE;
+
+ if (rx_status & (ABORT|OVERRUN|TOOLONG|RUNT|RXISERR|CRCERR|FAERR)) {
+ /* corrupted packet received */
+ if (sis900_debug > 4)
+ printk(KERN_INFO "%s: Corrupted packet "
+ "received, buffer status = 0x%8.8x.\n",
+ net_dev->name, rx_status);
+ sis_priv->stats.rx_errors++;
+ if (rx_status & OVERRUN)
+ sis_priv->stats.rx_over_errors++;
+ if (rx_status & (TOOLONG|RUNT))
+ sis_priv->stats.rx_length_errors++;
+ if (rx_status & (RXISERR | FAERR))
+ sis_priv->stats.rx_frame_errors++;
+ if (rx_status & CRCERR)
+ sis_priv->stats.rx_crc_errors++;
+ /* reset buffer descriptor state */
+ sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
+ } else {
+ struct sk_buff * skb;
+
+ /* This situation should never happen, but due to
+ some unknown bugs, it is possible that
+ we are working on a NULL sk_buff :-( */
+ if (sis_priv->rx_skbuff[entry] == NULL) {
+ printk(KERN_INFO "%s: NULL pointer "
+ "encountered in Rx ring, skipping\n",
+ net_dev->name);
+ break;
+ }
+
+ /* give the socket buffer to the upper layers */
+ skb = sis_priv->rx_skbuff[entry];
+ skb_put(skb, rx_size);
+ skb->protocol = eth_type_trans(skb, net_dev);
+ netif_rx(skb);
+
+ /* some network statistics */
+ if ((rx_status & BCAST) == MCAST)
+ sis_priv->stats.multicast++;
+ net_dev->last_rx = jiffies;
+ /* sis_priv->stats.rx_bytes += rx_size;*/
+ sis_priv->stats.rx_packets++;
+
+ /* refill the Rx buffer; what if there is not enough memory for
+ a new socket buffer ?? */
+ if ((skb = dev_alloc_skb(RX_BUF_SIZE)) == NULL) {
+ /* not enough memory for an skbuff; this leaves a "hole"
+ in the buffer ring, and it is not clear how the
+ hardware will react to this kind of degenerate
+ ring */
+ printk(KERN_INFO "%s: Memory squeeze, "
+ "deferring packet.\n",
+ net_dev->name);
+ sis_priv->rx_skbuff[entry] = NULL;
+ /* reset buffer descriptor state */
+ sis_priv->rx_ring[entry].cmdsts = 0;
+ sis_priv->rx_ring[entry].bufptr = 0;
+ sis_priv->stats.rx_dropped++;
+ break;
+ }
+ skb->dev = net_dev;
+ sis_priv->rx_skbuff[entry] = skb;
+ sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
+ sis_priv->rx_ring[entry].bufptr = virt_to_bus(skb->tail);
+ sis_priv->dirty_rx++;
+ }
+ sis_priv->cur_rx++;
+ entry = sis_priv->cur_rx % NUM_RX_DESC;
+ rx_status = sis_priv->rx_ring[entry].cmdsts;
+ } // while
+
+ /* refill the Rx buffers; what if the rate of refilling is slower than
+ the rate of consumption ?? */
+ for (;sis_priv->cur_rx - sis_priv->dirty_rx > 0; sis_priv->dirty_rx++) {
+ struct sk_buff *skb;
+
+ entry = sis_priv->dirty_rx % NUM_RX_DESC;
+
+ if (sis_priv->rx_skbuff[entry] == NULL) {
+ if ((skb = dev_alloc_skb(RX_BUF_SIZE)) == NULL) {
+ /* not enough memory for an skbuff; this leaves a "hole"
+ in the buffer ring, and it is not clear how the
+ hardware will react to this kind of degenerate
+ ring */
+ printk(KERN_INFO "%s: Memory squeeze, "
+ "deferring packet.\n",
+ net_dev->name);
+ sis_priv->stats.rx_dropped++;
+ break;
+ }
+ skb->dev = net_dev;
+ sis_priv->rx_skbuff[entry] = skb;
+ sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
+ sis_priv->rx_ring[entry].bufptr = virt_to_bus(skb->tail);
+ }
+ }
+
+ /* re-enable the potentially idle receive state machine */
+ outl(RxENA | inl(ioaddr + cr), ioaddr + cr );
+
+ return 0;
+}
+
+/* Finish up transmission of packets, check for error conditions and free the skbuffs etc.
+ Note: this function is called by the interrupt handler, don't do "too much" work here */
+static void sis900_finish_xmit (struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+
+ for (; sis_priv->dirty_tx < sis_priv->cur_tx; sis_priv->dirty_tx++) {
+ unsigned int entry;
+ u32 tx_status;
+
+ entry = sis_priv->dirty_tx % NUM_TX_DESC;
+ tx_status = sis_priv->tx_ring[entry].cmdsts;
+
+ if (tx_status & OWN) {
+ /* The packet is not transmitted yet (owned by hardware) !
+ Note: the interrupt is generated only when Tx Machine
+ is idle, so this is an almost impossible case */
+ break;
+ }
+
+ if (tx_status & (ABORT | UNDERRUN | OWCOLL)) {
+ /* packet unsuccessfully transmitted */
+ if (sis900_debug > 4)
+ printk(KERN_INFO "%s: Transmit "
+ "error, Tx status %8.8x.\n",
+ net_dev->name, tx_status);
+ sis_priv->stats.tx_errors++;
+ if (tx_status & UNDERRUN)
+ sis_priv->stats.tx_fifo_errors++;
+ if (tx_status & ABORT)
+ sis_priv->stats.tx_aborted_errors++;
+ if (tx_status & NOCARRIER)
+ sis_priv->stats.tx_carrier_errors++;
+ if (tx_status & OWCOLL)
+ sis_priv->stats.tx_window_errors++;
+ } else {
+ /* packet successfully transmitted */
+ sis_priv->stats.collisions += (tx_status & COLCNT) >> 16;
+ /* sis_priv->stats.tx_bytes += tx_status & DSIZE;*/
+ sis_priv->stats.tx_packets++;
+ }
+ /* Free the original skb. */
+ dev_free_skb(sis_priv->tx_skbuff[entry]);
+ sis_priv->tx_skbuff[entry] = NULL;
+ sis_priv->tx_ring[entry].bufptr = 0;
+ sis_priv->tx_ring[entry].cmdsts = 0;
+ }
+
+ if (sis_priv->tx_full && net_dev->tbusy &&
+ sis_priv->cur_tx - sis_priv->dirty_tx < NUM_TX_DESC - 4) {
+ /* The ring is no longer full, clear tbusy, tx_full and
+ schedule more transmission by marking NET_BH */
+ sis_priv->tx_full = 0;
+ clear_bit(0, (void *)&net_dev->tbusy);
+ mark_bh(NET_BH);
+ }
+}
+
+static int
+sis900_close(struct device *net_dev)
+{
+ long ioaddr = net_dev->base_addr;
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ int i;
+
+ net_dev->start = 0;
+ net_dev->tbusy = 1;
+
+ /* Disable interrupts by clearing the interrupt mask. */
+ outl(0x0000, ioaddr + imr);
+ outl(0x0000, ioaddr + ier);
+
+ /* Stop the chip's Tx and Rx Status Machine */
+ outl(RxDIS | TxDIS | inl(ioaddr + cr), ioaddr + cr);
+
+ del_timer(&sis_priv->timer);
+
+ free_irq(net_dev->irq, net_dev);
+
+ /* Free Tx and RX skbuff */
+ for (i = 0; i < NUM_RX_DESC; i++) {
+ if (sis_priv->rx_skbuff[i] != NULL)
+ dev_free_skb(sis_priv->rx_skbuff[i]);
+ sis_priv->rx_skbuff[i] = 0;
+ }
+ for (i = 0; i < NUM_TX_DESC; i++) {
+ if (sis_priv->tx_skbuff[i] != NULL)
+ dev_free_skb(sis_priv->tx_skbuff[i]);
+ sis_priv->tx_skbuff[i] = 0;
+ }
+
+ /* Green! Put the chip in low-power mode. */
+
+ MOD_DEC_USE_COUNT;
+
+ return 0;
+}
+
+static int mii_ioctl(struct device *net_dev, struct ifreq *rq, int cmd)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ u16 *data = (u16 *)&rq->ifr_data;
+
+ switch(cmd) {
+ case SIOCDEVPRIVATE: /* Get the address of the PHY in use. */
+ data[0] = sis_priv->mii->phy_addr;
+ /* Fall Through */
+ case SIOCDEVPRIVATE+1: /* Read the specified MII register. */
+ data[3] = mdio_read(net_dev, data[0] & 0x1f, data[1] & 0x1f);
+ return 0;
+ case SIOCDEVPRIVATE+2: /* Write the specified MII register */
+ if (!suser())
+ return -EPERM;
+ mdio_write(net_dev, data[0] & 0x1f, data[1] & 0x1f, data[2]);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static struct enet_statistics *
+sis900_get_stats(struct device *net_dev)
+{
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+
+ return &sis_priv->stats;
+}
+
+
+/* The SiS 900 uses the most significant 7 bits to index a 128-bit multicast
+ * hash table, which makes this function a little bit different from other drivers.
+ * The SiS 900 B0 & 635 M/B use the most significant 8 bits to index a 256-bit
+ * multicast hash table.
+ */
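+
+/* Illustration only (this mapping is implied by set_rx_mode() below rather
+ * than documented here): on i386, hash index n selects bit (n & 0x0F) of
+ * filter word mc_filter[n >> 4].  For instance, an index of 0x53 (83) sets
+ * bit 3 of mc_filter[5], and that 16-bit word is later written to the chip
+ * through the rfcr/rfdr register pair.
+ */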
+static u16 sis900_compute_hashtable_index(u8 *addr, u8 revision)
+{
+
+/* What is the correct value of the POLYNOMIAL?
+ Donald Becker uses 0x04C11DB7U.
+ Joseph Zbiciak <im14u2c@primenet.com> gave me the
+ correct answer, thank you Joe! */
+#define POLYNOMIAL 0x04C11DB7L
+ u32 crc = 0xffffffff, msb;
+ int i, j;
+ u32 byte;
+
+ for (i = 0; i < 6; i++) {
+ byte = *addr++;
+ for (j = 0; j < 8; j++) {
+ msb = crc >> 31;
+ crc <<= 1;
+ if (msb ^ (byte & 1)) {
+ crc ^= POLYNOMIAL;
+ }
+ byte >>= 1;
+ }
+ }
+
+ /* keep the 8 or 7 most significant bits */
+ if((revision >= SIS635A_900_REV) || (revision == SIS900B_900_REV))
+ return ((int)(crc >> 24));
+ else
+ return ((int)(crc >> 25));
+}
+
+static void set_rx_mode(struct device *net_dev)
+{
+ long ioaddr = net_dev->base_addr;
+ struct sis900_private * sis_priv = (struct sis900_private *)net_dev->priv;
+ u16 mc_filter[16] = {0}; /* 256/128 bits multicast hash table */
+ int i, table_entries;
+ u32 rx_mode;
+ u8 revision;
+
+ /* 635 hash table = 256 bits (2^8), i.e. 16 16-bit entries */
+ pcibios_read_config_byte(sis_priv->pci_bus, sis_priv->pci_device_fn, PCI_CLASS_REVISION, &revision);
+ if((revision >= SIS635A_900_REV) || (revision == SIS900B_900_REV))
+ table_entries = 16;
+ else
+ table_entries = 8;
+
+ if (net_dev->flags & IFF_PROMISC) {
+ /* Accept any kinds of packets */
+ rx_mode = RFPromiscuous;
+ for (i = 0; i < table_entries; i++)
+ mc_filter[i] = 0xffff;
+ } else if ((net_dev->mc_count > multicast_filter_limit) ||
+ (net_dev->flags & IFF_ALLMULTI)) {
+ /* too many multicast addresses or accept all multicast packets */
+ rx_mode = RFAAB | RFAAM;
+ for (i = 0; i < table_entries; i++)
+ mc_filter[i] = 0xffff;
+ } else {
+ /* Accept broadcast packets and packets whose destination address matches our MAC address;
+ use the Receive Filter to reject unwanted MCAST packets */
+ struct dev_mc_list *mclist;
+ rx_mode = RFAAB;
+ for (i = 0, mclist = net_dev->mc_list; mclist && i < net_dev->mc_count;
+ i++, mclist = mclist->next)
+ set_bit(sis900_compute_hashtable_index(mclist->dmi_addr, revision),
+ mc_filter);
+ }
+
+ /* update Multicast Hash Table in Receive Filter */
+ for (i = 0; i < table_entries; i++) {
+ /* why plus 0x04? That yields the correct receive filter address for the hash table entries. */
+ outl((u32)(0x00000004+i) << RFADDR_shift, ioaddr + rfcr);
+ outl(mc_filter[i], ioaddr + rfdr);
+ }
+
+ outl(RFEN | rx_mode, ioaddr + rfcr);
+
+ /* the sis900 is capable of looping packets back at the MAC level for debugging purposes */
+ if (net_dev->flags & IFF_LOOPBACK) {
+ u32 cr_saved;
+ /* We must disable Tx/Rx before setting loopback mode */
+ cr_saved = inl(ioaddr + cr);
+ outl(cr_saved | TxDIS | RxDIS, ioaddr + cr);
+ /* enable loopback */
+ outl(inl(ioaddr + txcfg) | TxMLB, ioaddr + txcfg);
+ outl(inl(ioaddr + rxcfg) | RxATX, ioaddr + rxcfg);
+ /* restore cr */
+ outl(cr_saved, ioaddr + cr);
+ }
+
+ return;
+}
+
+static void sis900_reset(struct device *net_dev)
+{
+ long ioaddr = net_dev->base_addr;
+ struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv;
+ int i = 0;
+ u8 revision;
+ u32 status = TxRCMP | RxRCMP;
+
+ outl(0, ioaddr + ier);
+ outl(0, ioaddr + imr);
+ outl(0, ioaddr + rfcr);
+
+ outl(RxRESET | TxRESET | RESET | inl(ioaddr + cr), ioaddr + cr);
+
+ /* Check that the chip has finished the reset. */
+ while (status && (i++ < 1000)) {
+ status ^= (inl(isr + ioaddr) & status);
+ }
+
+ pcibios_read_config_byte(sis_priv->pci_bus, sis_priv->pci_device_fn, PCI_CLASS_REVISION, &revision);
+ if( (revision >= SIS635A_900_REV) || (revision == SIS900B_900_REV) )
+ outl(PESEL | RND_CNT, ioaddr + cfg);
+ else
+ outl(PESEL, ioaddr + cfg);
+}
+
+#ifdef MODULE
+int init_module(void)
+{
+ return sis900_probe(NULL);
+}
+
+void
+cleanup_module(void)
+{
+ /* No need to check MOD_IN_USE, as sys_delete_module() checks. */
+ while (root_sis900_dev) {
+ struct sis900_private *sis_priv =
+ (struct sis900_private *)root_sis900_dev->priv;
+ struct device *next_dev = sis_priv->next_module;
+ struct mii_phy *phy = NULL;
+
+ while(sis_priv->first_mii){
+ phy = sis_priv->first_mii;
+ sis_priv->first_mii = phy->next;
+ kfree(phy);
+ }
+
+ unregister_netdev(root_sis900_dev);
+ release_region(root_sis900_dev->base_addr,
+ sis_priv->mac->io_size);
+ kfree(sis_priv);
+ kfree(root_sis900_dev);
+
+ root_sis900_dev = next_dev;
+ }
+}
+
+#endif /* MODULE */
diff --git a/linux/src/drivers/net/sis900.h b/linux/src/drivers/net/sis900.h
new file mode 100644
index 00000000..21536257
--- /dev/null
+++ b/linux/src/drivers/net/sis900.h
@@ -0,0 +1,284 @@
+/* sis900.h Definitions for SiS ethernet controllers including 7014/7016 and 900
+ * Copyright 1999 Silicon Integrated System Corporation
+ * References:
+ * SiS 7016 Fast Ethernet PCI Bus 10/100 Mbps LAN Controller with OnNow Support,
+ * preliminary Rev. 1.0 Jan. 14, 1998
+ * SiS 900 Fast Ethernet PCI Bus 10/100 Mbps LAN Single Chip with OnNow Support,
+ * preliminary Rev. 1.0 Nov. 10, 1998
+ * SiS 7014 Single Chip 100BASE-TX/10BASE-T Physical Layer Solution,
+ * preliminary Rev. 1.0 Jan. 18, 1998
+ * http://www.sis.com.tw/support/databook.htm
+ */
+
+/* MAC operational registers of the SiS 7016 and SiS 900 ethernet controllers */
+/* The I/O extent; the SiS 900 needs 256 bytes of I/O address space */
+#define SIS900_TOTAL_SIZE 0x100
+
+/* Symbolic offsets to registers. */
+enum sis900_registers {
+ cr=0x0, //Command Register
+ cfg=0x4, //Configuration Register
+ mear=0x8, //EEPROM Access Register
+ ptscr=0xc, //PCI Test Control Register
+ isr=0x10, //Interrupt Status Register
+ imr=0x14, //Interrupt Mask Register
+ ier=0x18, //Interrupt Enable Register
+ epar=0x18, //Enhanced PHY Access Register
+ txdp=0x20, //Transmit Descriptor Pointer Register
+ txcfg=0x24, //Transmit Configuration Register
+ rxdp=0x30, //Receive Descriptor Pointer Register
+ rxcfg=0x34, //Receive Configuration Register
+ flctrl=0x38, //Flow Control Register
+ rxlen=0x3c, //Receive Packet Length Register
+ rfcr=0x48, //Receive Filter Control Register
+ rfdr=0x4C, //Receive Filter Data Register
+ pmctrl=0xB0, //Power Management Control Register
+ pmer=0xB4 //Power Management Wake-up Event Register
+};
+
+/* Symbolic names for bits in various registers */
+enum sis900_command_register_bits {
+ RELOAD = 0x00000400, ACCESSMODE = 0x00000200,/* ET */
+ RESET = 0x00000100, SWI = 0x00000080, RxRESET = 0x00000020,
+ TxRESET = 0x00000010, RxDIS = 0x00000008, RxENA = 0x00000004,
+ TxDIS = 0x00000002, TxENA = 0x00000001
+};
+
+enum sis900_configuration_register_bits {
+ DESCRFMT = 0x00000100 /* 7016 specific */, REQALG = 0x00000080,
+ SB = 0x00000040, POW = 0x00000020, EXD = 0x00000010,
+ PESEL = 0x00000008, LPM = 0x00000004, BEM = 0x00000001,
+ /* 635 & 900B Specific */
+ RND_CNT = 0x00000400, FAIR_BACKOFF = 0x00000200,
+ EDB_MASTER_EN = 0x00002000
+};
+
+enum sis900_eeprom_access_reigster_bits {
+ MDC = 0x00000040, MDDIR = 0x00000020, MDIO = 0x00000010, /* 7016 specific */
+ EECS = 0x00000008, EECLK = 0x00000004, EEDO = 0x00000002,
+ EEDI = 0x00000001
+};
+
+enum sis900_interrupt_register_bits {
+ WKEVT = 0x10000000, TxPAUSEEND = 0x08000000, TxPAUSE = 0x04000000,
+ TxRCMP = 0x02000000, RxRCMP = 0x01000000, DPERR = 0x00800000,
+ SSERR = 0x00400000, RMABT = 0x00200000, RTABT = 0x00100000,
+ RxSOVR = 0x00010000, HIBERR = 0x00008000, SWINT = 0x00001000,
+ MIBINT = 0x00000800, TxURN = 0x00000400, TxIDLE = 0x00000200,
+ TxERR = 0x00000100, TxDESC = 0x00000080, TxOK = 0x00000040,
+ RxORN = 0x00000020, RxIDLE = 0x00000010, RxEARLY = 0x00000008,
+ RxERR = 0x00000004, RxDESC = 0x00000002, RxOK = 0x00000001
+};
+
+enum sis900_interrupt_enable_reigster_bits {
+ IE = 0x00000001
+};
+
+/* maximum DMA burst for transmit and receive */
+#define MAX_DMA_RANGE 7 /* actually 0 means MAXIMUM !! */
+#define TxMXDMA_shift 20
+#define RxMXDMA_shift 20
+
+enum sis900_tx_rx_dma{
+ DMA_BURST_512 = 0, DMA_BURST_64 = 5
+};
+
+/* transmit FIFO thresholds */
+#define TX_FILL_THRESH 16 /* 1/4 FIFO size */
+#define TxFILLT_shift 8
+#define TxDRNT_shift 0
+#define TxDRNT_100 48 /* 3/4 FIFO size */
+#define TxDRNT_10 16 /* 1/2 FIFO size */
+
+enum sis900_transmit_config_register_bits {
+ TxCSI = 0x80000000, TxHBI = 0x40000000, TxMLB = 0x20000000,
+ TxATP = 0x10000000, TxIFG = 0x0C000000, TxFILLT = 0x00003F00,
+ TxDRNT = 0x0000003F
+};
+
+/* receive FIFO thresholds */
+#define RxDRNT_shift 1
+#define RxDRNT_100 16 /* 1/2 FIFO size */
+#define RxDRNT_10 24 /* 3/4 FIFO size */
+
+enum sis900_reveive_config_register_bits {
+ RxAEP = 0x80000000, RxARP = 0x40000000, RxATX = 0x10000000,
+ RxAJAB = 0x08000000, RxDRNT = 0x0000007F
+};
+
+#define RFAA_shift 28
+#define RFADDR_shift 16
+
+enum sis900_receive_filter_control_register_bits {
+ RFEN = 0x80000000, RFAAB = 0x40000000, RFAAM = 0x20000000,
+ RFAAP = 0x10000000, RFPromiscuous = (RFAAB|RFAAM|RFAAP)
+};
+
+enum sis900_reveive_filter_data_mask {
+ RFDAT = 0x0000FFFF
+};
+
+/* EEPROM Addresses */
+enum sis900_eeprom_address {
+ EEPROMSignature = 0x00, EEPROMVendorID = 0x02, EEPROMDeviceID = 0x03,
+ EEPROMMACAddr = 0x08, EEPROMChecksum = 0x0b
+};
+
+/* The EEPROM commands include the always-set leading bit. Refer to the NM93Cxx datasheet */
+enum sis900_eeprom_command {
+ EEread = 0x0180, EEwrite = 0x0140, EEerase = 0x01C0,
+ EEwriteEnable = 0x0130, EEwriteDisable = 0x0100,
+ EEeraseAll = 0x0120, EEwriteAll = 0x0110,
+ EEaddrMask = 0x013F, EEcmdShift = 16
+};
+
+/* For SiS962, request the eeprom software access */
+enum sis962_eeprom_command {
+ EEREQ = 0x00000400, EEDONE = 0x00000200, EEGNT = 0x00000100
+};
+
+/* Management Data I/O (MDIO) frame */
+#define MIIread 0x6000
+#define MIIwrite 0x5002
+#define MIIpmdShift 7
+#define MIIregShift 2
+#define MIIcmdLen 16
+#define MIIcmdShift 16
+
+/* Buffer Descriptor Status*/
+enum sis900_buffer_status {
+ OWN = 0x80000000, MORE = 0x40000000, INTR = 0x20000000,
+ SUPCRC = 0x10000000, INCCRC = 0x10000000,
+ OK = 0x08000000, DSIZE = 0x00000FFF
+};
+/* Status for TX Buffers */
+enum sis900_tx_buffer_status {
+ ABORT = 0x04000000, UNDERRUN = 0x02000000, NOCARRIER = 0x01000000,
+ DEFERD = 0x00800000, EXCDEFER = 0x00400000, OWCOLL = 0x00200000,
+ EXCCOLL = 0x00100000, COLCNT = 0x000F0000
+};
+
+enum sis900_rx_bufer_status {
+ OVERRUN = 0x02000000, DEST = 0x00800000, BCAST = 0x01800000,
+ MCAST = 0x01000000, UNIMATCH = 0x00800000, TOOLONG = 0x00400000,
+ RUNT = 0x00200000, RXISERR = 0x00100000, CRCERR = 0x00080000,
+ FAERR = 0x00040000, LOOPBK = 0x00020000, RXCOL = 0x00010000
+};
+
+/* MII register offsets */
+enum mii_registers {
+ MII_CONTROL = 0x0000, MII_STATUS = 0x0001, MII_PHY_ID0 = 0x0002,
+ MII_PHY_ID1 = 0x0003, MII_ANADV = 0x0004, MII_ANLPAR = 0x0005,
+ MII_ANEXT = 0x0006
+};
+
+/* mii registers specific to SiS 900 */
+enum sis_mii_registers {
+ MII_CONFIG1 = 0x0010, MII_CONFIG2 = 0x0011, MII_STSOUT = 0x0012,
+ MII_MASK = 0x0013, MII_RESV = 0x0014
+};
+
+/* mii registers specific to ICS 1893 */
+enum ics_mii_registers {
+ MII_EXTCTRL = 0x0010, MII_QPDSTS = 0x0011, MII_10BTOP = 0x0012,
+ MII_EXTCTRL2 = 0x0013
+};
+
+/* mii registers specific to AMD 79C901 */
+enum amd_mii_registers {
+ MII_STATUS_SUMMARY = 0x0018
+};
+
+/* MII Control register bit definitions. */
+enum mii_control_register_bits {
+ MII_CNTL_FDX = 0x0100, MII_CNTL_RST_AUTO = 0x0200,
+ MII_CNTL_ISOLATE = 0x0400, MII_CNTL_PWRDWN = 0x0800,
+ MII_CNTL_AUTO = 0x1000, MII_CNTL_SPEED = 0x2000,
+ MII_CNTL_LPBK = 0x4000, MII_CNTL_RESET = 0x8000
+};
+
+/* MII Status register bit */
+enum mii_status_register_bits {
+ MII_STAT_EXT = 0x0001, MII_STAT_JAB = 0x0002,
+ MII_STAT_LINK = 0x0004, MII_STAT_CAN_AUTO = 0x0008,
+ MII_STAT_FAULT = 0x0010, MII_STAT_AUTO_DONE = 0x0020,
+ MII_STAT_CAN_T = 0x0800, MII_STAT_CAN_T_FDX = 0x1000,
+ MII_STAT_CAN_TX = 0x2000, MII_STAT_CAN_TX_FDX = 0x4000,
+ MII_STAT_CAN_T4 = 0x8000
+};
+
+#define MII_ID1_OUI_LO 0xFC00 /* low bits of OUI mask */
+#define MII_ID1_MODEL 0x03F0 /* model number */
+#define MII_ID1_REV 0x000F /* model number */
+
+/* MII NWAY Register Bits ...
+ valid for the ANAR (Auto-Negotiation Advertisement) and
+ ANLPAR (Auto-Negotiation Link Partner) registers */
+enum mii_nway_register_bits {
+ MII_NWAY_NODE_SEL = 0x001f, MII_NWAY_CSMA_CD = 0x0001,
+ MII_NWAY_T = 0x0020, MII_NWAY_T_FDX = 0x0040,
+ MII_NWAY_TX = 0x0080, MII_NWAY_TX_FDX = 0x0100,
+ MII_NWAY_T4 = 0x0200, MII_NWAY_PAUSE = 0x0400,
+ MII_NWAY_RF = 0x2000, MII_NWAY_ACK = 0x4000,
+ MII_NWAY_NP = 0x8000
+};
+
+enum mii_stsout_register_bits {
+ MII_STSOUT_LINK_FAIL = 0x4000,
+ MII_STSOUT_SPD = 0x0080, MII_STSOUT_DPLX = 0x0040
+};
+
+enum mii_stsics_register_bits {
+ MII_STSICS_SPD = 0x8000, MII_STSICS_DPLX = 0x4000,
+ MII_STSICS_LINKSTS = 0x0001
+};
+
+enum mii_stssum_register_bits {
+ MII_STSSUM_LINK = 0x0008, MII_STSSUM_DPLX = 0x0004,
+ MII_STSSUM_AUTO = 0x0002, MII_STSSUM_SPD = 0x0001
+};
+
+enum sis900_revision_id {
+ SIS630A_900_REV = 0x80, SIS630E_900_REV = 0x81,
+ SIS630S_900_REV = 0x82, SIS630EA1_900_REV = 0x83,
+ SIS630ET_900_REV = 0x84, SIS635A_900_REV = 0x90,
+ SIS962_900_REV = 0X91, SIS900B_900_REV = 0x03
+};
+
+enum sis630_revision_id {
+ SIS630A0 = 0x00, SIS630A1 = 0x01,
+ SIS630B0 = 0x10, SIS630B1 = 0x11
+};
+
+#define FDX_CAPABLE_DUPLEX_UNKNOWN 0
+#define FDX_CAPABLE_HALF_SELECTED 1
+#define FDX_CAPABLE_FULL_SELECTED 2
+
+#define HW_SPEED_UNCONFIG 0
+#define HW_SPEED_HOME 1
+#define HW_SPEED_10_MBPS 10
+#define HW_SPEED_100_MBPS 100
+#define HW_SPEED_DEFAULT (HW_SPEED_100_MBPS)
+
+#define CRC_SIZE 4
+#define MAC_HEADER_SIZE 14
+
+#define TX_BUF_SIZE 1536
+#define RX_BUF_SIZE 1536
+
+#define NUM_TX_DESC 16 /* Number of Tx descriptor registers. */
+#define NUM_RX_DESC 16 /* Number of Rx descriptor registers. */
+
+#define TRUE 1
+#define FALSE 0
+
+/* PCI stuff, should be moved to pci.h */
+#define PCI_DEVICE_ID_SI_900 0x900
+#define PCI_DEVICE_ID_SI_7016 0x7016
+#define SIS630_VENDOR_ID 0x1039
+#define SIS630_DEVICE_ID 0x0630
+
+/* ioctls for accessing the MII transceiver */
+#define SIOCGMIIPHY (SIOCDEVPRIVATE) /* Get the PHY in use. */
+#define SIOCGMIIREG (SIOCDEVPRIVATE+1) /* Read a PHY register. */
+#define SIOCSMIIREG (SIOCDEVPRIVATE+2) /* Write a PHY register */
diff --git a/linux/src/drivers/net/sundance.c b/linux/src/drivers/net/sundance.c
index cb8dfe52..47f32ebd 100644
--- a/linux/src/drivers/net/sundance.c
+++ b/linux/src/drivers/net/sundance.c
@@ -986,7 +986,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev)
dev->trans_start = jiffies;
if (np->msg_level & NETIF_MSG_TX_QUEUED) {
- printk(KERN_DEBUG "%s: Transmit frame #%d len %d queued in slot %d.\n",
+ printk(KERN_DEBUG "%s: Transmit frame #%d len %ld queued in slot %ld.\n",
dev->name, np->cur_tx, skb->len, entry);
}
return 0;
diff --git a/linux/src/drivers/pci/pci.c b/linux/src/drivers/pci/pci.c
index bb79cc47..cf7dd807 100644
--- a/linux/src/drivers/pci/pci.c
+++ b/linux/src/drivers/pci/pci.c
@@ -1121,10 +1121,8 @@ static unsigned int scan_bus(struct pci_bus *bus, unsigned long *mem_startp)
pcibios_read_config_dword(bus->number, devfn, PCI_VENDOR_ID, &l);
/* some broken boards return 0 if a slot is empty: */
- if (l == 0xffffffff || l == 0x00000000) {
- is_multi = 0;
+ if (l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000)
continue;
- }
dev = pci_malloc(sizeof(*dev), mem_startp);
dev->bus = bus;
diff --git a/linux/src/drivers/scsi/aha1542.c b/linux/src/drivers/scsi/aha1542.c
index 0366fd31..68954d75 100644
--- a/linux/src/drivers/scsi/aha1542.c
+++ b/linux/src/drivers/scsi/aha1542.c
@@ -120,8 +120,8 @@ static int aha1542_restart(struct Scsi_Host * shost);
#define aha1542_intr_reset(base) outb(IRST, CONTROL(base))
#define WAIT(port, mask, allof, noneof) \
- { register WAITbits; \
- register WAITtimeout = WAITnexttimeout; \
+ { register int WAITbits; \
+ register int WAITtimeout = WAITnexttimeout; \
while (1) { \
WAITbits = inb(port) & (mask); \
if ((WAITbits & (allof)) == (allof) && ((WAITbits & (noneof)) == 0)) \
@@ -133,8 +133,8 @@ static int aha1542_restart(struct Scsi_Host * shost);
/* Similar to WAIT, except we use the udelay call to regulate the
amount of time we wait. */
#define WAITd(port, mask, allof, noneof, timeout) \
- { register WAITbits; \
- register WAITtimeout = timeout; \
+ { register int WAITbits; \
+ register int WAITtimeout = timeout; \
while (1) { \
WAITbits = inb(port) & (mask); \
if ((WAITbits & (allof)) == (allof) && ((WAITbits & (noneof)) == 0)) \
diff --git a/linux/src/drivers/scsi/eata.c b/linux/src/drivers/scsi/eata.c
index 8d7ba083..ce859ced 100644
--- a/linux/src/drivers/scsi/eata.c
+++ b/linux/src/drivers/scsi/eata.c
@@ -881,6 +881,8 @@ __initfunc (static inline int port_detect \
sprintf(name, "%s%d", driver_name, j);
+ printk("probing eata on %lx\n", port_base);
+
if(check_region(port_base, REGION_SIZE)) {
printk("%s: address 0x%03lx in use, skipping probe.\n", name, port_base);
return FALSE;
diff --git a/linux/src/include/asm-i386/segment.h b/linux/src/include/asm-i386/segment.h
index 6052ad45..5f8af993 100644
--- a/linux/src/include/asm-i386/segment.h
+++ b/linux/src/include/asm-i386/segment.h
@@ -1,12 +1,21 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
+#ifdef MACH
+
+#include <machine/gdt.h>
+#include <machine/ldt.h>
+
+#else /* !MACH */
+
#define KERNEL_CS 0x10
#define KERNEL_DS 0x18
#define USER_CS 0x23
#define USER_DS 0x2B
+#endif /* !MACH */
+
#ifndef __ASSEMBLY__
/*
@@ -32,7 +41,7 @@ struct __segment_dummy { unsigned long a[100]; };
#define __sd(x) ((struct __segment_dummy *) (x))
#define __const_sd(x) ((const struct __segment_dummy *) (x))
-static inline void __put_user(unsigned long x, void * y, int size)
+static inline void __attribute__((always_inline)) __put_user(unsigned long x, void * y, int size)
{
switch (size) {
case 1:
@@ -55,7 +64,7 @@ static inline void __put_user(unsigned long x, void * y, int size)
}
}
-static inline unsigned long __get_user(const void * y, int size)
+static inline unsigned long __attribute__((always_inline)) __get_user(const void * y, int size)
{
unsigned long result;
@@ -80,27 +89,28 @@ static inline unsigned long __get_user(const void * y, int size)
}
}
+#if defined(__GNUC__) && (__GNUC__ == 2) && (__GNUC_MINOR__ < 95)
static inline void __generic_memcpy_tofs(void * to, const void * from, unsigned long n)
{
__asm__ volatile
- (" cld
- push %%es
- push %%fs
- cmpl $3,%0
- pop %%es
- jbe 1f
- movl %%edi,%%ecx
- negl %%ecx
- andl $3,%%ecx
- subl %%ecx,%0
- rep; movsb
- movl %0,%%ecx
- shrl $2,%%ecx
- rep; movsl
- andl $3,%0
- 1: movl %0,%%ecx
- rep; movsb
- pop %%es"
+ ("cld\n"
+ "push %%es\n"
+ "push %%fs\n"
+ "cmpl $3,%0\n"
+ "pop %%es\n"
+ "jbe 1f\n"
+ "movl %%edi,%%ecx\n"
+ "negl %%ecx\n"
+ "andl $3,%%ecx\n"
+ "subl %%ecx,%0\n"
+ "rep; movsb\n"
+ "movl %0,%%ecx\n"
+ "shrl $2,%%ecx\n"
+ "rep; movsl\n"
+ "andl $3,%0\n"
+ "1: movl %0,%%ecx\n"
+ "rep; movsb\n"
+ "pop %%es\n"
:"=abd" (n)
:"0" (n),"D" ((long) to),"S" ((long) from)
:"cx","di","si");
@@ -171,24 +181,24 @@ __asm__("cld\n\t" \
static inline void __generic_memcpy_fromfs(void * to, const void * from, unsigned long n)
{
- __asm__ volatile
- (" cld
- cmpl $3,%0
- jbe 1f
- movl %%edi,%%ecx
- negl %%ecx
- andl $3,%%ecx
- subl %%ecx,%0
- fs; rep; movsb
- movl %0,%%ecx
- shrl $2,%%ecx
- fs; rep; movsl
- andl $3,%0
- 1: movl %0,%%ecx
- fs; rep; movsb"
- :"=abd" (n)
- :"0" (n),"D" ((long) to),"S" ((long) from)
- :"cx","di","si", "memory");
+ __asm__ volatile
+ ("cld\n"
+ "cmpl $3,%0\n"
+ "jbe 1f\n"
+ "movl %%edi,%%ecx\n"
+ "negl %%ecx\n"
+ "andl $3,%%ecx\n"
+ "subl %%ecx,%0\n"
+ "fs; rep; movsb\n"
+ "movl %0,%%ecx\n"
+ "shrl $2,%%ecx\n"
+ "fs; rep; movsl\n"
+ "andl $3,%0\n"
+ "1:movl %0,%%ecx\n"
+ "fs; rep; movsb\n"
+ :"=abd" (n)
+ :"0" (n),"D" ((long) to),"S" ((long) from)
+ :"cx","di","si", "memory");
}
static inline void __constant_memcpy_fromfs(void * to, const void * from, unsigned long n)
@@ -260,6 +270,29 @@ __asm__("cld\n\t" \
__constant_memcpy_tofs((to),(from),(n)) : \
__generic_memcpy_tofs((to),(from),(n)))
+
+#else /* code for gcc-2.95.x and newer follows */
+
+static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
+{
+ char *d = (char *)to;
+ const char *s = (const char *)from;
+ while (n-- > 0) {
+ *d++ = __get_user(s++, 1);
+ }
+}
+
+static inline void memcpy_tofs(void * to, const void * from, unsigned long n)
+{
+ char *d = (char *)to;
+ const char *s = (const char *)from;
+ while (n-- > 0) {
+ __put_user(*s++, d++, 1);
+ }
+}
+
+#endif /* not gcc-2.95 */
+
/*
* These are deprecated..
*
diff --git a/linux/src/include/linux/tqueue.h b/linux/src/include/linux/tqueue.h
index 8bd4e7f3..d38e1df5 100644
--- a/linux/src/include/linux/tqueue.h
+++ b/linux/src/include/linux/tqueue.h
@@ -79,7 +79,7 @@ extern task_queue tq_timer, tq_immediate, tq_scheduler, tq_disk;
* "bh_list". You may call this function only from an interrupt
* handler or a bottom half handler.
*/
-extern __inline__ void queue_task_irq(struct tq_struct *bh_pointer,
+static __inline__ void queue_task_irq(struct tq_struct *bh_pointer,
task_queue *bh_list)
{
if (!set_bit(0,&bh_pointer->sync)) {
@@ -92,7 +92,7 @@ extern __inline__ void queue_task_irq(struct tq_struct *bh_pointer,
* queue_task_irq_off: put the bottom half handler "bh_pointer" on the list
* "bh_list". You may call this function only when interrupts are off.
*/
-extern __inline__ void queue_task_irq_off(struct tq_struct *bh_pointer,
+static __inline__ void queue_task_irq_off(struct tq_struct *bh_pointer,
task_queue *bh_list)
{
if (!(bh_pointer->sync & 1)) {
@@ -106,7 +106,7 @@ extern __inline__ void queue_task_irq_off(struct tq_struct *bh_pointer,
/*
* queue_task: as queue_task_irq, but can be called from anywhere.
*/
-extern __inline__ void queue_task(struct tq_struct *bh_pointer,
+static __inline__ void queue_task(struct tq_struct *bh_pointer,
task_queue *bh_list)
{
if (!set_bit(0,&bh_pointer->sync)) {
@@ -122,7 +122,7 @@ extern __inline__ void queue_task(struct tq_struct *bh_pointer,
/*
* Call all "bottom halfs" on a given list.
*/
-extern __inline__ void run_task_queue(task_queue *list)
+static __inline__ void run_task_queue(task_queue *list)
{
struct tq_struct *p;
diff --git a/util/atoi.c b/util/atoi.c
new file mode 100644
index 00000000..64816b9d
--- /dev/null
+++ b/util/atoi.c
@@ -0,0 +1,108 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Olivetti Mach Console driver v0.0
+ * Copyright Ing. C. Olivetti & C. S.p.A. 1988, 1989
+ * All rights reserved.
+ *
+ */
+/*
+ Copyright 1988, 1989 by Olivetti Advanced Technology Center, Inc.,
+Cupertino, California.
+
+ All Rights Reserved
+
+ Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appears in all
+copies and that both the copyright notice and this permission notice
+appear in supporting documentation, and that the name of Olivetti
+not be used in advertising or publicity pertaining to distribution
+of the software without specific, written prior permission.
+
+ OLIVETTI DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+IN NO EVENT SHALL OLIVETTI BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
+NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION
+WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+/*
+ Copyright 1988, 1989 by Intel Corporation, Santa Clara, California.
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appears in all
+copies and that both the copyright notice and this permission notice
+appear in supporting documentation, and that the name of Intel
+not be used in advertising or publicity pertaining to distribution
+of the software without specific, written prior permission.
+
+INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
+NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <util/atoi.h>
+
+/*
+ * mach_atoi:
+ *
+ * This function converts an ASCII string into an integer, and returns
+ * MACH_ATOI_DEFAULT if no integer was found. This is why we don't use
+ * the regular atoi(): there ZERO is returned both for "0" and on
+ * failure, whereas here a missing number yields MACH_ATOI_DEFAULT.
+ *
+ * input : string
+ * output : a number or possibly MACH_ATOI_DEFAULT, and the count of
+ * characters consumed by the conversion
+ *
+ */
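+/*
+ * Examples (illustrative calls, not part of the original source):
+ *
+ *	int n;
+ *	mach_atoi((u_char *) "123abc", &n);   returns 3 and sets n = 123
+ *	mach_atoi((u_char *) "abc", &n);      returns 0 and sets n = MACH_ATOI_DEFAULT
+ */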
+int
+mach_atoi(cp, nump)
+u_char *cp;
+int *nump;
+{
+ int number;
+ u_char *original;
+
+ original = cp;
+ for (number = 0; ('0' <= *cp) && (*cp <= '9'); cp++)
+ number = (number * 10) + (*cp - '0');
+ if (original == cp)
+ *nump = MACH_ATOI_DEFAULT;
+ else
+ *nump = number;
+ return(cp - original);
+}
diff --git a/util/atoi.h b/util/atoi.h
new file mode 100644
index 00000000..921b1e81
--- /dev/null
+++ b/util/atoi.h
@@ -0,0 +1,67 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/* **********************************************************************
+ File: atoi.h
+ Description: definitions for mach_atoi
+ Authors: Eugene Kuerner, Adrienne Jardetzky, Mike Kupfer
+
+ $ Header: $
+
+ Copyright Ing. C. Olivetti & C. S.p.A. 1988, 1989.
+ All rights reserved.
+********************************************************************** */
+/*
+ Copyright 1988, 1989 by Olivetti Advanced Technology Center, Inc.,
+Cupertino, California.
+
+ All Rights Reserved
+
+ Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appears in all
+copies and that both the copyright notice and this permission notice
+appear in supporting documentation, and that the name of Olivetti
+not be used in advertising or publicity pertaining to distribution
+of the software without specific, written prior permission.
+
+ OLIVETTI DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+IN NO EVENT SHALL OLIVETTI BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
+NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION
+WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#ifndef _UTIL_ATOI_H_
+#define _UTIL_ATOI_H_
+
+#include <sys/types.h>
+
+#define MACH_ATOI_DEFAULT -1
+extern int mach_atoi (u_char *, int *);
+
+#endif /* _UTIL_ATOI_H_ */
diff --git a/util/putchar.c b/util/putchar.c
index e349d208..6f8e18ea 100644
--- a/util/putchar.c
+++ b/util/putchar.c
@@ -21,6 +21,8 @@
* Author: Bryan Ford, University of Utah CSL
*/
+#include <device/cons.h>
+
int putchar(int c)
{
cnputc(c);
diff --git a/util/putchar.h b/util/putchar.h
new file mode 100644
index 00000000..2e65bd56
--- /dev/null
+++ b/util/putchar.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Simple putchar implementation header.
+ *
+ */
+
+#ifndef _PUTCHAR_H_
+#define _PUTCHAR_H_
+
+#include <mach/std_types.h>
+
+extern int putchar(int c);
+
+#endif /* _PUTCHAR_H_ */
diff --git a/util/puts.c b/util/puts.c
index c0eec03e..0bd72e98 100644
--- a/util/puts.c
+++ b/util/puts.c
@@ -21,6 +21,9 @@
* Author: Bryan Ford, University of Utah CSL
*/
+#include <device/cons.h>
+#include <util/putchar.h>
+
/* Simple puts() implementation that just uses putchar().
Note that our libc's printf() is implemented
in terms of only puts() and putchar(), so that's all we need. */
diff --git a/vm/memory_object.c b/vm/memory_object.c
index 6bef2c9a..57dde76b 100644
--- a/vm/memory_object.c
+++ b/vm/memory_object.c
@@ -41,6 +41,7 @@
#include <mach/mach_types.h>
#include <mach/kern_return.h>
+#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <mach/memory_object.h>
#include <mach/boolean.h>
@@ -81,23 +82,6 @@ decl_simple_lock_data(,memory_manager_default_lock)
* argument conversion. Explicit deallocation is necessary.
*/
-/*
- * If successful, destroys the map copy object.
- */
-kern_return_t memory_object_data_provided(object, offset, data, data_cnt,
- lock_value)
- vm_object_t object;
- vm_offset_t offset;
- pointer_t data;
- unsigned int data_cnt;
- vm_prot_t lock_value;
-{
- return memory_object_data_supply(object, offset, (vm_map_copy_t) data,
- data_cnt, lock_value, FALSE, IP_NULL,
- 0);
-}
-
-
kern_return_t memory_object_data_supply(object, offset, data_copy, data_cnt,
lock_value, precious, reply_to, reply_to_type)
register
@@ -323,6 +307,24 @@ retry_lookup:
return(result);
}
+
+/*
+ * If successful, destroys the map copy object.
+ */
+kern_return_t memory_object_data_provided(object, offset, data, data_cnt,
+ lock_value)
+ vm_object_t object;
+ vm_offset_t offset;
+ pointer_t data;
+ unsigned int data_cnt;
+ vm_prot_t lock_value;
+{
+ return memory_object_data_supply(object, offset, (vm_map_copy_t) data,
+ data_cnt, lock_value, FALSE, IP_NULL,
+ 0);
+}
+
+
kern_return_t memory_object_data_error(object, offset, size, error_value)
vm_object_t object;
vm_offset_t offset;
diff --git a/vm/memory_object_proxy.c b/vm/memory_object_proxy.c
new file mode 100644
index 00000000..4d81a687
--- /dev/null
+++ b/vm/memory_object_proxy.c
@@ -0,0 +1,200 @@
+/* memory_object_proxy.c - Proxy memory objects for Mach.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+ Written by Marcus Brinkmann.
+
+ This file is part of GNU Mach.
+
+ GNU Mach is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GNU Mach is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+/* A proxy memory object is a kernel port that can be used like a real
+ memory object in a vm_map call, except that the current and maximum
+ protection are restricted to the proxy object's maximum protection
+ at the time the mapping is established. The kernel port will hold
+ a reference to the real memory object for the life time of the
+ proxy object.
+
+ Note that we don't need to do any reference counting on the proxy
+ object. Our caller will hold a reference to the proxy object when
+ looking it up, and is expected to acquire its own reference to the
+ real memory object if needed before releasing the reference to the
+ proxy object.
+
+ The user provided real memory object and the maximum protection are
+ not checked for validity. The maximum protection is only used as a
+ mask, and the memory object is validated at the time the mapping is
+ established. */
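+
+/* A minimal sketch (hypothetical caller, not code from this file) of how a
+   mapping path is expected to use the lookup routine defined below:
+
+	ipc_port_t real_object;
+	vm_prot_t max_prot;
+
+	if (memory_object_proxy_lookup (memory_object_port,
+					&real_object, &max_prot)
+	    == KERN_SUCCESS)
+	  {
+	    cur_protection &= max_prot;
+	    max_protection &= max_prot;
+	    memory_object_port = real_object;
+	  }
+*/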
+
+#include <mach/port.h>
+#include <mach/kern_return.h>
+#include <mach/notify.h>
+#include <mach/vm_prot.h>
+#include <kern/zalloc.h>
+#include <kern/mach_param.h>
+#include <ipc/ipc_port.h>
+#include <ipc/ipc_space.h>
+
+/* The zone which holds our proxy memory objects. */
+static zone_t memory_object_proxy_zone;
+
+struct memory_object_proxy
+{
+ struct ipc_port *port;
+
+ ipc_port_t object;
+ vm_prot_t max_protection;
+};
+typedef struct memory_object_proxy *memory_object_proxy_t;
+
+
+void
+memory_object_proxy_init (void)
+{
+ /* For limit, see PORT_MAX. */
+ memory_object_proxy_zone = zinit (sizeof (struct memory_object_proxy), 0,
+ (TASK_MAX * 3 + THREAD_MAX)
+ * sizeof (struct memory_object_proxy),
+ 256 * sizeof (struct memory_object_proxy),
+ ZONE_EXHAUSTIBLE,
+ "proxy memory object zone");
+}
+
+/* Lookup a proxy memory object by its port. */
+static memory_object_proxy_t
+memory_object_proxy_port_lookup (ipc_port_t port)
+{
+ memory_object_proxy_t proxy;
+
+ if (!IP_VALID(port))
+ return 0;
+
+ ip_lock (port);
+ if (ip_active (port) && (ip_kotype (port) == IKOT_PAGER_PROXY))
+ proxy = (memory_object_proxy_t) port->ip_kobject;
+ else
+ proxy = 0;
+ ip_unlock (port);
+ return proxy;
+}
+
+
+/* Process a no-sender notification for the proxy memory object
+ port. */
+boolean_t
+memory_object_proxy_notify (mach_msg_header_t *msg)
+{
+ if (msg->msgh_id == MACH_NOTIFY_NO_SENDERS)
+ {
+ memory_object_proxy_t proxy;
+ mach_no_senders_notification_t *ns;
+
+ ns = (mach_no_senders_notification_t *) msg;
+ proxy = memory_object_proxy_port_lookup
+ ((ipc_port_t) ns->not_header.msgh_remote_port);
+ assert (proxy);
+
+ ipc_port_release_send (proxy->object);
+ return TRUE;
+ }
+
+ printf ("memory_object_proxy_notify: strange notification %d\n",
+ msg->msgh_id);
+ return FALSE;
+}
+
+
+/* Create a new proxy memory object from [START;START+LEN) in the
+ given OBJECT at OFFSET in the new object with the maximum
+ protection MAX_PROTECTION and return it in *PORT. */
+kern_return_t
+memory_object_create_proxy (ipc_space_t space, vm_prot_t max_protection,
+ ipc_port_t *object, natural_t object_count,
+ vm_offset_t *offset, natural_t offset_count,
+ vm_offset_t *start, natural_t start_count,
+ vm_offset_t *len, natural_t len_count,
+ ipc_port_t *port)
+{
+ kern_return_t kr;
+ memory_object_proxy_t proxy;
+ ipc_port_t notify;
+
+ if (space == IS_NULL)
+ return KERN_INVALID_TASK;
+
+ if (offset_count != object_count || start_count != object_count
+ || len_count != object_count)
+ return KERN_INVALID_ARGUMENT;
+
+ /* FIXME: Support more than one memory object. */
+ if (object_count != 1)
+ return KERN_INVALID_ARGUMENT;
+
+ if (!IP_VALID(object[0]))
+ return KERN_INVALID_NAME;
+
+ /* FIXME: Support a non-zero offset. */
+ if (offset[0] != 0)
+ return KERN_INVALID_ARGUMENT;
+
+ /* FIXME: Support a sub-range instead of the whole object. */
+ if (start[0] != 0 || len[0] != (vm_offset_t) ~0)
+ return KERN_INVALID_ARGUMENT;
+
+ proxy = (memory_object_proxy_t) zalloc (memory_object_proxy_zone);
+
+ /* Allocate port, keeping a reference for it. */
+ proxy->port = ipc_port_alloc_kernel ();
+ if (proxy->port == IP_NULL)
+ {
+ zfree (memory_object_proxy_zone, (vm_offset_t) proxy);
+ return KERN_RESOURCE_SHORTAGE;
+ }
+ /* Associate the port with the proxy memory object. */
+ ipc_kobject_set (proxy->port, (ipc_kobject_t) proxy, IKOT_PAGER_PROXY);
+
+ /* Request no-senders notifications on the port. */
+ notify = ipc_port_make_sonce (proxy->port);
+ ip_lock (proxy->port);
+ ipc_port_nsrequest (proxy->port, 1, notify, &notify);
+ assert (notify == IP_NULL);
+
+ proxy->object = ipc_port_copy_send (object[0]);
+ proxy->max_protection = max_protection;
+
+ *port = ipc_port_make_send (proxy->port);
+ return KERN_SUCCESS;
+}
+
+
+/* Lookup the real memory object and maximum protection for the proxy
+ memory object port PORT, for which the caller holds a reference.
+ *OBJECT is only guaranteed to be valid as long as the caller holds
+ the reference to PORT (unless the caller acquires its own reference
+ to it). If PORT is not a proxy memory object, return
+ KERN_INVALID_ARGUMENT. */
+kern_return_t
+memory_object_proxy_lookup (ipc_port_t port, ipc_port_t *object,
+ vm_prot_t *max_protection)
+{
+ memory_object_proxy_t proxy;
+
+ proxy = memory_object_proxy_port_lookup (port);
+ if (!proxy)
+ return KERN_INVALID_ARGUMENT;
+
+ *object = proxy->object;
+ *max_protection = proxy->max_protection;
+
+ return KERN_SUCCESS;
+}
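
A minimal user-space sketch of the proxy interface added above, assuming the MIG-generated stub for memory_object_create_proxy mirrors the kernel routine (each array argument followed by its count); the helper name map_read_only, the variable names and the lack of error handling are illustrative only, not part of this change.

/* Restrict a memory object to read-only access via a proxy, then map it. */
#include <mach.h>

kern_return_t
map_read_only (mach_port_t memobj, vm_address_t *addr, vm_size_t size)
{
  mach_port_t proxy;
  vm_offset_t offset = 0, start = 0, len = (vm_offset_t) ~0;
  kern_return_t kr;

  kr = memory_object_create_proxy (mach_task_self (), VM_PROT_READ,
                                   &memobj, 1, &offset, 1, &start, 1,
                                   &len, 1, &proxy);
  if (kr != KERN_SUCCESS)
    return kr;

  /* vm_map on the proxy behaves like mapping the real object, except that
     the requested protections are masked with VM_PROT_READ (see the
     vm_user.c hunk below).  */
  return vm_map (mach_task_self (), addr, size, 0, TRUE, proxy, 0, FALSE,
                 VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE);
}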
diff --git a/vm/pmap.h b/vm/pmap.h
index 04ab4e92..59fd03ab 100644
--- a/vm/pmap.h
+++ b/vm/pmap.h
@@ -209,6 +209,37 @@ extern kern_return_t pmap_attribute();
#endif /* pmap_attribute */
/*
+ * Grab a physical page:
+ * the standard memory allocation mechanism
+ * during system initialization.
+ */
+extern vm_offset_t pmap_grab_page (void);
+
+extern boolean_t pmap_valid_page(vm_offset_t x);
+
+/*
+ * Make the specified pages (by pmap, offset)
+ * pageable (or not) as requested.
+ */
+extern void pmap_pageable(
+ pmap_t pmap,
+ vm_offset_t start,
+ vm_offset_t end,
+ boolean_t pageable);
+
+/*
+ * Back-door routine for mapping kernel VM at initialization.
+ * Useful for mapping memory outside the range
+ * [phys_first_addr, phys_last_addr) (i.e., devices).
+ * Otherwise like pmap_map.
+ */
+extern vm_offset_t pmap_map_bd(
+ vm_offset_t virt,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_prot_t prot);
+
+/*
* Routines defined as macros.
*/
#ifndef PMAP_ACTIVATE_USER
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
index 540d74e7..cce043a1 100644
--- a/vm/vm_fault.c
+++ b/vm/vm_fault.c
@@ -660,7 +660,7 @@ vm_fault_return_t vm_fault_page(first_object, first_offset,
m->offset + object->paging_offset,
PAGE_SIZE, access_required)) != KERN_SUCCESS) {
if (rc != MACH_SEND_INTERRUPTED)
- printf("%s(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) failed, %d\n",
+ printf("%s(0x%p, 0x%p, 0x%x, 0x%x, 0x%x) failed, %x\n",
"memory_object_data_request",
object->pager,
object->pager_request,
diff --git a/vm/vm_init.c b/vm/vm_init.c
index b76b11b6..f6a40602 100644
--- a/vm/vm_init.c
+++ b/vm/vm_init.c
@@ -37,6 +37,7 @@
#include <mach/machine/vm_types.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
+#include <vm/vm_fault.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
@@ -81,4 +82,5 @@ void vm_mem_bootstrap()
void vm_mem_init()
{
vm_object_init();
+ memory_object_proxy_init();
}
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
index 2864fa1f..dc7f7e4b 100644
--- a/vm/vm_kern.c
+++ b/vm/vm_kern.c
@@ -37,11 +37,13 @@
#include <string.h>
#include <mach/kern_return.h>
+#include <machine/locore.h>
#include <machine/vm_param.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/thread.h>
+#include <vm/pmap.h>
#include <vm/vm_fault.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
@@ -267,9 +269,9 @@ projected_buffer_deallocate(map, start, end)
/*Prepare for deallocation*/
if (entry->vme_start < start)
- _vm_map_clip_start(map, entry, start);
+ _vm_map_clip_start(&map->hdr, entry, start);
if (entry->vme_end > end)
- _vm_map_clip_end(map, entry, end);
+ _vm_map_clip_end(&map->hdr, entry, end);
if (map->first_free == entry) /*Adjust first_free hint*/
map->first_free = entry->vme_prev;
entry->projected_on = 0; /*Needed to allow deletion*/
diff --git a/vm/vm_kern.h b/vm/vm_kern.h
index 228c26d9..ca93d7a4 100644
--- a/vm/vm_kern.h
+++ b/vm/vm_kern.h
@@ -77,4 +77,9 @@ extern vm_map_t kernel_map;
extern vm_map_t kernel_pageable_map;
extern vm_map_t ipc_kernel_map;
+extern boolean_t projected_buffer_in_range(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end);
+
#endif /* _VM_VM_KERN_H_ */
diff --git a/vm/vm_map.c b/vm/vm_map.c
index 54c74701..dc2388d7 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -42,13 +42,28 @@
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/zalloc.h>
+#include <vm/pmap.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
+#include <vm/vm_resident.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
+/* Forward declarations */
+kern_return_t vm_map_delete(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end);
+
+kern_return_t vm_map_copyout_page_list(
+ vm_map_t dst_map,
+ vm_offset_t *dst_addr, /* OUT */
+ vm_map_copy_t copy);
+
+void vm_map_copy_page_discard (vm_map_copy_t copy);
+
/*
* Macros to copy a vm_map_entry. We must be careful to correctly
* manage the wired page count. vm_map_entry_copy() creates a new
@@ -676,7 +691,7 @@ vm_map_pmap_enter(map, addr, end_addr, object, offset, protection)
if (vm_map_pmap_enter_print) {
printf("vm_map_pmap_enter:");
- printf("map: %x, addr: %x, object: %x, offset: %x\n",
+ printf("map: %p, addr: %x, object: %p, offset: %x\n",
map, addr, object, offset);
}
@@ -783,7 +798,7 @@ kern_return_t vm_map_enter(
*/
if (((start + mask) & ~mask) < start)
- return(KERN_NO_SPACE);
+ RETURN(KERN_NO_SPACE);
start = ((start + mask) & ~mask);
end = start + size;
@@ -2131,8 +2146,10 @@ start_pass_1:
* the copy cannot be interrupted.
*/
- if (interruptible && contains_permanent_objects)
+ if (interruptible && contains_permanent_objects) {
+ vm_map_unlock(dst_map);
return(KERN_FAILURE); /* XXX */
+ }
/*
* XXXO If there are no permanent objects in the destination,
@@ -4379,12 +4396,13 @@ kern_return_t vm_map_lookup(var_map, vaddr, fault_type, out_version,
prot = entry->protection;
- if ((fault_type & (prot)) != fault_type)
+ if ((fault_type & (prot)) != fault_type) {
if ((prot & VM_PROT_NOTIFY) && (fault_type & VM_PROT_WRITE)) {
RETURN(KERN_WRITE_PROTECTION_FAILURE);
} else {
RETURN(KERN_PROTECTION_FAILURE);
}
+ }
/*
* If this page is not pageable, we have to get
diff --git a/vm/vm_map.h b/vm/vm_map.h
index 25c00651..567fe933 100644
--- a/vm/vm_map.h
+++ b/vm/vm_map.h
@@ -226,7 +226,7 @@ typedef struct vm_map_version {
* (returned) and an abort flag (abort if TRUE).
*/
-#define VM_MAP_COPY_PAGE_LIST_MAX 8
+#define VM_MAP_COPY_PAGE_LIST_MAX 64
typedef struct vm_map_copy {
int type;
@@ -393,6 +393,9 @@ extern void vm_map_print(vm_map_t);
extern kern_return_t vm_map_lookup(vm_map_t *, vm_offset_t, vm_prot_t,
vm_map_version_t *, vm_object_t *,
vm_offset_t *, vm_prot_t *, boolean_t *);
+/* Find a map entry */
+extern boolean_t vm_map_lookup_entry(vm_map_t, vm_offset_t,
+ vm_map_entry_t *);
/* Verify that a previous lookup is still valid */
extern boolean_t vm_map_verify(vm_map_t, vm_map_version_t *);
/* vm_map_verify_done is now a macro -- see below */
@@ -411,6 +414,7 @@ extern kern_return_t vm_map_copy_overwrite(vm_map_t, vm_offset_t,
vm_map_copy_t, boolean_t);
/* Discard a copy without using it */
extern void vm_map_copy_discard(vm_map_copy_t);
+extern void vm_map_copy_page_discard(vm_map_copy_t);
extern vm_map_copy_t vm_map_copy_copy(vm_map_copy_t);
/* Page list continuation version of previous */
extern kern_return_t vm_map_copy_discard_cont(vm_map_copyin_args_t,
@@ -422,6 +426,9 @@ extern kern_return_t vm_map_machine_attribute(vm_map_t, vm_offset_t,
vm_machine_attribute_t,
vm_machine_attribute_val_t *);
+/* Delete entry from map */
+extern void vm_map_entry_delete(vm_map_t, vm_map_entry_t);
+
/*
* Functions implemented as macros
*/
@@ -460,6 +467,42 @@ extern kern_return_t vm_map_pageable_common(vm_map_t, vm_offset_t,
extern vm_object_t vm_submap_object;
/*
+ * vm_map_copyin_object:
+ *
+ * Create a copy object from an object.
+ * Our caller donates an object reference.
+ */
+extern kern_return_t vm_map_copyin_object(
+ vm_object_t object,
+ vm_offset_t offset, /* offset of region in object */
+ vm_size_t size, /* size of region in object */
+ vm_map_copy_t *copy_result); /* OUT */
+
+/*
+ * vm_map_submap: [ kernel use only ]
+ *
+ * Mark the given range as handled by a subordinate map.
+ *
+ * This range must have been created with vm_map_find using
+ * the vm_submap_object, and no other operations may have been
+ * performed on this range prior to calling vm_map_submap.
+ *
+ * Only a limited number of operations can be performed
+ * within this range after calling vm_map_submap:
+ * vm_fault
+ * [Don't try vm_map_copyin!]
+ *
+ * To remove a submapping, one must first remove the
+ * range from the superior map, and then destroy the
+ * submap (if desired). [Better yet, don't try it.]
+ */
+extern kern_return_t vm_map_submap(
+ vm_map_t map,
+ vm_offset_t start,
+ vm_offset_t end,
+ vm_map_t submap);
+
+/*
* Wait and wakeup macros for in_transition map entries.
*/
#define vm_map_entry_wait(map, interruptible) \
@@ -471,4 +514,22 @@ extern vm_object_t vm_submap_object;
#define vm_map_entry_wakeup(map) thread_wakeup((event_t)&(map)->hdr)
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+extern void _vm_map_clip_start(
+ struct vm_map_header *map_header,
+ vm_map_entry_t entry,
+ vm_offset_t start);
+
+/*
+ * vm_map_clip_end: [ internal use only ]
+ *
+ * Asserts that the given entry ends at or before
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
+extern void _vm_map_clip_end(
+ struct vm_map_header *map_header,
+ vm_map_entry_t entry,
+ vm_offset_t end);
+
#endif /* _VM_VM_MAP_H_ */
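
A condensed sketch of the vm_map_copyin_object pattern exported above; object, size and data stand for a driver's local state, and the surrounding page handling is elided (the Xen block driver added below uses exactly this sequence in device_read).

/* Hand freshly filled pages back to a caller as an out-of-line copy object. */
vm_object_t object = vm_object_allocate (size);
/* ... grab pages, fill them, vm_page_insert() them into the object ... */
vm_map_copy_t copy;
kern_return_t kr = vm_map_copyin_object (object, 0, round_page (size), &copy);
if (kr != KERN_SUCCESS)
  vm_object_deallocate (object);        /* copyin failed: drop our reference */
else
  *data = (io_buf_ptr_t) copy;          /* the copy consumed the reference */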
diff --git a/vm/vm_object.c b/vm/vm_object.c
index 479dc93e..9057973d 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -179,7 +179,7 @@ vm_object_t kernel_object;
*/
queue_head_t vm_object_cached_list;
int vm_object_cached_count;
-int vm_object_cached_max = 32768; /* may be patched*/
+int vm_object_cached_max = 4000; /* may be patched*/
decl_simple_lock_data(,vm_object_cached_lock_data)
@@ -2539,11 +2539,11 @@ void vm_object_collapse(
/* Fall through to... */
default:
- printf("vm_object_collapse: %#x (pager %#x, request %#x) up to %#x\n",
+ printf("vm_object_collapse: %p (pager %p, request %p) up to %p\n",
backing_object, backing_object->pager, backing_object->pager_request,
object);
if (vm_object_collapse_debug > 2)
- Debugger("vm_object_collapse");
+ SoftDebugger("vm_object_collapse");
}
object->pager = backing_object->pager;
diff --git a/vm/vm_object.h b/vm/vm_object.h
index 74732895..c9925709 100644
--- a/vm/vm_object.h
+++ b/vm/vm_object.h
@@ -34,6 +34,7 @@
#ifndef _VM_VM_OBJECT_H_
#define _VM_VM_OBJECT_H_
+#include <sys/types.h>
#include <mach/kern_return.h>
#include <mach/boolean.h>
#include <mach/memory_object.h>
@@ -46,6 +47,7 @@
#include <kern/debug.h>
#include <kern/macro_help.h>
#include <vm/pmap.h>
+#include <ipc/ipc_types.h>
#if MACH_PAGEMAP
#include <vm/vm_external.h>
@@ -235,6 +237,16 @@ extern void vm_object_print(vm_object_t);
extern vm_object_t vm_object_request_object(struct ipc_port *);
+extern boolean_t vm_object_coalesce(
+ vm_object_t prev_object,
+ vm_object_t next_object,
+ vm_offset_t prev_offset,
+ vm_offset_t next_offset,
+ vm_size_t prev_size,
+ vm_size_t next_size);
+
+extern void vm_object_pager_wakeup(ipc_port_t pager);
+
/*
* Event waiting handling
*/
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index 6db1cb74..7a755bf4 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -34,6 +34,7 @@
* The proverbial page-out daemon.
*/
+#include <device/net_io.h>
#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <vm/memory_object_default.user.h>
@@ -42,12 +43,14 @@
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/debug.h>
+#include <kern/task.h>
#include <kern/thread.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
+#include <machine/locore.h>
#include <machine/vm_tuning.h>
@@ -548,7 +551,6 @@ void vm_pageout_scan()
* internal memory even if we never reach vm_page_free_target.
*/
- Restart:
stack_collect();
net_kmsg_collect();
consider_task_collect();
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index ef131451..b23e0f35 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -935,7 +935,7 @@ vm_page_grab_contiguous_pages(
register int first_set;
int size, alloc_size;
kern_return_t ret;
- vm_page_t mem, prevmem;
+ vm_page_t mem, *prevmemp;
#ifndef NBBY
#define NBBY 8 /* size in bits of sizeof()`s unity */
@@ -1043,7 +1043,6 @@ count_ones:
* search for beans here
*/
bits_so_far = 0;
-count_zeroes:
while ((bitpos < NBPEL) && ((v & 1) == 0)) {
bitpos++;
v >>= 1;
@@ -1063,7 +1062,6 @@ count_zeroes:
/*
* We could not find enough contiguous pages.
*/
-not_found_em:
simple_unlock(&vm_page_queue_free_lock);
ret = KERN_RESOURCE_SHORTAGE;
@@ -1090,7 +1088,7 @@ found_em:
/* running pointers */
mem = vm_page_queue_free;
- prevmem = VM_PAGE_NULL;
+ prevmemp = &vm_page_queue_free;
while (mem) {
@@ -1100,8 +1098,7 @@ found_em:
if ((addr >= first_phys) &&
(addr < last_phys)) {
- if (prevmem)
- prevmem->pageq.next = mem->pageq.next;
+ *prevmemp = (vm_page_t) mem->pageq.next;
pages[(addr - first_phys) >> PAGE_SHIFT] = mem;
mem->free = FALSE;
mem->extcounted = mem->external = external;
@@ -1110,7 +1107,7 @@ found_em:
*/
if (--npages == 0) break;
} else
- prevmem = mem;
+ prevmemp = (vm_page_t *) &mem->pageq.next;
mem = (vm_page_t) mem->pageq.next;
}
diff --git a/vm/vm_resident.h b/vm/vm_resident.h
new file mode 100644
index 00000000..67f1807f
--- /dev/null
+++ b/vm/vm_resident.h
@@ -0,0 +1,45 @@
+/*
+ * Resident memory management module functions.
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author: Barry deFreese.
+ */
+/*
+ * Resident memory management module functions.
+ *
+ */
+
+#ifndef _VM_RESIDENT_H_
+#define _VM_RESIDENT_H_
+
+#include <mach/std_types.h>
+
+/*
+ * vm_page_replace:
+ *
+ * Exactly like vm_page_insert, except that we first
+ * remove any existing page at the given offset in object
+ * and we don't do deactivate-behind.
+ *
+ * The object and page must be locked.
+ */
+extern void vm_page_replace (
+ register vm_page_t mem,
+ register vm_object_t object,
+ register vm_offset_t offset);
+
+#endif /* _VM_RESIDENT_H_ */
diff --git a/vm/vm_user.c b/vm/vm_user.c
index ebe98449..672daab8 100644
--- a/vm/vm_user.c
+++ b/vm/vm_user.c
@@ -41,6 +41,7 @@
#include <kern/host.h>
#include <kern/task.h>
#include <vm/vm_fault.h>
+#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
@@ -275,6 +276,12 @@ kern_return_t vm_copy(map, source_address, size, dest_address)
return KERN_SUCCESS;
}
+
+/* XXX From memory_object_proxy.c */
+kern_return_t
+memory_object_proxy_lookup (ipc_port_t proxy_object, ipc_port_t *object,
+ vm_prot_t *max_protection);
+
/*
* Routine: vm_map
*/
@@ -324,7 +331,22 @@ kern_return_t vm_map(
copy = FALSE;
} else if ((object = vm_object_enter(memory_object, size, FALSE))
== VM_OBJECT_NULL)
- return(KERN_INVALID_ARGUMENT);
+ {
+ ipc_port_t real_memobj;
+ vm_prot_t prot;
+ result = memory_object_proxy_lookup (memory_object, &real_memobj,
+ &prot);
+ if (result != KERN_SUCCESS)
+ return result;
+
+ /* Reduce the allowed access to the memory object. */
+ max_protection &= prot;
+ cur_protection &= prot;
+
+ if ((object = vm_object_enter(real_memobj, size, FALSE))
+ == VM_OBJECT_NULL)
+ return KERN_INVALID_ARGUMENT;
+ }
/*
* Perform the copy if requested
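
A worked example of the protection masking above, with values chosen for illustration: a proxy created with VM_PROT_READ silently downgrades a read-write mapping request.

vm_prot_t prot = VM_PROT_READ;                            /* proxy's max_protection */
vm_prot_t cur_protection = VM_PROT_READ | VM_PROT_WRITE;  /* requested by the caller */
vm_prot_t max_protection = VM_PROT_ALL;

max_protection &= prot;    /* -> VM_PROT_READ */
cur_protection &= prot;    /* -> VM_PROT_READ */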
diff --git a/xen/Makefrag.am b/xen/Makefrag.am
new file mode 100644
index 00000000..61eb475b
--- /dev/null
+++ b/xen/Makefrag.am
@@ -0,0 +1,83 @@
+# Makefile fragment for the Xen platform.
+
+# Copyright (C) 2007 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+#
+# Xen support.
+#
+
+libkernel_a_SOURCES += \
+ xen/public/arch-x86_32.h \
+ xen/public/arch-x86_64.h \
+ xen/public/arch-x86/xen.h \
+ xen/public/arch-x86/xen-mca.h \
+ xen/public/arch-x86/xen-x86_32.h \
+ xen/public/arch-x86/xen-x86_64.h \
+ xen/public/callback.h \
+ xen/public/COPYING \
+ xen/public/dom0_ops.h \
+ xen/public/domctl.h \
+ xen/public/elfnote.h \
+ xen/public/elfstructs.h \
+ xen/public/event_channel.h \
+ xen/public/features.h \
+ xen/public/grant_table.h \
+ xen/public/io/blkif.h \
+ xen/public/io/console.h \
+ xen/public/io/fbif.h \
+ xen/public/io/fsif.h \
+ xen/public/io/kbdif.h \
+ xen/public/io/netif.h \
+ xen/public/io/pciif.h \
+ xen/public/io/protocols.h \
+ xen/public/io/ring.h \
+ xen/public/io/tpmif.h \
+ xen/public/io/xenbus.h \
+ xen/public/io/xs_wire.h \
+ xen/public/kexec.h \
+ xen/public/libelf.h \
+ xen/public/memory.h \
+ xen/public/nmi.h \
+ xen/public/physdev.h \
+ xen/public/platform.h \
+ xen/public/sched.h \
+ xen/public/sysctl.h \
+ xen/public/trace.h \
+ xen/public/vcpu.h \
+ xen/public/version.h \
+ xen/public/xencomm.h \
+ xen/public/xen-compat.h \
+ xen/public/xen.h \
+ xen/public/xenoprof.h \
+ xen/block.c \
+ xen/block.h \
+ xen/console.c \
+ xen/console.h \
+ xen/evt.c \
+ xen/evt.h \
+ xen/grant.c \
+ xen/grant.h \
+ xen/net.c \
+ xen/net.h \
+ xen/ring.c \
+ xen/ring.h \
+ xen/store.c \
+ xen/store.h \
+ xen/time.c \
+ xen/time.h \
+ xen/xen.c \
+ xen/xen.h
diff --git a/xen/block.c b/xen/block.c
new file mode 100644
index 00000000..3c188bff
--- /dev/null
+++ b/xen/block.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <mach/mig_errors.h>
+#include <ipc/ipc_port.h>
+#include <ipc/ipc_space.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_user.h>
+#include <device/device_types.h>
+#include <device/device_port.h>
+#include <device/disk_status.h>
+#include <device/device_reply.user.h>
+#include <device/device_emul.h>
+#include <device/ds_routines.h>
+#include <xen/public/io/blkif.h>
+#include <xen/evt.h>
+#include <string.h>
+#include <util/atoi.h>
+#include "store.h"
+#include "block.h"
+#include "grant.h"
+#include "ring.h"
+#include "xen.h"
+
+/* Hypervisor part */
+
+struct block_data {
+ struct device device;
+ char *name;
+ int open_count;
+ char *backend;
+ domid_t domid;
+ char *vbd;
+ int handle;
+ unsigned info;
+ dev_mode_t mode;
+ unsigned sector_size;
+ unsigned long nr_sectors;
+ ipc_port_t port;
+ blkif_front_ring_t ring;
+ evtchn_port_t evt;
+ simple_lock_data_t lock;
+ simple_lock_data_t pushlock;
+};
+
+static int n_vbds;
+static struct block_data *vbd_data;
+
+struct device_emulation_ops hyp_block_emulation_ops;
+
+static void hyp_block_intr(int unit) {
+ struct block_data *bd = &vbd_data[unit];
+ blkif_response_t *rsp;
+ int more;
+ io_return_t *err;
+
+ simple_lock(&bd->lock);
+ more = RING_HAS_UNCONSUMED_RESPONSES(&bd->ring);
+ while (more) {
+ rmb(); /* make sure we see responses */
+ rsp = RING_GET_RESPONSE(&bd->ring, bd->ring.rsp_cons++);
+ err = (void *) (unsigned long) rsp->id;
+ switch (rsp->status) {
+ case BLKIF_RSP_ERROR:
+ *err = D_IO_ERROR;
+ break;
+ case BLKIF_RSP_OKAY:
+ break;
+ default:
+ printf("Unrecognized blkif status %d\n", rsp->status);
+ goto drop;
+ }
+ thread_wakeup(err);
+drop:
+ thread_wakeup_one(bd);
+ RING_FINAL_CHECK_FOR_RESPONSES(&bd->ring, more);
+ }
+ simple_unlock(&bd->lock);
+}
+
+#define VBD_PATH "device/vbd"
+void hyp_block_init(void) {
+ char **vbds, **vbd;
+ char *c;
+ int i, disk, partition;
+ int n;
+ int grant;
+ char port_name[10];
+ char *prefix;
+ char device_name[32];
+ domid_t domid;
+ evtchn_port_t evt;
+ hyp_store_transaction_t t;
+ vm_offset_t addr;
+ struct block_data *bd;
+ blkif_sring_t *ring;
+
+ vbds = hyp_store_ls(0, 1, VBD_PATH);
+ if (!vbds) {
+ printf("hd: No block device (%s). Hoping you don't need any\n", hyp_store_error);
+ n_vbds = 0;
+ return;
+ }
+
+ n = 0;
+ for (vbd = vbds; *vbd; vbd++)
+ n++;
+
+ vbd_data = (void*) kalloc(n * sizeof(*vbd_data));
+ if (!vbd_data) {
+ printf("hd: No memory room for VBD\n");
+ n_vbds = 0;
+ return;
+ }
+ n_vbds = n;
+
+ for (n = 0; n < n_vbds; n++) {
+ bd = &vbd_data[n];
+ mach_atoi((u_char *) vbds[n], &bd->handle);
+ if (bd->handle == MACH_ATOI_DEFAULT)
+ continue;
+
+ bd->open_count = -2;
+ bd->vbd = vbds[n];
+
+ /* Get virtual number. */
+ i = hyp_store_read_int(0, 5, VBD_PATH, "/", vbds[n], "/", "virtual-device");
+ if (i == -1)
+ panic("hd: couldn't virtual device of VBD %s\n",vbds[n]);
+ if ((i >> 28) == 1) {
+ /* xvd, new format */
+ prefix = "xvd";
+ disk = (i >> 8) & ((1 << 20) - 1);
+ partition = i & ((1 << 8) - 1);
+ } else if ((i >> 8) == 202) {
+ /* xvd, old format */
+ prefix = "xvd";
+ disk = (i >> 4) & ((1 << 4) - 1);
+ partition = i & ((1 << 4) - 1);
+ } else if ((i >> 8) == 8) {
+ /* SCSI */
+ prefix = "sd";
+ disk = (i >> 4) & ((1 << 4) - 1);
+ partition = i & ((1 << 4) - 1);
+ } else if ((i >> 8) == 22) {
+ /* IDE secondary */
+ prefix = "hd";
+ disk = ((i >> 6) & ((1 << 2) - 1)) + 2;
+ partition = i & ((1 << 6) - 1);
+ } else if ((i >> 8) == 3) {
+ /* IDE primary */
+ prefix = "hd";
+ disk = (i >> 6) & ((1 << 2) - 1);
+ partition = i & ((1 << 6) - 1);
+ } else {
+ /* Unknown encoding: avoid using uninitialized prefix/disk/partition. */
+ panic("hd: unsupported virtual device number %d for VBD %s", i, vbds[n]);
+ }
+ if (partition)
+ sprintf(device_name, "%s%us%u", prefix, disk, partition);
+ else
+ sprintf(device_name, "%s%u", prefix, disk);
+ /* Include room for the trailing NUL. */
+ bd->name = (char*) kalloc(strlen(device_name) + 1);
+ strcpy(bd->name, device_name);
+
+ /* Get domain id of backend driver. */
+ i = hyp_store_read_int(0, 5, VBD_PATH, "/", vbds[n], "/", "backend-id");
+ if (i == -1)
+ panic("%s: couldn't read backend domid (%s)", device_name, hyp_store_error);
+ bd->domid = domid = i;
+
+ do {
+ t = hyp_store_transaction_start();
+
+ /* Get a page for ring */
+ if (kmem_alloc_wired(kernel_map, &addr, PAGE_SIZE) != KERN_SUCCESS)
+ panic("%s: couldn't allocate space for store ring\n", device_name);
+ ring = (void*) addr;
+ SHARED_RING_INIT(ring);
+ FRONT_RING_INIT(&bd->ring, ring, PAGE_SIZE);
+ grant = hyp_grant_give(domid, atop(kvtophys(addr)), 0);
+
+ /* and give it to backend. */
+ i = sprintf(port_name, "%u", grant);
+ c = hyp_store_write(t, port_name, 5, VBD_PATH, "/", vbds[n], "/", "ring-ref");
+ if (!c)
+ panic("%s: couldn't store ring reference (%s)", device_name, hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+ /* Allocate an event channel and give it to backend. */
+ bd->evt = evt = hyp_event_channel_alloc(domid);
+ hyp_evt_handler(evt, hyp_block_intr, n, SPL7);
+ i = sprintf(port_name, "%lu", evt);
+ c = hyp_store_write(t, port_name, 5, VBD_PATH, "/", vbds[n], "/", "event-channel");
+ if (!c)
+ panic("%s: couldn't store event channel (%s)", device_name, hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+ c = hyp_store_write(t, hyp_store_state_initialized, 5, VBD_PATH, "/", vbds[n], "/", "state");
+ if (!c)
+ panic("%s: couldn't store state (%s)", device_name, hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+ } while (!hyp_store_transaction_stop(t));
+
+ c = hyp_store_read(0, 5, VBD_PATH, "/", vbds[n], "/", "backend");
+ if (!c)
+ panic("%s: couldn't get path to backend (%s)", device_name, hyp_store_error);
+ bd->backend = c;
+
+ while(1) {
+ i = hyp_store_read_int(0, 3, bd->backend, "/", "state");
+ if (i == MACH_ATOI_DEFAULT)
+ panic("can't read state from %s", bd->backend);
+ if (i == XenbusStateConnected)
+ break;
+ hyp_yield();
+ }
+
+ i = hyp_store_read_int(0, 3, bd->backend, "/", "sectors");
+ if (i == -1)
+ panic("%s: couldn't get number of sectors (%s)", device_name, hyp_store_error);
+ bd->nr_sectors = i;
+
+ i = hyp_store_read_int(0, 3, bd->backend, "/", "sector-size");
+ if (i == -1)
+ panic("%s: couldn't get sector size (%s)", device_name, hyp_store_error);
+ if (i & ~(2*(i-1)+1))
+ panic("sector size %d is not a power of 2\n", i);
+ if (i > PAGE_SIZE || PAGE_SIZE % i != 0)
+ panic("%s: couldn't handle sector size %d with pages of size %d\n", device_name, i, PAGE_SIZE);
+ bd->sector_size = i;
+
+ i = hyp_store_read_int(0, 3, bd->backend, "/", "info");
+ if (i == -1)
+ panic("%s: couldn't get info (%s)", device_name, hyp_store_error);
+ bd->info = i;
+
+ c = hyp_store_read(0, 3, bd->backend, "/", "mode");
+ if (!c)
+ panic("%s: couldn't get backend's mode (%s)", device_name, hyp_store_error);
+ if ((c[0] == 'w') && !(bd->info & VDISK_READONLY))
+ bd->mode = D_READ|D_WRITE;
+ else
+ bd->mode = D_READ;
+
+ c = hyp_store_read(0, 3, bd->backend, "/", "params");
+ if (!c)
+ panic("%s: couldn't get backend's real device (%s)", device_name, hyp_store_error);
+
+ /* TODO: change suffix */
+ printf("%s: dom%d's VBD %s (%s,%c%s) %ldMB\n", device_name, domid,
+ vbds[n], c, bd->mode & D_WRITE ? 'w' : 'r',
+ bd->info & VDISK_CDROM ? ", cdrom" : "",
+ bd->nr_sectors / ((1<<20) / 512));
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+ c = hyp_store_write(0, hyp_store_state_connected, 5, VBD_PATH, "/", bd->vbd, "/", "state");
+ if (!c)
+ panic("couldn't store state for %s (%s)", device_name, hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+ bd->open_count = -1;
+ bd->device.emul_ops = &hyp_block_emulation_ops;
+ bd->device.emul_data = bd;
+ simple_lock_init(&bd->lock);
+ simple_lock_init(&bd->pushlock);
+ }
+}
+
+static ipc_port_t
+dev_to_port(void *d)
+{
+ struct block_data *b = d;
+ if (!d)
+ return IP_NULL;
+ return ipc_port_make_send(b->port);
+}
+
+static int
+device_close(void *devp)
+{
+ struct block_data *bd = devp;
+ if (--bd->open_count < 0)
+ panic("too many closes on %s", bd->name);
+ printf("close, %s count %d\n", bd->name, bd->open_count);
+ if (bd->open_count)
+ return 0;
+ ipc_kobject_set(bd->port, IKO_NULL, IKOT_NONE);
+ ipc_port_dealloc_kernel(bd->port);
+ return 0;
+}
+
+static io_return_t
+device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type,
+ dev_mode_t mode, char *name, device_t *devp /* out */)
+{
+ int i, err = 0;
+ ipc_port_t port, notify;
+ struct block_data *bd;
+
+ for (i = 0; i < n_vbds; i++)
+ if (!strcmp(name, vbd_data[i].name))
+ break;
+
+ if (i == n_vbds)
+ return D_NO_SUCH_DEVICE;
+
+ bd = &vbd_data[i];
+ if (bd->open_count == -2)
+ /* couldn't be initialized */
+ return D_NO_SUCH_DEVICE;
+
+ if ((mode & D_WRITE) && !(bd->mode & D_WRITE))
+ return D_READ_ONLY;
+
+ if (bd->open_count >= 0) {
+ *devp = &bd->device;
+ bd->open_count++;
+ printf("re-open, %s count %d\n", bd->name, bd->open_count);
+ return D_SUCCESS;
+ }
+
+ bd->open_count = 1;
+ printf("%s count %d\n", bd->name, bd->open_count);
+
+ port = ipc_port_alloc_kernel();
+ if (port == IP_NULL) {
+ err = KERN_RESOURCE_SHORTAGE;
+ goto out;
+ }
+ bd->port = port;
+
+ *devp = &bd->device;
+
+ ipc_kobject_set (port, (ipc_kobject_t) &bd->device, IKOT_DEVICE);
+
+ notify = ipc_port_make_sonce (bd->port);
+ ip_lock (bd->port);
+ ipc_port_nsrequest (bd->port, 1, notify, &notify);
+ assert (notify == IP_NULL);
+
+out:
+ if (IP_VALID (reply_port))
+ ds_device_open_reply (reply_port, reply_port_type, D_SUCCESS, port);
+ else
+ device_close(bd);
+ return MIG_NO_REPLY;
+}
+
+static io_return_t
+device_read (void *d, ipc_port_t reply_port,
+ mach_msg_type_name_t reply_port_type, dev_mode_t mode,
+ recnum_t bn, int count, io_buf_ptr_t *data,
+ unsigned *bytes_read)
+{
+ int resid, amt;
+ io_return_t err = 0;
+ vm_page_t pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int nbpages;
+ vm_map_copy_t copy;
+ vm_offset_t offset, alloc_offset, o;
+ vm_object_t object;
+ vm_page_t m;
+ vm_size_t len, size;
+ struct block_data *bd = d;
+ struct blkif_request *req;
+
+ *data = 0;
+ *bytes_read = 0;
+
+ if (count < 0)
+ return D_INVALID_SIZE;
+ if (count == 0)
+ return 0;
+
+ /* Allocate an object to hold the data. */
+ size = round_page (count);
+ object = vm_object_allocate (size);
+ if (! object)
+ {
+ err = D_NO_MEMORY;
+ goto out;
+ }
+ alloc_offset = offset = 0;
+ resid = count;
+
+ while (resid && !err)
+ {
+ unsigned reqn;
+ int i;
+ int last_sect;
+
+ nbpages = 0;
+
+ /* Determine size of I/O this time around. */
+ len = round_page(offset + resid) - offset;
+ if (len > PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+ len = PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+ /* Allocate pages. */
+ while (alloc_offset < offset + len)
+ {
+ while ((m = vm_page_grab (FALSE)) == 0)
+ VM_PAGE_WAIT (0);
+ assert (! m->active && ! m->inactive);
+ m->busy = TRUE;
+ assert(nbpages < BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ pages[nbpages++] = m;
+ alloc_offset += PAGE_SIZE;
+ }
+
+ /* Do the read. */
+ amt = len;
+ if (amt > resid)
+ amt = resid;
+
+ /* allocate a request */
+ spl_t spl = splsched();
+ while(1) {
+ simple_lock(&bd->lock);
+ if (!RING_FULL(&bd->ring))
+ break;
+ thread_sleep(bd, &bd->lock, FALSE);
+ }
+ mb();
+ reqn = bd->ring.req_prod_pvt++;
+ simple_lock(&bd->pushlock);
+ simple_unlock(&bd->lock);
+ (void) splx(spl);
+
+ req = RING_GET_REQUEST(&bd->ring, reqn);
+ req->operation = BLKIF_OP_READ;
+ req->nr_segments = nbpages;
+ req->handle = bd->handle;
+ req->id = (unsigned64_t) (unsigned long) &err; /* pointer on the stack */
+ req->sector_number = bn + offset / 512;
+ for (i = 0; i < nbpages; i++) {
+ req->seg[i].gref = gref[i] = hyp_grant_give(bd->domid, atop(pages[i]->phys_addr), 0);
+ req->seg[i].first_sect = 0;
+ req->seg[i].last_sect = PAGE_SIZE/512 - 1;
+ }
+ last_sect = ((amt - 1) & PAGE_MASK) / 512;
+ req->seg[nbpages-1].last_sect = last_sect;
+
+ memset((void*) phystokv(pages[nbpages-1]->phys_addr
+ + (last_sect + 1) * 512),
+ 0, PAGE_SIZE - (last_sect + 1) * 512);
+
+ /* no need for a lock: as long as the request is not pushed, the event won't be triggered */
+ assert_wait((event_t) &err, FALSE);
+
+ int notify;
+ wmb(); /* make sure it sees requests */
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bd->ring, notify);
+ if (notify)
+ hyp_event_channel_send(bd->evt);
+ simple_unlock(&bd->pushlock);
+
+ thread_block(NULL);
+
+ if (err)
+ printf("error reading %d bytes at sector %d\n", amt,
+ bn + offset / 512);
+
+ for (i = 0; i < nbpages; i++)
+ hyp_grant_takeback(gref[i]);
+
+ /* Compute number of pages to insert in object. */
+ o = offset;
+
+ resid -= amt;
+ if (resid == 0)
+ offset = o + len;
+ else
+ offset += amt;
+
+ /* Add pages to the object. */
+ vm_object_lock (object);
+ for (i = 0; i < nbpages; i++)
+ {
+ m = pages[i];
+ assert (m->busy);
+ vm_page_lock_queues ();
+ PAGE_WAKEUP_DONE (m);
+ m->dirty = TRUE;
+ vm_page_insert (m, object, o);
+ vm_page_unlock_queues ();
+ o += PAGE_SIZE;
+ }
+ vm_object_unlock (object);
+ }
+
+out:
+ if (! err)
+ err = vm_map_copyin_object (object, 0, round_page (count), &copy);
+ if (! err)
+ {
+ *data = (io_buf_ptr_t) copy;
+ *bytes_read = count - resid;
+ }
+ else
+ vm_object_deallocate (object);
+ return err;
+}
+
+static io_return_t
+device_write(void *d, ipc_port_t reply_port,
+ mach_msg_type_name_t reply_port_type, dev_mode_t mode,
+ recnum_t bn, io_buf_ptr_t data, unsigned int count,
+ int *bytes_written)
+{
+ io_return_t err = 0;
+ vm_map_copy_t copy = (vm_map_copy_t) data;
+ vm_offset_t aligned_buffer = 0;
+ int copy_npages = atop(round_page(count));
+ vm_offset_t phys_addrs[copy_npages];
+ struct block_data *bd = d;
+ blkif_request_t *req;
+ grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned reqn, size;
+ int i, nbpages, j;
+
+ if (!(bd->mode & D_WRITE))
+ return D_READ_ONLY;
+
+ if (count == 0) {
+ vm_map_copy_discard(copy);
+ return 0;
+ }
+
+ if (count % bd->sector_size)
+ return D_INVALID_SIZE;
+
+ if (count > copy->size)
+ return D_INVALID_SIZE;
+
+ if (copy->type != VM_MAP_COPY_PAGE_LIST || copy->offset & PAGE_MASK) {
+ /* Unaligned write. Has to copy data before passing it to the backend. */
+ kern_return_t kr;
+ vm_offset_t buffer;
+
+ kr = kmem_alloc(device_io_map, &aligned_buffer, count);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ kr = vm_map_copyout(device_io_map, &buffer, vm_map_copy_copy(copy));
+ if (kr != KERN_SUCCESS) {
+ kmem_free(device_io_map, aligned_buffer, count);
+ return kr;
+ }
+
+ memcpy((void*) aligned_buffer, (void*) buffer, count);
+
+ vm_deallocate (device_io_map, buffer, count);
+
+ for (i = 0; i < copy_npages; i++)
+ phys_addrs[i] = kvtophys(aligned_buffer + ptoa(i));
+ } else {
+ for (i = 0; i < copy_npages; i++)
+ phys_addrs[i] = copy->cpy_page_list[i]->phys_addr;
+ }
+
+ for (i=0; i<copy_npages; i+=nbpages) {
+
+ nbpages = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ if (nbpages > copy_npages-i)
+ nbpages = copy_npages-i;
+
+ /* allocate a request */
+ spl_t spl = splsched();
+ while(1) {
+ simple_lock(&bd->lock);
+ if (!RING_FULL(&bd->ring))
+ break;
+ thread_sleep(bd, &bd->lock, FALSE);
+ }
+ mb();
+ reqn = bd->ring.req_prod_pvt++;
+ simple_lock(&bd->pushlock);
+ simple_unlock(&bd->lock);
+ (void) splx(spl);
+
+ req = RING_GET_REQUEST(&bd->ring, reqn);
+ req->operation = BLKIF_OP_WRITE;
+ req->nr_segments = nbpages;
+ req->handle = bd->handle;
+ req->id = (unsigned64_t) (unsigned long) &err; /* pointer on the stack */
+ req->sector_number = bn + i*PAGE_SIZE / 512;
+
+ for (j = 0; j < nbpages; j++) {
+ req->seg[j].gref = gref[j] = hyp_grant_give(bd->domid, atop(phys_addrs[i + j]), 1);
+ req->seg[j].first_sect = 0;
+ size = PAGE_SIZE;
+ if ((i + j + 1) * PAGE_SIZE > count)
+ size = count - (i + j) * PAGE_SIZE;
+ req->seg[j].last_sect = size/512 - 1;
+ }
+
+ /* no need for a lock: as long as the request is not pushed, the event won't be triggered */
+ assert_wait((event_t) &err, FALSE);
+
+ int notify;
+ wmb(); /* make sure it sees requests */
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bd->ring, notify);
+ if (notify)
+ hyp_event_channel_send(bd->evt);
+ simple_unlock(&bd->pushlock);
+
+ thread_block(NULL);
+
+ for (j = 0; j < nbpages; j++)
+ hyp_grant_takeback(gref[j]);
+
+ if (err) {
+ printf("error writing %d bytes at sector %d\n", count, bn);
+ break;
+ }
+ }
+
+ if (aligned_buffer)
+ kmem_free(device_io_map, aligned_buffer, count);
+
+ vm_map_copy_discard (copy);
+
+ if (!err)
+ *bytes_written = count;
+
+ if (IP_VALID(reply_port))
+ ds_device_write_reply (reply_port, reply_port_type, err, count);
+
+ return MIG_NO_REPLY;
+}
+
+static io_return_t
+device_get_status(void *d, dev_flavor_t flavor, dev_status_t status,
+ mach_msg_type_number_t *status_count)
+{
+ struct block_data *bd = d;
+
+ switch (flavor)
+ {
+ case DEV_GET_SIZE:
+ status[DEV_GET_SIZE_DEVICE_SIZE] = (unsigned long long) bd->nr_sectors * 512;
+ status[DEV_GET_SIZE_RECORD_SIZE] = bd->sector_size;
+ *status_count = DEV_GET_SIZE_COUNT;
+ break;
+ case DEV_GET_RECORDS:
+ status[DEV_GET_RECORDS_DEVICE_RECORDS] = ((unsigned long long) bd->nr_sectors * 512) / bd->sector_size;
+ status[DEV_GET_RECORDS_RECORD_SIZE] = bd->sector_size;
+ *status_count = DEV_GET_RECORDS_COUNT;
+ break;
+ default:
+ printf("TODO: block_%s(%d)\n", __func__, flavor);
+ return D_INVALID_OPERATION;
+ }
+ return D_SUCCESS;
+}
+
+struct device_emulation_ops hyp_block_emulation_ops = {
+ NULL, /* dereference */
+ NULL, /* deallocate */
+ dev_to_port,
+ device_open,
+ device_close,
+ device_write,
+ NULL, /* write_inband */
+ device_read,
+ NULL, /* read_inband */
+ NULL, /* set_status */
+ device_get_status,
+ NULL, /* set_filter */
+ NULL, /* map */
+ NULL, /* no_senders */
+ NULL, /* write_trap */
+ NULL, /* writev_trap */
+};
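
For reference, the request/response handshake used by device_read and device_write above, condensed into one sequence; bd, err, the request contents and the grant setup are as in those functions, and error handling is omitted.

spl_t spl = splsched ();
for (;;) {                               /* wait for a free ring slot */
  simple_lock (&bd->lock);
  if (!RING_FULL (&bd->ring))
    break;
  thread_sleep (bd, &bd->lock, FALSE);   /* drops the lock while sleeping */
}
mb ();
unsigned reqn = bd->ring.req_prod_pvt++;
simple_lock (&bd->pushlock);             /* serialize pushes to the ring */
simple_unlock (&bd->lock);
splx (spl);

blkif_request_t *req = RING_GET_REQUEST (&bd->ring, reqn);
/* ... fill *req; req->id points at the on-stack err variable ... */

assert_wait ((event_t) &err, FALSE);     /* hyp_block_intr() wakes us here */
int notify;
wmb ();
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY (&bd->ring, notify);
if (notify)
  hyp_event_channel_send (bd->evt);
simple_unlock (&bd->pushlock);
thread_block (NULL);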
diff --git a/xen/block.h b/xen/block.h
new file mode 100644
index 00000000..5955968a
--- /dev/null
+++ b/xen/block.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_BLOCK_H
+#define XEN_BLOCK_H
+
+void hyp_block_init(void);
+
+#endif /* XEN_BLOCK_H */
diff --git a/xen/configfrag.ac b/xen/configfrag.ac
new file mode 100644
index 00000000..eb689960
--- /dev/null
+++ b/xen/configfrag.ac
@@ -0,0 +1,44 @@
+dnl Configure fragment for the Xen platform.
+
+dnl Copyright (C) 2007 Free Software Foundation, Inc.
+
+dnl This program is free software; you can redistribute it and/or modify it
+dnl under the terms of the GNU General Public License as published by the
+dnl Free Software Foundation; either version 2, or (at your option) any later
+dnl version.
+dnl
+dnl This program is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+dnl for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License along
+dnl with this program; if not, write to the Free Software Foundation, Inc.,
+dnl 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+#
+# Xen platform.
+#
+
+[if [ "$host_platform" = xen ]; then]
+ AC_DEFINE([MACH_XEN], [], [build a MachXen kernel])
+ AC_DEFINE([MACH_HYP], [], [be a hypervisor guest])
+ AM_CONDITIONAL([PLATFORM_xen], [true])
+
+ AC_ARG_ENABLE([pseudo-phys],
+ AS_HELP_STRING([--enable-pseudo-phys], [Pseudo physical support]))
+ [if [ x"$enable_pseudo_phys" = xno ]; then]
+ AM_CONDITIONAL([enable_pseudo_phys], [false])
+ [else]
+ AC_DEFINE([MACH_PSEUDO_PHYS], [], [Enable pseudo physical memory support])
+ AM_CONDITIONAL([enable_pseudo_phys], [true])
+ [fi]
+
+[else]
+ AM_CONDITIONAL([PLATFORM_xen], [false])
+ AM_CONDITIONAL([enable_pseudo_phys], [false])
+[fi]
+
+dnl Local Variables:
+dnl mode: autoconf
+dnl End:
diff --git a/xen/console.c b/xen/console.c
new file mode 100644
index 00000000..9798ec0a
--- /dev/null
+++ b/xen/console.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <device/tty.h>
+#include <device/cons.h>
+#include <machine/pmap.h>
+#include <machine/machspl.h>
+#include <xen/public/io/console.h>
+#include "console.h"
+#include "ring.h"
+#include "evt.h"
+
+/* Hypervisor part */
+
+decl_simple_lock_data(static, outlock);
+decl_simple_lock_data(static, inlock);
+static struct xencons_interface *console;
+static int kd_pollc;
+int kb_mode; /* XXX: actually don't care. */
+
+#undef hyp_console_write
+void hyp_console_write(const char *str, int len)
+{
+ hyp_console_io (CONSOLEIO_write, len, kvtolin(str));
+}
+
+int hypputc(int c)
+{
+ if (!console) {
+ char d = c;
+ hyp_console_io(CONSOLEIO_write, 1, kvtolin(&d));
+ } else {
+ spl_t spl = splhigh();
+ simple_lock(&outlock);
+ while (hyp_ring_smash(console->out, console->out_prod, console->out_cons)) {
+ hyp_console_put("ring smash\n");
+ /* TODO: are we allowed to sleep in putc? */
+ hyp_yield();
+ }
+ hyp_ring_cell(console->out, console->out_prod) = c;
+ wmb();
+ console->out_prod++;
+ hyp_event_channel_send(boot_info.console_evtchn);
+ simple_unlock(&outlock);
+ splx(spl);
+ }
+ return 0;
+}
+
+int hypcnputc(dev_t dev, int c)
+{
+ return hypputc(c);
+}
+
+/* get char by polling, used by debugger */
+int hypcngetc(dev_t dev, int wait)
+{
+ int ret;
+ if (wait)
+ while (console->in_prod == console->in_cons)
+ hyp_yield();
+ else
+ if (console->in_prod == console->in_cons)
+ return -1;
+ ret = hyp_ring_cell(console->in, console->in_cons);
+ mb();
+ console->in_cons++;
+ hyp_event_channel_send(boot_info.console_evtchn);
+ return ret;
+}
+
+void cnpollc(boolean_t on) {
+ if (on) {
+ kd_pollc++;
+ } else {
+ --kd_pollc;
+ }
+}
+
+void kd_setleds1(u_char val)
+{
+ /* Can't do this. */
+}
+
+/* Mach part */
+
+struct tty hypcn_tty;
+
+static void hypcnintr(int unit, spl_t spl, void *ret_addr, void *regs) {
+ struct tty *tp = &hypcn_tty;
+ if (kd_pollc)
+ return;
+ simple_lock(&inlock);
+ while (console->in_prod != console->in_cons) {
+ int c = hyp_ring_cell(console->in, console->in_cons);
+ mb();
+ console->in_cons++;
+#if MACH_KDB
+ if (c == (char) 0xA3) { /* '£' (pound sign) */
+ printf("£ pressed\n");
+ kdb_kintr();
+ continue;
+ }
+#endif /* MACH_KDB */
+ if ((tp->t_state & (TS_ISOPEN|TS_WOPEN)))
+ (*linesw[tp->t_line].l_rint)(c, tp);
+ }
+ hyp_event_channel_send(boot_info.console_evtchn);
+ simple_unlock(&inlock);
+}
+
+int hypcnread(int dev, io_req_t ior)
+{
+ struct tty *tp = &hypcn_tty;
+ tp->t_state |= TS_CARR_ON;
+ return char_read(tp, ior);
+}
+
+int hypcnwrite(int dev, io_req_t ior)
+{
+ return char_write(&hypcn_tty, ior);
+}
+
+void hypcnstart(struct tty *tp)
+{
+ spl_t o_pri;
+ int ch;
+ unsigned char c;
+
+ if (tp->t_state & TS_TTSTOP)
+ return;
+ while (1) {
+ tp->t_state &= ~TS_BUSY;
+ if (tp->t_state & TS_TTSTOP)
+ break;
+ if ((tp->t_outq.c_cc <= 0) || (ch = getc(&tp->t_outq)) == -1)
+ break;
+ c = ch;
+ o_pri = splsoftclock();
+ hypputc(c);
+ splx(o_pri);
+ }
+ if (tp->t_outq.c_cc <= TTLOWAT(tp)) {
+ tt_write_wakeup(tp);
+ }
+}
+
+void hypcnstop()
+{
+}
+
+io_return_t hypcngetstat(dev_t dev, int flavor, int *data, unsigned int *count)
+{
+ return tty_get_status(&hypcn_tty, flavor, data, count);
+}
+
+io_return_t hypcnsetstat(dev_t dev, int flavor, int *data, unsigned int count)
+{
+ return tty_set_status(&hypcn_tty, flavor, data, count);
+}
+
+int hypcnportdeath(dev_t dev, mach_port_t port)
+{
+ return tty_portdeath(&hypcn_tty, (ipc_port_t) port);
+}
+
+int hypcnopen(dev_t dev, int flag, io_req_t ior)
+{
+ struct tty *tp = &hypcn_tty;
+ spl_t o_pri;
+
+ o_pri = spltty();
+ simple_lock(&tp->t_lock);
+ if (!(tp->t_state & (TS_ISOPEN|TS_WOPEN))) {
+ /* XXX ttychars allocates memory */
+ simple_unlock(&tp->t_lock);
+ ttychars(tp);
+ simple_lock(&tp->t_lock);
+ tp->t_oproc = hypcnstart;
+ tp->t_stop = hypcnstop;
+ tp->t_ospeed = tp->t_ispeed = B9600;
+ tp->t_flags = ODDP|EVENP|ECHO|CRMOD|XTABS;
+ }
+ tp->t_state |= TS_CARR_ON;
+ simple_unlock(&tp->t_lock);
+ splx(o_pri);
+ return (char_open(dev, tp, flag, ior));
+}
+
+int hypcnclose(int dev, int flag)
+{
+ struct tty *tp = &hypcn_tty;
+ spl_t s = spltty();
+ simple_lock(&tp->t_lock);
+ ttyclose(tp);
+ simple_unlock(&tp->t_lock);
+ splx(s);
+ return 0;
+}
+
+int hypcnprobe(struct consdev *cp)
+{
+ struct xencons_interface *my_console;
+ my_console = (void*) mfn_to_kv(boot_info.console_mfn);
+
+ cp->cn_dev = makedev(0, 0);
+ cp->cn_pri = CN_INTERNAL;
+ return 0;
+}
+
+int hypcninit(struct consdev *cp)
+{
+ if (console)
+ return 0;
+ simple_lock_init(&outlock);
+ simple_lock_init(&inlock);
+ console = (void*) mfn_to_kv(boot_info.console_mfn);
+ pmap_set_page_readwrite(console);
+ hyp_evt_handler(boot_info.console_evtchn, hypcnintr, 0, SPL6);
+ return 0;
+}
diff --git a/xen/console.h b/xen/console.h
new file mode 100644
index 00000000..fa13dc0f
--- /dev/null
+++ b/xen/console.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_CONSOLE_H
+#define XEN_CONSOLE_H
+#include <machine/xen.h>
+#include <string.h>
+
+#define hyp_console_write(str, len) hyp_console_io (CONSOLEIO_write, (len), kvtolin(str))
+
+#define hyp_console_put(str) ({ \
+ const char *__str = (void*) (str); \
+ hyp_console_write (__str, strlen (__str)); \
+})
+
+extern void hyp_console_init(void);
+
+#endif /* XEN_CONSOLE_H */
diff --git a/xen/evt.c b/xen/evt.c
new file mode 100644
index 00000000..345e1d06
--- /dev/null
+++ b/xen/evt.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2007 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <mach/xen.h>
+#include <machine/xen.h>
+#include <machine/ipl.h>
+#include <machine/gdt.h>
+#include <xen/console.h>
+#include "evt.h"
+
+#define NEVNT (sizeof(unsigned long) * sizeof(unsigned long) * 8)
+int int_mask[NSPL];
+
+spl_t curr_ipl;
+
+void (*ivect[NEVNT])();
+int intpri[NEVNT];
+int iunit[NEVNT];
+
+void hyp_c_callback(void *ret_addr, void *regs)
+{
+ int i, j, n;
+ int cpu = 0;
+ unsigned long pending_sel;
+
+ hyp_shared_info.vcpu_info[cpu].evtchn_upcall_pending = 0;
+ /* no need for a barrier on x86, xchg is already one */
+#if !(defined(__i386__) || defined(__x86_64__))
+ wmb();
+#endif
+ while ((pending_sel = xchgl(&hyp_shared_info.vcpu_info[cpu].evtchn_pending_sel, 0))) {
+
+ for (i = 0; pending_sel; i++, pending_sel >>= 1) {
+ unsigned long pending;
+
+ if (!(pending_sel & 1))
+ continue;
+
+ while ((pending = (hyp_shared_info.evtchn_pending[i] & ~hyp_shared_info.evtchn_mask[i]))) {
+
+ n = i * sizeof(unsigned long);
+ for (j = 0; pending; j++, n++, pending >>= 1) {
+ if (!(pending & 1))
+ continue;
+
+ if (ivect[n]) {
+ spl_t spl = splx(intpri[n]);
+ asm ("lock; andl %1,%0":"=m"(hyp_shared_info.evtchn_pending[i]):"r"(~(1<<j)));
+ ivect[n](iunit[n], spl, ret_addr, regs);
+ splx_cli(spl);
+ } else {
+ printf("warning: lost unbound event %d\n", n);
+ asm ("lock; andl %1,%0":"=m"(hyp_shared_info.evtchn_pending[i]):"r"(~(1<<j)));
+ }
+ }
+ }
+ }
+ }
+}
+
+void form_int_mask(void)
+{
+ unsigned int i, j, bit, mask;
+
+ for (i=SPL0; i < NSPL; i++) {
+ for (j=0x00, bit=0x01, mask = 0; j < NEVNT; j++, bit<<=1)
+ if (intpri[j] <= i)
+ mask |= bit;
+ int_mask[i] = mask;
+ }
+}
+
+extern void hyp_callback(void);
+extern void hyp_failsafe_callback(void);
+
+void hyp_intrinit() {
+ form_int_mask();
+ curr_ipl = SPLHI;
+ hyp_shared_info.evtchn_mask[0] = int_mask[SPLHI];
+ hyp_set_callbacks(KERNEL_CS, hyp_callback,
+ KERNEL_CS, hyp_failsafe_callback);
+}
+
+void hyp_evt_handler(evtchn_port_t port, void (*handler)(), int unit, spl_t spl) {
+ if (port >= NEVNT)
+ panic("event channel port %d > %d not supported\n", port, NEVNT);
+ intpri[port] = spl;
+ iunit[port] = unit;
+ form_int_mask();
+ wmb();
+ ivect[port] = handler;
+}
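
Typical driver-side use of the handler registration above, as done by hyp_block_init in xen/block.c; domid, unit and my_intr are placeholders for the driver's backend domain, unit number and interrupt routine.

evtchn_port_t evt = hyp_event_channel_alloc (domid);   /* channel to the backend */
hyp_evt_handler (evt, my_intr, unit, SPL7);            /* my_intr(unit, spl, ret_addr, regs) */
hyp_event_channel_send (evt);                          /* kick the backend when needed */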
diff --git a/xen/evt.h b/xen/evt.h
new file mode 100644
index 00000000..a5839776
--- /dev/null
+++ b/xen/evt.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_EVT_H
+#define XEN_EVT_H
+
+#include <machine/spl.h>
+
+void hyp_intrinit(void);
+void form_int_mask(void);
+void hyp_evt_handler(evtchn_port_t port, void (*handler)(), int unit, spl_t spl);
+void hyp_c_callback(void *ret_addr, void *regs);
+
+#endif /* XEN_EVT_H */
diff --git a/xen/grant.c b/xen/grant.c
new file mode 100644
index 00000000..505d2026
--- /dev/null
+++ b/xen/grant.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <mach/vm_param.h>
+#include <machine/spl.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include "grant.h"
+
+#define NR_RESERVED_ENTRIES 8
+#define NR_GRANT_PAGES 4
+
+decl_simple_lock_data(static,lock);
+static struct grant_entry *grants;
+static vm_map_entry_t grants_map_entry;
+static int last_grant = NR_RESERVED_ENTRIES;
+
+static grant_ref_t free_grants = -1;
+
+static grant_ref_t grant_alloc(void) {
+ grant_ref_t grant;
+ if (free_grants != -1) {
+ grant = free_grants;
+ free_grants = grants[grant].frame;
+ } else {
+ grant = last_grant++;
+ if (grant == (NR_GRANT_PAGES * PAGE_SIZE)/sizeof(*grants))
+ panic("not enough grant entries, increase NR_GRANT_PAGES");
+ }
+ return grant;
+}
+
+static void grant_free(grant_ref_t grant) {
+ grants[grant].frame = free_grants;
+ free_grants = grant;
+}
+
+static grant_ref_t grant_set(domid_t domid, unsigned long mfn, uint16_t flags) {
+ spl_t spl = splhigh();
+ simple_lock(&lock);
+
+ grant_ref_t grant = grant_alloc();
+ grants[grant].domid = domid;
+ grants[grant].frame = mfn;
+ wmb();
+ grants[grant].flags = flags;
+
+ simple_unlock(&lock);
+ splx(spl);
+ return grant;
+}
+
+grant_ref_t hyp_grant_give(domid_t domid, unsigned long frame, int readonly) {
+ return grant_set(domid, pfn_to_mfn(frame),
+ GTF_permit_access | (readonly ? GTF_readonly : 0));
+}
+
+grant_ref_t hyp_grant_accept_transfer(domid_t domid, unsigned long frame) {
+ return grant_set(domid, frame, GTF_accept_transfer);
+}
+
+unsigned long hyp_grant_finish_transfer(grant_ref_t grant) {
+ unsigned long frame;
+ spl_t spl = splhigh();
+ simple_lock(&lock);
+
+ if (!(grants[grant].flags & GTF_transfer_committed))
+ panic("grant transfer %x not committed\n", grant);
+ while (!(grants[grant].flags & GTF_transfer_completed))
+ machine_relax();
+ rmb();
+ frame = grants[grant].frame;
+ grant_free(grant);
+
+ simple_unlock(&lock);
+ splx(spl);
+ return frame;
+}
+
+void hyp_grant_takeback(grant_ref_t grant) {
+ spl_t spl = splhigh();
+ simple_lock(&lock);
+
+ if (grants[grant].flags & (GTF_reading|GTF_writing))
+ panic("grant %d still in use (%lx)\n", grant, grants[grant].flags);
+
+ /* Note: this is not safe, a cmpxchg is needed, see grant_table.h */
+ grants[grant].flags = 0;
+ wmb();
+
+ grant_free(grant);
+
+ simple_unlock(&lock);
+ splx(spl);
+}
+
+void *hyp_grant_address(grant_ref_t grant) {
+ return &grants[grant];
+}
+
+void hyp_grant_init(void) {
+ struct gnttab_setup_table setup;
+ unsigned long frame[NR_GRANT_PAGES];
+ long ret;
+ int i;
+ vm_offset_t addr;
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = NR_GRANT_PAGES;
+ setup.frame_list = (void*) kvtolin(frame);
+
+ ret = hyp_grant_table_op(GNTTABOP_setup_table, kvtolin(&setup), 1);
+ if (ret)
+ panic("setup grant table error %d", ret);
+ if (setup.status)
+ panic("setup grant table: %d\n", setup.status);
+
+ simple_lock_init(&lock);
+ vm_map_find_entry(kernel_map, &addr, NR_GRANT_PAGES * PAGE_SIZE,
+ (vm_offset_t) 0, kernel_object, &grants_map_entry);
+ grants = (void*) addr;
+
+ for (i = 0; i < NR_GRANT_PAGES; i++)
+ pmap_map_mfn((void *)grants + i * PAGE_SIZE, frame[i]);
+}
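
The grant lifecycle as the block and net drivers use it, condensed; domid and addr are placeholders for the backend domain and a page-aligned kernel buffer.

grant_ref_t gref = hyp_grant_give (domid, atop (kvtophys (addr)), 0 /* read-write */);
/* ... put gref into a ring request, notify the backend, wait for the reply ... */
hyp_grant_takeback (gref);               /* revoke the backend's access */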
diff --git a/xen/grant.h b/xen/grant.h
new file mode 100644
index 00000000..ff8617d4
--- /dev/null
+++ b/xen/grant.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_GRANT_H
+#define XEN_GRANT_H
+#include <sys/types.h>
+#include <machine/xen.h>
+#include <xen/public/xen.h>
+#include <xen/public/grant_table.h>
+
+void hyp_grant_init(void);
+grant_ref_t hyp_grant_give(domid_t domid, unsigned long frame_nr, int readonly);
+void hyp_grant_takeback(grant_ref_t grant);
+grant_ref_t hyp_grant_accept_transfer(domid_t domid, unsigned long frame_nr);
+unsigned long hyp_grant_finish_transfer(grant_ref_t grant);
+void *hyp_grant_address(grant_ref_t grant);
+
+#endif /* XEN_GRANT_H */
diff --git a/xen/net.c b/xen/net.c
new file mode 100644
index 00000000..1bb217ba
--- /dev/null
+++ b/xen/net.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <mach/mig_errors.h>
+#include <ipc/ipc_port.h>
+#include <ipc/ipc_space.h>
+#include <vm/vm_kern.h>
+#include <device/device_types.h>
+#include <device/device_port.h>
+#include <device/if_hdr.h>
+#include <device/if_ether.h>
+#include <device/net_io.h>
+#include <device/device_reply.user.h>
+#include <device/device_emul.h>
+#include <intel/pmap.h>
+#include <xen/public/io/netif.h>
+#include <xen/public/memory.h>
+#include <string.h>
+#include <util/atoi.h>
+#include "evt.h"
+#include "store.h"
+#include "net.h"
+#include "grant.h"
+#include "ring.h"
+#include "time.h"
+#include "xen.h"
+
+/* Hypervisor part */
+
+#define ADDRESS_SIZE 6
+#define WINDOW __RING_SIZE((netif_rx_sring_t*)0, PAGE_SIZE)
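+/* WINDOW is the number of slots in the receive ring, i.e. how many pages we
+ * keep posted to the backend for incoming packets. */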
+
+/* TODO: use rx-copy instead, since we're memcpying anyway */
+
+/* Set to 1 to never share our own pages with the backend: packets are then
+ * always copied into a freshly allocated page before being granted. */
+static const int paranoia = 0;
+
+struct net_data {
+ struct device device;
+ struct ifnet ifnet;
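+  /* -2: initialization failed, -1: initialized but never opened,
+   * >= 0: number of outstanding opens. */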
+ int open_count;
+ char *backend;
+ domid_t domid;
+ char *vif;
+ u_char address[ADDRESS_SIZE];
+ int handle;
+ ipc_port_t port;
+ netif_tx_front_ring_t tx;
+ netif_rx_front_ring_t rx;
+ void *rx_buf[WINDOW];
+ grant_ref_t rx_buf_gnt[WINDOW];
+ unsigned long rx_buf_pfn[WINDOW];
+ evtchn_port_t evt;
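+  /* lock protects the rings, pushlock serializes pushing tx requests. */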
+ simple_lock_data_t lock;
+ simple_lock_data_t pushlock;
+};
+
+static int n_vifs;
+static struct net_data *vif_data;
+
+struct device_emulation_ops hyp_net_emulation_ops;
+
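+/* Parse an unsigned hexadecimal number at cp, store it in *nump (or -1 if no
+ * hex digit was found) and return the number of characters consumed. */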
+int hextoi(char *cp, int *nump)
+{
+ int number;
+ char *original;
+ char c;
+
+ original = cp;
+ for (number = 0, c = *cp | 0x20; (('0' <= c) && (c <= '9')) || (('a' <= c) && (c <= 'f')); c = *(++cp)) {
+ number *= 16;
+ if (c <= '9')
+ number += c - '0';
+ else
+ number += c - 'a' + 10;
+ }
+ if (original == cp)
+ *nump = -1;
+ else
+ *nump = number;
+ return(cp - original);
+}
+
+static void enqueue_rx_buf(struct net_data *nd, int number) {
+ unsigned reqn = nd->rx.req_prod_pvt++;
+ netif_rx_request_t *req = RING_GET_REQUEST(&nd->rx, reqn);
+
+ assert(number < WINDOW);
+
+ req->id = number;
+ req->gref = nd->rx_buf_gnt[number] = hyp_grant_accept_transfer(nd->domid, nd->rx_buf_pfn[number]);
+
+	/* Give the page back to the hypervisor; the backend will transfer a
+	 * fresh frame holding the next packet for this slot. */
+ hyp_free_page(nd->rx_buf_pfn[number], nd->rx_buf[number]);
+}
+
+static void hyp_net_intr(int unit) {
+ ipc_kmsg_t kmsg;
+ struct ether_header *eh;
+ struct packet_header *ph;
+ netif_rx_response_t *rx_rsp;
+ netif_tx_response_t *tx_rsp;
+ void *data;
+ int len, more;
+ struct net_data *nd = &vif_data[unit];
+
+ simple_lock(&nd->lock);
+ if ((nd->rx.sring->rsp_prod - nd->rx.rsp_cons) >= (WINDOW*3)/4)
+		printf("window %ld is a bit small!\n", WINDOW);
+
+ more = RING_HAS_UNCONSUMED_RESPONSES(&nd->rx);
+ while (more) {
+ rmb(); /* make sure we see responses */
+ rx_rsp = RING_GET_RESPONSE(&nd->rx, nd->rx.rsp_cons++);
+
+ unsigned number = rx_rsp->id;
+ assert(number < WINDOW);
+ unsigned long mfn = hyp_grant_finish_transfer(nd->rx_buf_gnt[number]);
+
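+		/* The backend transferred us a machine frame holding the packet:
+		 * record it in the phys-to-machine table and map it back at the
+		 * buffer's fixed kernel address. */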
+#ifdef MACH_PSEUDO_PHYS
+ mfn_list[nd->rx_buf_pfn[number]] = mfn;
+#endif /* MACH_PSEUDO_PHYS */
+ pmap_map_mfn(nd->rx_buf[number], mfn);
+
+ kmsg = net_kmsg_get();
+ if (!kmsg)
+ /* gasp! Drop */
+ goto drop;
+
+ if (rx_rsp->status <= 0)
+ switch (rx_rsp->status) {
+ case NETIF_RSP_DROPPED:
+ printf("Packet dropped\n");
+ goto drop;
+ case NETIF_RSP_ERROR:
+ panic("Packet error");
+ case 0:
+			printf("empty packet\n");
+ goto drop;
+ default:
+ printf("Unknown error %d\n", rx_rsp->status);
+ goto drop;
+ }
+
+ data = nd->rx_buf[number] + rx_rsp->offset;
+ len = rx_rsp->status;
+
+ eh = (void*) (net_kmsg(kmsg)->header);
+ ph = (void*) (net_kmsg(kmsg)->packet);
+ memcpy(eh, data, sizeof (struct ether_header));
+ memcpy(ph + 1, data + sizeof (struct ether_header), len - sizeof(struct ether_header));
+ RING_FINAL_CHECK_FOR_RESPONSES(&nd->rx, more);
+ enqueue_rx_buf(nd, number);
+ ph->type = eh->ether_type;
+ ph->length = len - sizeof(struct ether_header) + sizeof (struct packet_header);
+
+ net_kmsg(kmsg)->sent = FALSE; /* Mark packet as received. */
+
+ net_packet(&nd->ifnet, kmsg, ph->length, ethernet_priority(kmsg));
+ continue;
+
+drop:
+ RING_FINAL_CHECK_FOR_RESPONSES(&nd->rx, more);
+ enqueue_rx_buf(nd, number);
+ }
+
+ /* commit new requests */
+ int notify;
+ wmb(); /* make sure it sees requests */
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&nd->rx, notify);
+ if (notify)
+ hyp_event_channel_send(nd->evt);
+
+ /* Now the tx side */
+ more = RING_HAS_UNCONSUMED_RESPONSES(&nd->tx);
+ spl_t s = splsched ();
+ while (more) {
+ rmb(); /* make sure we see responses */
+ tx_rsp = RING_GET_RESPONSE(&nd->tx, nd->tx.rsp_cons++);
+ switch (tx_rsp->status) {
+ case NETIF_RSP_DROPPED:
+ printf("Packet dropped\n");
+ break;
+ case NETIF_RSP_ERROR:
+ panic("Packet error");
+ case NETIF_RSP_OKAY:
+ break;
+ default:
+ printf("Unknown error %d\n", tx_rsp->status);
+ goto drop_tx;
+ }
+ thread_wakeup((event_t) hyp_grant_address(tx_rsp->id));
+drop_tx:
+ thread_wakeup_one(nd);
+ RING_FINAL_CHECK_FOR_RESPONSES(&nd->tx, more);
+ }
+ splx(s);
+
+ simple_unlock(&nd->lock);
+}
+
+#define VIF_PATH "device/vif"
+void hyp_net_init(void) {
+ char **vifs, **vif;
+ char *c;
+ int i;
+ int n;
+ int grant;
+ char port_name[10];
+ domid_t domid;
+ evtchn_port_t evt;
+ hyp_store_transaction_t t;
+ vm_offset_t addr;
+ struct net_data *nd;
+ struct ifnet *ifp;
+ netif_tx_sring_t *tx_ring;
+ netif_rx_sring_t *rx_ring;
+
+ vifs = hyp_store_ls(0, 1, VIF_PATH);
+ if (!vifs) {
+ printf("eth: No net device (%s). Hoping you don't need any\n", hyp_store_error);
+ n_vifs = 0;
+ return;
+ }
+
+ n = 0;
+ for (vif = vifs; *vif; vif++)
+ n++;
+
+ vif_data = (void*) kalloc(n * sizeof(*vif_data));
+ if (!vif_data) {
+		printf("eth: no memory for VIF data\n");
+ n_vifs = 0;
+ return;
+ }
+ n_vifs = n;
+
+ for (n = 0; n < n_vifs; n++) {
+ nd = &vif_data[n];
+ mach_atoi((u_char *) vifs[n], &nd->handle);
+ if (nd->handle == MACH_ATOI_DEFAULT)
+ continue;
+
+ nd->open_count = -2;
+ nd->vif = vifs[n];
+
+		/* Get the domain id of the backend driver. */
+		i = hyp_store_read_int(0, 5, VIF_PATH, "/", vifs[n], "/", "backend-id");
+		if (i == -1)
+			panic("eth: couldn't read backend domid of VIF %s (%s)",vifs[n], hyp_store_error);
+ nd->domid = domid = i;
+
+ do {
+ t = hyp_store_transaction_start();
+
+ /* Get a page for tx_ring */
+ if (kmem_alloc_wired(kernel_map, &addr, PAGE_SIZE) != KERN_SUCCESS)
+				panic("eth: couldn't allocate space for tx_ring");
+ tx_ring = (void*) addr;
+ SHARED_RING_INIT(tx_ring);
+ FRONT_RING_INIT(&nd->tx, tx_ring, PAGE_SIZE);
+ grant = hyp_grant_give(domid, atop(kvtophys(addr)), 0);
+
+ /* and give it to backend. */
+ i = sprintf(port_name, "%u", grant);
+ c = hyp_store_write(t, port_name, 5, VIF_PATH, "/", vifs[n], "/", "tx-ring-ref");
+ if (!c)
+ panic("eth: couldn't store tx_ring reference for VIF %s (%s)", vifs[n], hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+ /* Get a page for rx_ring */
+ if (kmem_alloc_wired(kernel_map, &addr, PAGE_SIZE) != KERN_SUCCESS)
+				panic("eth: couldn't allocate space for rx_ring");
+ rx_ring = (void*) addr;
+ SHARED_RING_INIT(rx_ring);
+ FRONT_RING_INIT(&nd->rx, rx_ring, PAGE_SIZE);
+ grant = hyp_grant_give(domid, atop(kvtophys(addr)), 0);
+
+ /* and give it to backend. */
+ i = sprintf(port_name, "%u", grant);
+ c = hyp_store_write(t, port_name, 5, VIF_PATH, "/", vifs[n], "/", "rx-ring-ref");
+ if (!c)
+ panic("eth: couldn't store rx_ring reference for VIF %s (%s)", vifs[n], hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+			/* Tell the backend that we do not do checksum offloading,
+			 * i.e. we want complete checksums on received packets. */
+ c = hyp_store_write(t, "1", 5, VIF_PATH, "/", vifs[n], "/", "feature-no-csum-offload");
+ if (!c)
+ panic("eth: couldn't store feature-no-csum-offload reference for VIF %s (%s)", vifs[n], hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+ /* Allocate an event channel and give it to backend. */
+ nd->evt = evt = hyp_event_channel_alloc(domid);
+ i = sprintf(port_name, "%lu", evt);
+ c = hyp_store_write(t, port_name, 5, VIF_PATH, "/", vifs[n], "/", "event-channel");
+ if (!c)
+ panic("eth: couldn't store event channel for VIF %s (%s)", vifs[n], hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+ c = hyp_store_write(t, hyp_store_state_initialized, 5, VIF_PATH, "/", vifs[n], "/", "state");
+ if (!c)
+ panic("eth: couldn't store state for VIF %s (%s)", vifs[n], hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+ } while ((!hyp_store_transaction_stop(t)));
+ /* TODO randomly wait? */
+
+ c = hyp_store_read(0, 5, VIF_PATH, "/", vifs[n], "/", "backend");
+ if (!c)
+ panic("eth: couldn't get path to VIF %s backend (%s)", vifs[n], hyp_store_error);
+ nd->backend = c;
+
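+		/* Wait for the backend to reach the InitWait state before reading
+		 * its parameters (mac address). */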
+ while(1) {
+ i = hyp_store_read_int(0, 3, nd->backend, "/", "state");
+ if (i == MACH_ATOI_DEFAULT)
+ panic("can't read state from %s", nd->backend);
+ if (i == XenbusStateInitWait)
+ break;
+ hyp_yield();
+ }
+
+ c = hyp_store_read(0, 3, nd->backend, "/", "mac");
+ if (!c)
+ panic("eth: couldn't get VIF %s's mac (%s)", vifs[n], hyp_store_error);
+
+ for (i=0; ; i++) {
+ int val;
+ hextoi(&c[3*i], &val);
+ if (val == -1)
+ panic("eth: couldn't understand %dth number of VIF %s's mac %s", i, vifs[n], c);
+ nd->address[i] = val;
+ if (i==ADDRESS_SIZE-1)
+ break;
+ if (c[3*i+2] != ':')
+ panic("eth: couldn't understand %dth separator of VIF %s's mac %s", i, vifs[n], c);
+ }
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+ printf("eth%d: dom%d's VIF %s ", n, domid, vifs[n]);
+ for (i=0; ; i++) {
+ printf("%02x", nd->address[i]);
+ if (i==ADDRESS_SIZE-1)
+ break;
+ printf(":");
+ }
+ printf("\n");
+
+ c = hyp_store_write(0, hyp_store_state_connected, 5, VIF_PATH, "/", nd->vif, "/", "state");
+ if (!c)
+ panic("couldn't store state for eth%d (%s)", nd - vif_data, hyp_store_error);
+ kfree((vm_offset_t) c, strlen(c)+1);
+
+ /* Get a page for packet reception */
+ for (i= 0; i<WINDOW; i++) {
+ if (kmem_alloc_wired(kernel_map, &addr, PAGE_SIZE) != KERN_SUCCESS)
+				panic("eth: couldn't allocate space for rx buffer");
+ nd->rx_buf[i] = (void*)phystokv(kvtophys(addr));
+ nd->rx_buf_pfn[i] = atop(kvtophys((vm_offset_t)nd->rx_buf[i]));
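+			/* This frame will be handed over to the hypervisor by
+			 * enqueue_rx_buf(), so drop the original kernel mapping of it. */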
+ if (hyp_do_update_va_mapping(kvtolin(addr), 0, UVMF_INVLPG|UVMF_ALL))
+ panic("eth: couldn't clear rx kv buf %d at %p", i, addr);
+ /* and enqueue it to backend. */
+ enqueue_rx_buf(nd, i);
+ }
+ int notify;
+ wmb(); /* make sure it sees requests */
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&nd->rx, notify);
+ if (notify)
+ hyp_event_channel_send(nd->evt);
+
+
+ nd->open_count = -1;
+ nd->device.emul_ops = &hyp_net_emulation_ops;
+ nd->device.emul_data = nd;
+ simple_lock_init(&nd->lock);
+ simple_lock_init(&nd->pushlock);
+
+ ifp = &nd->ifnet;
+ ifp->if_unit = n;
+ ifp->if_flags = IFF_UP | IFF_RUNNING;
+ ifp->if_header_size = 14;
+ ifp->if_header_format = HDR_ETHERNET;
+ /* Set to the maximum that we can handle in device_write. */
+ ifp->if_mtu = PAGE_SIZE - ifp->if_header_size;
+ ifp->if_address_size = ADDRESS_SIZE;
+ ifp->if_address = (void*) nd->address;
+ if_init_queues (ifp);
+
+ /* Now we can start receiving */
+ hyp_evt_handler(evt, hyp_net_intr, n, SPL6);
+ }
+}
+
+static ipc_port_t
+dev_to_port(void *d)
+{
+ struct net_data *b = d;
+ if (!d)
+ return IP_NULL;
+ return ipc_port_make_send(b->port);
+}
+
+static int
+device_close(void *devp)
+{
+ struct net_data *nd = devp;
+ if (--nd->open_count < 0)
+ panic("too many closes on eth%d", nd - vif_data);
+ printf("close, eth%d count %d\n",nd-vif_data,nd->open_count);
+ if (nd->open_count)
+ return 0;
+ ipc_kobject_set(nd->port, IKO_NULL, IKOT_NONE);
+ ipc_port_dealloc_kernel(nd->port);
+ return 0;
+}
+
+static io_return_t
+device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type,
+ dev_mode_t mode, char *name, device_t *devp /* out */)
+{
+ int i, n, err = 0;
+ ipc_port_t port, notify;
+ struct net_data *nd;
+
+ if (name[0] != 'e' || name[1] != 't' || name[2] != 'h' || name[3] < '0' || name[3] > '9')
+ return D_NO_SUCH_DEVICE;
+ i = mach_atoi((u_char *) &name[3], &n);
+ if (n == MACH_ATOI_DEFAULT)
+ return D_NO_SUCH_DEVICE;
+ if (name[3 + i])
+ return D_NO_SUCH_DEVICE;
+ if (n >= n_vifs)
+ return D_NO_SUCH_DEVICE;
+ nd = &vif_data[n];
+ if (nd->open_count == -2)
+ /* couldn't be initialized */
+ return D_NO_SUCH_DEVICE;
+
+ if (nd->open_count >= 0) {
+ *devp = &nd->device ;
+ nd->open_count++ ;
+ printf("re-open, eth%d count %d\n",nd-vif_data,nd->open_count);
+ return D_SUCCESS;
+ }
+
+ nd->open_count = 1;
+ printf("eth%d count %d\n",nd-vif_data,nd->open_count);
+
+ port = ipc_port_alloc_kernel();
+ if (port == IP_NULL) {
+ err = KERN_RESOURCE_SHORTAGE;
+ goto out;
+ }
+ nd->port = port;
+
+ *devp = &nd->device;
+
+ ipc_kobject_set (port, (ipc_kobject_t) &nd->device, IKOT_DEVICE);
+
+ notify = ipc_port_make_sonce (nd->port);
+ ip_lock (nd->port);
+ ipc_port_nsrequest (nd->port, 1, notify, &notify);
+ assert (notify == IP_NULL);
+
+out:
+ if (IP_VALID (reply_port))
+ ds_device_open_reply (reply_port, reply_port_type, D_SUCCESS, dev_to_port(nd));
+ else
+ device_close(nd);
+ return MIG_NO_REPLY;
+}
+
+static io_return_t
+device_write(void *d, ipc_port_t reply_port,
+ mach_msg_type_name_t reply_port_type, dev_mode_t mode,
+ recnum_t bn, io_buf_ptr_t data, unsigned int count,
+ int *bytes_written)
+{
+ vm_map_copy_t copy = (vm_map_copy_t) data;
+ grant_ref_t gref;
+ struct net_data *nd = d;
+ struct ifnet *ifp = &nd->ifnet;
+ netif_tx_request_t *req;
+ unsigned reqn;
+ vm_offset_t offset;
+ vm_page_t m;
+ vm_size_t size;
+
+ /* The maximum that we can handle. */
+ assert(ifp->if_header_size + ifp->if_mtu <= PAGE_SIZE);
+
+ if (count < ifp->if_header_size ||
+ count > ifp->if_header_size + ifp->if_mtu)
+ return D_INVALID_SIZE;
+
+ assert(copy->type == VM_MAP_COPY_PAGE_LIST);
+
+ assert(copy->cpy_npages <= 2);
+ assert(copy->cpy_npages >= 1);
+
+ offset = copy->offset & PAGE_MASK;
+ if (paranoia || copy->cpy_npages == 2) {
+		/* The packet spans two pages (or paranoia is set): copy it into a
+		 * single freshly grabbed page, since we grant only one page. */
+ while ((m = vm_page_grab(FALSE)) == 0)
+ VM_PAGE_WAIT (0);
+ assert (! m->active && ! m->inactive);
+ m->busy = TRUE;
+
+ if (copy->cpy_npages == 1)
+ size = count;
+ else
+ size = PAGE_SIZE - offset;
+
+ memcpy((void*)phystokv(m->phys_addr), (void*)phystokv(copy->cpy_page_list[0]->phys_addr + offset), size);
+ if (copy->cpy_npages == 2)
+ memcpy((void*)phystokv(m->phys_addr + size), (void*)phystokv(copy->cpy_page_list[1]->phys_addr), count - size);
+
+ offset = 0;
+ } else
+ m = copy->cpy_page_list[0];
+
+ /* allocate a request */
+ spl_t spl = splimp();
+ while (1) {
+ simple_lock(&nd->lock);
+ if (!RING_FULL(&nd->tx))
+ break;
+ thread_sleep(nd, &nd->lock, FALSE);
+ }
+ mb();
+	reqn = nd->tx.req_prod_pvt++;
+ simple_lock(&nd->pushlock);
+ simple_unlock(&nd->lock);
+ (void) splx(spl);
+
+ req = RING_GET_REQUEST(&nd->tx, reqn);
+ req->gref = gref = hyp_grant_give(nd->domid, atop(m->phys_addr), 1);
+ req->offset = offset;
+ req->flags = 0;
+ req->id = gref;
+ req->size = count;
+
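+	/* Register the wait before pushing the request so that the wakeup from
+	 * hyp_net_intr() (keyed on hyp_grant_address(gref)) cannot be missed. */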
+ assert_wait(hyp_grant_address(gref), FALSE);
+
+ int notify;
+ wmb(); /* make sure it sees requests */
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&nd->tx, notify);
+ if (notify)
+ hyp_event_channel_send(nd->evt);
+ simple_unlock(&nd->pushlock);
+
+ thread_block(NULL);
+
+ hyp_grant_takeback(gref);
+
+ /* Send packet to filters. */
+ {
+ struct packet_header *packet;
+ struct ether_header *header;
+ ipc_kmsg_t kmsg;
+
+ kmsg = net_kmsg_get ();
+
+ if (kmsg != IKM_NULL)
+ {
+ /* Suitable for Ethernet only. */
+ header = (struct ether_header *) (net_kmsg (kmsg)->header);
+ packet = (struct packet_header *) (net_kmsg (kmsg)->packet);
+ memcpy (header, (void*)phystokv(m->phys_addr + offset), sizeof (struct ether_header));
+
+ /* packet is prefixed with a struct packet_header,
+ see include/device/net_status.h. */
+ memcpy (packet + 1, (void*)phystokv(m->phys_addr + offset + sizeof (struct ether_header)),
+ count - sizeof (struct ether_header));
+ packet->length = count - sizeof (struct ether_header)
+ + sizeof (struct packet_header);
+ packet->type = header->ether_type;
+ net_kmsg (kmsg)->sent = TRUE; /* Mark packet as sent. */
+ spl_t s = splimp ();
+ net_packet (&nd->ifnet, kmsg, packet->length,
+ ethernet_priority (kmsg));
+ splx (s);
+ }
+ }
+
+ if (paranoia || copy->cpy_npages == 2)
+ VM_PAGE_FREE(m);
+
+ vm_map_copy_discard (copy);
+
+ *bytes_written = count;
+
+ if (IP_VALID(reply_port))
+ ds_device_write_reply (reply_port, reply_port_type, 0, count);
+
+ return MIG_NO_REPLY;
+}
+
+static io_return_t
+device_get_status(void *d, dev_flavor_t flavor, dev_status_t status,
+ mach_msg_type_number_t *status_count)
+{
+ struct net_data *nd = d;
+
+ return net_getstat (&nd->ifnet, flavor, status, status_count);
+}
+
+static io_return_t
+device_set_status(void *d, dev_flavor_t flavor, dev_status_t status,
+ mach_msg_type_number_t count)
+{
+ struct net_data *nd = d;
+
+ switch (flavor)
+ {
+ default:
+ printf("TODO: net_%s(%p, 0x%x)\n", __func__, nd, flavor);
+ return D_INVALID_OPERATION;
+ }
+ return D_SUCCESS;
+}
+
+static io_return_t
+device_set_filter(void *d, ipc_port_t port, int priority,
+ filter_t * filter, unsigned filter_count)
+{
+ struct net_data *nd = d;
+
+ if (!nd)
+ return D_NO_SUCH_DEVICE;
+
+ return net_set_filter (&nd->ifnet, port, priority, filter, filter_count);
+}
+
+struct device_emulation_ops hyp_net_emulation_ops = {
+ NULL, /* dereference */
+ NULL, /* deallocate */
+ dev_to_port,
+ device_open,
+ device_close,
+ device_write,
+ NULL, /* write_inband */
+ NULL,
+ NULL, /* read_inband */
+ device_set_status, /* set_status */
+ device_get_status,
+ device_set_filter, /* set_filter */
+ NULL, /* map */
+ NULL, /* no_senders */
+ NULL, /* write_trap */
+ NULL, /* writev_trap */
+};
diff --git a/xen/net.h b/xen/net.h
new file mode 100644
index 00000000..66838700
--- /dev/null
+++ b/xen/net.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_NET_H
+#define XEN_NET_H
+
+void hyp_net_init(void);
+
+#endif /* XEN_NET_H */
diff --git a/xen/public/COPYING b/xen/public/COPYING
new file mode 100644
index 00000000..ffc6d616
--- /dev/null
+++ b/xen/public/COPYING
@@ -0,0 +1,38 @@
+XEN NOTICE
+==========
+
+This copyright applies to all files within this subdirectory and its
+subdirectories:
+ include/public/*.h
+ include/public/hvm/*.h
+ include/public/io/*.h
+
+The intention is that these files can be freely copied into the source
+tree of an operating system when porting that OS to run on Xen. Doing
+so does *not* cause the OS to become subject to the terms of the GPL.
+
+All other files in the Xen source distribution are covered by version
+2 of the GNU General Public License except where explicitly stated
+otherwise within individual source files.
+
+ -- Keir Fraser (on behalf of the Xen team)
+
+=====================================================================
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/xen/public/arch-x86/xen-mca.h b/xen/public/arch-x86/xen-mca.h
new file mode 100644
index 00000000..103d41fd
--- /dev/null
+++ b/xen/public/arch-x86/xen-mca.h
@@ -0,0 +1,279 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ * (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ * (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ * This is better done as separate task, physical CPU hotplugging,
+ * and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ * move a DomU (or Dom0 itself) away from a malicious page
+ * producing correctable errors.
+ * 9. offlining physical page:
+ * Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ * and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+#define XEN_MCA_INTERFACE_VERSION 0x03000001
+
+/* IN: Dom0 calls hypercall from MC event handler. */
+#define XEN_MC_CORRECTABLE 0x0
+/* IN: Dom0/DomU calls hypercall from MC trap handler. */
+#define XEN_MC_TRAP 0x1
+/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. */
+
+/* OUT: All is ok */
+#define XEN_MC_OK 0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED 0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA 0x2
+/* OUT: Between notification time and this hypercall another
+ * (most likely correctable) error happened. The fetched data
+ * does not match the original machine check data. */
+#define XEN_MC_NOMATCH 0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecture:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers
+ */
+#define MC_TYPE_GLOBAL 0
+#define MC_TYPE_BANK 1
+#define MC_TYPE_EXTENDED 2
+
+struct mcinfo_common {
+ uint16_t type; /* structure type */
+ uint16_t size; /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE (1 << 0)
+#define MC_FLAG_UNCORRECTABLE (1 << 1)
+
+/* contains global x86 mc information */
+struct mcinfo_global {
+ struct mcinfo_common common;
+
+ /* running domain at the time in error (most likely the impacted one) */
+ uint16_t mc_domid;
+ uint32_t mc_socketid; /* physical socket of the physical core */
+ uint16_t mc_coreid; /* physical impacted core */
+ uint16_t mc_core_threadid; /* core thread of physical core */
+ uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+ uint64_t mc_gstatus; /* global status */
+ uint32_t mc_flags;
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+ struct mcinfo_common common;
+
+ uint16_t mc_bank; /* bank nr */
+    uint16_t mc_domid;      /* Usecase 5: domain referenced by mc_addr,
+                             * only valid on Dom0 and only if mc_addr
+                             * itself is valid. Never valid on DomU. */
+ uint64_t mc_status; /* bank status */
+ uint64_t mc_addr; /* bank address, only valid
+ * if addr bit is set in mc_status */
+ uint64_t mc_misc;
+};
+
+
+struct mcinfo_msr {
+ uint64_t reg; /* MSR */
+ uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+ struct mcinfo_common common;
+
+ /* You can fill up to five registers.
+ * If you need more, then use this structure
+ * multiple times. */
+
+    uint32_t mc_msrs; /* Number of MSRs with valid values. */
+ struct mcinfo_msr mc_msr[5];
+};
+
+#define MCINFO_HYPERCALLSIZE 1024
+#define MCINFO_MAXSIZE 768
+
+struct mc_info {
+ /* Number of mcinfo_* entries in mi_data */
+ uint32_t mi_nentries;
+
+ uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)];
+};
+typedef struct mc_info mc_info_t;
+
+
+
+/*
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ * uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi) \
+ (_mi)->mi_nentries
+/* Prototype:
+ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi) \
+ (struct mcinfo_common *)((_mi)->mi_data)
+/* Prototype:
+ * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic) \
+ (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)
+
+/* Prototype:
+ * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+#define x86_mcinfo_lookup(_ret, _mi, _type) \
+ do { \
+ uint32_t found, i; \
+ struct mcinfo_common *_mic; \
+ \
+ found = 0; \
+ (_ret) = NULL; \
+ if (_mi == NULL) break; \
+ _mic = x86_mcinfo_first(_mi); \
+ for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \
+ if (_mic->type == (_type)) { \
+ found = 1; \
+ break; \
+ } \
+ _mic = x86_mcinfo_next(_mic); \
+ } \
+ (_ret) = found ? _mic : NULL; \
+ } while (0)
+
+
+/* Usecase 1
+ * Register machine check trap callback handler
+ * (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch 1
+struct xen_mc_fetch {
+ /* IN/OUT variables. */
+ uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */
+
+ /* OUT variables. */
+ uint32_t fetch_idx; /* only useful for Dom0 for the notify hypercall */
+ struct mc_info mc_info;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain 2
+struct xen_mc_notifydomain {
+ /* IN variables. */
+ uint16_t mc_domid; /* The unprivileged domain to notify. */
+ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify.
+ * Usually echo'd value from the fetch hypercall. */
+ uint32_t fetch_idx; /* echo'd value from the fetch hypercall. */
+
+ /* IN/OUT variables. */
+ uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
+
+
+struct xen_mc {
+ uint32_t cmd;
+ uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+ union {
+ struct xen_mc_fetch mc_fetch;
+ struct xen_mc_notifydomain mc_notifydomain;
+ uint8_t pad[MCINFO_HYPERCALLSIZE];
+ } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_t);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
diff --git a/xen/public/arch-x86/xen-x86_32.h b/xen/public/arch-x86/xen-x86_32.h
new file mode 100644
index 00000000..7cb6a017
--- /dev/null
+++ b/xen/public/arch-x86/xen-x86_32.h
@@ -0,0 +1,180 @@
+/******************************************************************************
+ * xen-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2007, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+
+/*
+ * Hypercall interface:
+ * Input: %ebx, %ecx, %edx, %esi, %edi (arguments 1-5)
+ * Output: %eax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ * call hypercall_page + hypercall-number * 32
+ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx)
+ */
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+/*
+ * Legacy hypercall interface:
+ * As above, except the entry sequence to the hypervisor is:
+ * mov $hypercall-number*32,%eax ; int $0x82
+ */
+#define TRAP_INSTR "int $0x82"
+#endif
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0xe019 /* GDT index 259 */
+#define FLAT_RING1_DS 0xe021 /* GDT index 260 */
+#define FLAT_RING1_SS 0xe021 /* GDT index 260 */
+#define FLAT_RING3_CS 0xe02b /* GDT index 261 */
+#define FLAT_RING3_DS 0xe033 /* GDT index 262 */
+#define FLAT_RING3_SS 0xe033 /* GDT index 262 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
+#define FLAT_USER_CS FLAT_RING3_CS
+#define FLAT_USER_DS FLAT_RING3_DS
+#define FLAT_USER_SS FLAT_RING3_SS
+
+#define __HYPERVISOR_VIRT_START_PAE 0xF5800000
+#define __MACH2PHYS_VIRT_START_PAE 0xF5800000
+#define __MACH2PHYS_VIRT_END_PAE 0xF6800000
+#define HYPERVISOR_VIRT_START_PAE \
+ mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_START_PAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_END_PAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE)
+
+/* Non-PAE bounds are obsolete. */
+#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000
+#define HYPERVISOR_VIRT_START_NONPAE \
+ mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_START_NONPAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_END_NONPAE \
+ mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE)
+
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
+#endif
+
+/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#undef ___DEFINE_XEN_GUEST_HANDLE
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef struct { type *p; } \
+ __guest_handle_ ## name; \
+ typedef struct { union { type *p; uint64_aligned_t q; }; } \
+ __guest_handle_64_ ## name
+#undef set_xen_guest_handle
+#define set_xen_guest_handle(hnd, val) \
+ do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \
+ (hnd).p = val; \
+ } while ( 0 )
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
+#endif
+
+#ifndef __ASSEMBLY__
+
+struct cpu_user_regs {
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
+ uint32_t esi;
+ uint32_t edi;
+ uint32_t ebp;
+ uint32_t eax;
+ uint16_t error_code; /* private */
+ uint16_t entry_vector; /* private */
+ uint32_t eip;
+ uint16_t cs;
+ uint8_t saved_upcall_mask;
+ uint8_t _pad0;
+ uint32_t eflags; /* eflags.IF == !saved_upcall_mask */
+ uint32_t esp;
+ uint16_t ss, _pad1;
+ uint16_t es, _pad2;
+ uint16_t ds, _pad3;
+ uint16_t fs, _pad4;
+ uint16_t gs, _pad5;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
+ unsigned long cs;
+ unsigned long eip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/arch-x86/xen-x86_64.h b/xen/public/arch-x86/xen-x86_64.h
new file mode 100644
index 00000000..1e54cf92
--- /dev/null
+++ b/xen/public/arch-x86/xen-x86_64.h
@@ -0,0 +1,212 @@
+/******************************************************************************
+ * xen-x86_64.h
+ *
+ * Guest OS interface to x86 64-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+
+/*
+ * Hypercall interface:
+ * Input: %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5)
+ * Output: %rax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ * call hypercall_page + hypercall-number * 32
+ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi)
+ */
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+/*
+ * Legacy hypercall interface:
+ * As above, except the entry sequence to the hypervisor is:
+ * mov $hypercall-number*32,%eax ; syscall
+ * Clobbered: %rcx, %r11, argument registers (as above)
+ */
+#define TRAP_INSTR "syscall"
+#endif
+
+/*
+ * 64-bit segment selectors
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+
+#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000 /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END 0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START 0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END 0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ * @which == SEGBASE_* ; @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS 0
+#define SEGBASE_GS_USER 1
+#define SEGBASE_GS_KERNEL 2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ * RING0 -> RING3 kernel mode.
+ * RING1 -> RING3 kernel mode.
+ * RING2 -> RING3 kernel mode.
+ * RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ * orb $3,1*8(%rsp)
+ * iretq
+ * If flags contains VGCF_in_syscall:
+ * Restore RAX, RIP, RFLAGS, RSP.
+ * Discard R11, RCX, CS, SS.
+ * Otherwise:
+ * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+ /* Top of stack (%rsp at point of hypercall). */
+ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+ /* Bottom of iret stack frame. */
+};
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+ uint64_t r ## name, e ## name; \
+ uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+
+struct cpu_user_regs {
+ uint64_t r15;
+ uint64_t r14;
+ uint64_t r13;
+ uint64_t r12;
+ __DECL_REG(bp);
+ __DECL_REG(bx);
+ uint64_t r11;
+ uint64_t r10;
+ uint64_t r9;
+ uint64_t r8;
+ __DECL_REG(ax);
+ __DECL_REG(cx);
+ __DECL_REG(dx);
+ __DECL_REG(si);
+ __DECL_REG(di);
+ uint32_t error_code; /* private */
+ uint32_t entry_vector; /* private */
+ __DECL_REG(ip);
+ uint16_t cs, _pad0[1];
+ uint8_t saved_upcall_mask;
+ uint8_t _pad1[3];
+ __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */
+ __DECL_REG(sp);
+ uint16_t ss, _pad2[3];
+ uint16_t es, _pad3[3];
+ uint16_t ds, _pad4[3];
+ uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */
+ uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+#undef __DECL_REG
+
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+typedef unsigned long xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/arch-x86/xen.h b/xen/public/arch-x86/xen.h
new file mode 100644
index 00000000..084348fb
--- /dev/null
+++ b/xen/public/arch-x86/xen.h
@@ -0,0 +1,204 @@
+/******************************************************************************
+ * arch-x86/xen.h
+ *
+ * Guest OS interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "../xen.h"
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef struct { type *p; } __guest_handle_ ## name
+#else
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+ typedef type * __guest_handle_ ## name
+#endif
+
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+ ___DEFINE_XEN_GUEST_HANDLE(name, type); \
+ ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name)
+#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
+#endif
+
+#if defined(__i386__)
+#include "xen-x86_32.h"
+#elif defined(__x86_64__)
+#include "xen-x86_64.h"
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#endif
+
+/*
+ * SEGMENT DESCRIPTOR TABLES
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ */
+#define FIRST_RESERVED_GDT_PAGE 14
+#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+#define MAX_VIRT_CPUS 32
+
+
+/* Machine check support */
+#include "xen-mca.h"
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: No one may enter
+ * Level == 1: Kernel may enter
+ * Level == 2: Kernel may enter
+ * Level == 3: Everyone may enter
+ */
+#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
+#define TI_GET_IF(_ti) ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+ uint8_t vector; /* exception vector */
+ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
+ uint16_t cs; /* code selector */
+ unsigned long address; /* code offset */
+};
+typedef struct trap_info trap_info_t;
+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ */
+struct vcpu_guest_context {
+ /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
+ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
+#define VGCF_I387_VALID (1<<0)
+#define VGCF_IN_KERNEL (1<<2)
+#define _VGCF_i387_valid 0
+#define VGCF_i387_valid (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel 2
+#define VGCF_in_kernel (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events 4
+#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online 5
+#define VGCF_online (1<<_VGCF_online)
+ unsigned long flags; /* VGCF_* flags */
+ struct cpu_user_regs user_regs; /* User-level CPU registers */
+ struct trap_info trap_ctxt[256]; /* Virtual IDT */
+ unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
+ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
+ /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
+ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
+ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
+#ifdef __i386__
+ unsigned long event_callback_cs; /* CS:EIP of event callback */
+ unsigned long event_callback_eip;
+ unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
+ unsigned long failsafe_callback_eip;
+#else
+ unsigned long event_callback_eip;
+ unsigned long failsafe_callback_eip;
+#ifdef __XEN__
+ union {
+ unsigned long syscall_callback_eip;
+ struct {
+ unsigned int event_callback_cs; /* compat CS of event cb */
+ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */
+ };
+ };
+#else
+ unsigned long syscall_callback_eip;
+#endif
+#endif
+ unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+ /* Segment base addresses. */
+ uint64_t fs_base;
+ uint64_t gs_base_kernel;
+ uint64_t gs_base_user;
+#endif
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_shared_info {
+ unsigned long max_pfn; /* max pfn that appears in table */
+ /* Frame containing list of mfns containing list of mfns containing p2m. */
+ xen_pfn_t pfn_to_mfn_frame_list_list;
+ unsigned long nmi_reason;
+ uint64_t pad[32];
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Prefix forces emulation of some non-trapping instructions.
+ * Currently only CPUID.
+ */
+#ifdef __ASSEMBLY__
+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
+#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
+#else
+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
+#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
+#endif
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/arch-x86_32.h b/xen/public/arch-x86_32.h
new file mode 100644
index 00000000..45842b20
--- /dev/null
+++ b/xen/public/arch-x86_32.h
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * arch-x86_32.h
+ *
+ * Guest OS interface to x86 32-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "arch-x86/xen.h"
diff --git a/xen/public/arch-x86_64.h b/xen/public/arch-x86_64.h
new file mode 100644
index 00000000..fbb26399
--- /dev/null
+++ b/xen/public/arch-x86_64.h
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * arch-x86_64.h
+ *
+ * Guest OS interface to x86 64-bit Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "arch-x86/xen.h"
diff --git a/xen/public/callback.h b/xen/public/callback.h
new file mode 100644
index 00000000..f4962f66
--- /dev/null
+++ b/xen/public/callback.h
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * callback.h
+ *
+ * Register guest OS callbacks with Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_PUBLIC_CALLBACK_H__
+#define __XEN_PUBLIC_CALLBACK_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ * long callback_op(int cmd, void *extra_args)
+ * @cmd == CALLBACKOP_??? (callback operation).
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/* ia64, x86: Callback for event delivery. */
+#define CALLBACKTYPE_event 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
+#define CALLBACKTYPE_failsafe 1
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall 2
+
+/*
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ * feature is enabled. Do not use this callback type in new code.
+ */
+#define CALLBACKTYPE_sysenter_deprecated 3
+
+/* x86: Callback for NMI delivery. */
+#define CALLBACKTYPE_nmi 4
+
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ * [nb. also 64-bit guest applications on Intel CPUs
+ * ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter 5
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32 7
+
+/*
+ * Disable event delivery during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events 0
+#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register 0
+struct callback_register {
+ uint16_t type;
+ uint16_t flags;
+ xen_callback_t address;
+};
+typedef struct callback_register callback_register_t;
+DEFINE_XEN_GUEST_HANDLE(callback_register_t);
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister 1
+struct callback_unregister {
+ uint16_t type;
+ uint16_t _unused;
+};
+typedef struct callback_unregister callback_unregister_t;
+DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030207
+#undef CALLBACKTYPE_sysenter
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#endif
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
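
A minimal sketch of how a guest might drive the interface above to register its
event-delivery entry point. The hyp_callback_op() wrapper is hypothetical (this
header only defines the hypercall's argument structures), and xen_callback_t is
architecture-specific, so the entry point is taken here as an already-built value:

    /* Hypothetical helper: hyp_callback_op() wraps the callback_op hypercall. */
    static int
    register_event_callback(xen_callback_t entry)
    {
        struct callback_register reg;

        reg.type    = CALLBACKTYPE_event;
        reg.flags   = 0;        /* ignored for event callbacks, see above */
        reg.address = entry;    /* guest entry point for event upcalls    */

        return hyp_callback_op(CALLBACKOP_register, &reg);
    }
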
diff --git a/xen/public/dom0_ops.h b/xen/public/dom0_ops.h
new file mode 100644
index 00000000..5d2b3245
--- /dev/null
+++ b/xen/public/dom0_ops.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ * dom0_ops.h
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOM0_OPS_H__
+#define __XEN_PUBLIC_DOM0_OPS_H__
+
+#include "xen.h"
+#include "platform.h"
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030204
+#error "dom0_ops.h is a compatibility interface only"
+#endif
+
+#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION
+
+#define DOM0_SETTIME XENPF_settime
+#define dom0_settime xenpf_settime
+#define dom0_settime_t xenpf_settime_t
+
+#define DOM0_ADD_MEMTYPE XENPF_add_memtype
+#define dom0_add_memtype xenpf_add_memtype
+#define dom0_add_memtype_t xenpf_add_memtype_t
+
+#define DOM0_DEL_MEMTYPE XENPF_del_memtype
+#define dom0_del_memtype xenpf_del_memtype
+#define dom0_del_memtype_t xenpf_del_memtype_t
+
+#define DOM0_READ_MEMTYPE XENPF_read_memtype
+#define dom0_read_memtype xenpf_read_memtype
+#define dom0_read_memtype_t xenpf_read_memtype_t
+
+#define DOM0_MICROCODE XENPF_microcode_update
+#define dom0_microcode xenpf_microcode_update
+#define dom0_microcode_t xenpf_microcode_update_t
+
+#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk
+#define dom0_platform_quirk xenpf_platform_quirk
+#define dom0_platform_quirk_t xenpf_platform_quirk_t
+
+typedef uint64_t cpumap_t;
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_MSR 15
+struct dom0_msr {
+ /* IN variables. */
+ uint32_t write;
+ cpumap_t cpu_mask;
+ uint32_t msr;
+ uint32_t in1;
+ uint32_t in2;
+ /* OUT variables. */
+ uint32_t out1;
+ uint32_t out2;
+};
+typedef struct dom0_msr dom0_msr_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_PHYSICAL_MEMORY_MAP 40
+struct dom0_memory_map_entry {
+ uint64_t start, end;
+ uint32_t flags; /* reserved */
+ uint8_t is_ram;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
+
+struct dom0_op {
+ uint32_t cmd;
+ uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
+ union {
+ struct dom0_msr msr;
+ struct dom0_settime settime;
+ struct dom0_add_memtype add_memtype;
+ struct dom0_del_memtype del_memtype;
+ struct dom0_read_memtype read_memtype;
+ struct dom0_microcode microcode;
+ struct dom0_platform_quirk platform_quirk;
+ struct dom0_memory_map_entry physical_memory_map;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct dom0_op dom0_op_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
+
+#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/domctl.h b/xen/public/domctl.h
new file mode 100644
index 00000000..b7075ac4
--- /dev/null
+++ b/xen/public/domctl.h
@@ -0,0 +1,680 @@
+/******************************************************************************
+ * domctl.h
+ *
+ * Domain management operations. For use by node control stack.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOMCTL_H__
+#define __XEN_PUBLIC_DOMCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "domctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005
+
+struct xenctl_cpumap {
+ XEN_GUEST_HANDLE_64(uint8) bitmap;
+ uint32_t nr_cpus;
+};
+
+/*
+ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
+ * If it is specified as zero, an id is auto-allocated and returned.
+ */
+#define XEN_DOMCTL_createdomain 1
+struct xen_domctl_createdomain {
+ /* IN parameters */
+ uint32_t ssidref;
+ xen_domain_handle_t handle;
+ /* Is this an HVM guest (as opposed to a PV guest)? */
+#define _XEN_DOMCTL_CDF_hvm_guest 0
+#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest)
+ /* Use hardware-assisted paging if available? */
+#define _XEN_DOMCTL_CDF_hap 1
+#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap)
+ uint32_t flags;
+};
+typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
+
+#define XEN_DOMCTL_destroydomain 2
+#define XEN_DOMCTL_pausedomain 3
+#define XEN_DOMCTL_unpausedomain 4
+#define XEN_DOMCTL_resumedomain 27
+
+#define XEN_DOMCTL_getdomaininfo 5
+struct xen_domctl_getdomaininfo {
+ /* OUT variables. */
+ domid_t domain; /* Also echoed in domctl.domain */
+ /* Domain is scheduled to die. */
+#define _XEN_DOMINF_dying 0
+#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying)
+ /* Domain is an HVM guest (as opposed to a PV guest). */
+#define _XEN_DOMINF_hvm_guest 1
+#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest)
+ /* The guest OS has shut down. */
+#define _XEN_DOMINF_shutdown 2
+#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown)
+ /* Currently paused by control software. */
+#define _XEN_DOMINF_paused 3
+#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused)
+ /* Currently blocked pending an event. */
+#define _XEN_DOMINF_blocked 4
+#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked)
+ /* Domain is currently running. */
+#define _XEN_DOMINF_running 5
+#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running)
+ /* Being debugged. */
+#define _XEN_DOMINF_debugged 6
+#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged)
+ /* CPU to which this domain is bound. */
+#define XEN_DOMINF_cpumask 255
+#define XEN_DOMINF_cpushift 8
+ /* XEN_DOMINF_shutdown guest-supplied code. */
+#define XEN_DOMINF_shutdownmask 255
+#define XEN_DOMINF_shutdownshift 16
+ uint32_t flags; /* XEN_DOMINF_* */
+ uint64_aligned_t tot_pages;
+ uint64_aligned_t max_pages;
+ uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
+ uint64_aligned_t cpu_time;
+ uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */
+ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
+ uint32_t ssidref;
+ xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
+
+
+#define XEN_DOMCTL_getmemlist 6
+struct xen_domctl_getmemlist {
+ /* IN variables. */
+ /* Max entries to write to output buffer. */
+ uint64_aligned_t max_pfns;
+ /* Start index in guest's page list. */
+ uint64_aligned_t start_pfn;
+ XEN_GUEST_HANDLE_64(uint64) buffer;
+ /* OUT variables. */
+ uint64_aligned_t num_pfns;
+};
+typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
+
+
+#define XEN_DOMCTL_getpageframeinfo 7
+
+#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28
+#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28)
+#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28)
+#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28)
+#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28)
+#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28)
+#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28)
+#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
+#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */
+#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
+
+struct xen_domctl_getpageframeinfo {
+ /* IN variables. */
+ uint64_aligned_t gmfn; /* GMFN to query */
+ /* OUT variables. */
+ /* Is the page PINNED to a type? */
+ uint32_t type; /* see above type defs */
+};
+typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
+
+
+#define XEN_DOMCTL_getpageframeinfo2 8
+struct xen_domctl_getpageframeinfo2 {
+ /* IN variables. */
+ uint64_aligned_t num;
+ /* IN/OUT variables. */
+ XEN_GUEST_HANDLE_64(uint32) array;
+};
+typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
+
+
+/*
+ * Control shadow pagetable operations
+ */
+#define XEN_DOMCTL_shadow_op 10
+
+/* Disable shadow mode. */
+#define XEN_DOMCTL_SHADOW_OP_OFF 0
+
+/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE 32
+
+/* Log-dirty bitmap operations. */
+ /* Return the bitmap and clean internal copy for next round. */
+#define XEN_DOMCTL_SHADOW_OP_CLEAN 11
+ /* Return the bitmap but do not modify internal copy. */
+#define XEN_DOMCTL_SHADOW_OP_PEEK 12
+
+/* Memory allocation accessors. */
+#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30
+#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31
+
+/* Legacy enable operations. */
+ /* Equiv. to ENABLE with no mode flags. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1
+ /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2
+ /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3
+
+/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */
+ /*
+ * Shadow pagetables are refcounted: guest does not use explicit mmu
+ * operations nor write-protect its pagetables.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1)
+ /*
+ * Log pages in a bitmap as they are dirtied.
+ * Used for live relocation to determine which pages must be re-sent.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2)
+ /*
+ * Automatically translate GPFNs into MFNs.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3)
+ /*
+ * Xen does not steal virtual address space from the guest.
+ * Requires HVM support.
+ */
+#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4)
+
+struct xen_domctl_shadow_op_stats {
+ uint32_t fault_count;
+ uint32_t dirty_count;
+};
+typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t);
+
+struct xen_domctl_shadow_op {
+ /* IN variables. */
+ uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */
+
+ /* OP_ENABLE */
+ uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */
+
+ /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
+ uint32_t mb; /* Shadow memory allocation in MB */
+
+ /* OP_PEEK / OP_CLEAN */
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+ uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */
+ struct xen_domctl_shadow_op_stats stats;
+};
+typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
+
+
+#define XEN_DOMCTL_max_mem 11
+struct xen_domctl_max_mem {
+ /* IN variables. */
+ uint64_aligned_t max_memkb;
+};
+typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
+
+
+#define XEN_DOMCTL_setvcpucontext 12
+#define XEN_DOMCTL_getvcpucontext 13
+struct xen_domctl_vcpucontext {
+ uint32_t vcpu; /* IN */
+ XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
+
+
+#define XEN_DOMCTL_getvcpuinfo 14
+struct xen_domctl_getvcpuinfo {
+ /* IN variables. */
+ uint32_t vcpu;
+ /* OUT variables. */
+ uint8_t online; /* currently online (not hotplugged)? */
+ uint8_t blocked; /* blocked waiting for an event? */
+ uint8_t running; /* currently scheduled on its CPU? */
+ uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */
+ uint32_t cpu; /* current mapping */
+};
+typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+
+
+/* Get/set which physical cpus a vcpu can execute on. */
+#define XEN_DOMCTL_setvcpuaffinity 9
+#define XEN_DOMCTL_getvcpuaffinity 25
+struct xen_domctl_vcpuaffinity {
+ uint32_t vcpu; /* IN */
+ struct xenctl_cpumap cpumap; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t);
+
+
+#define XEN_DOMCTL_max_vcpus 15
+struct xen_domctl_max_vcpus {
+ uint32_t max; /* maximum number of vcpus */
+};
+typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
+
+
+#define XEN_DOMCTL_scheduler_op 16
+/* Scheduler types. */
+#define XEN_SCHEDULER_SEDF 4
+#define XEN_SCHEDULER_CREDIT 5
+/* Set or get info? */
+#define XEN_DOMCTL_SCHEDOP_putinfo 0
+#define XEN_DOMCTL_SCHEDOP_getinfo 1
+struct xen_domctl_scheduler_op {
+ uint32_t sched_id; /* XEN_SCHEDULER_* */
+ uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */
+ union {
+ struct xen_domctl_sched_sedf {
+ uint64_aligned_t period;
+ uint64_aligned_t slice;
+ uint64_aligned_t latency;
+ uint32_t extratime;
+ uint32_t weight;
+ } sedf;
+ struct xen_domctl_sched_credit {
+ uint16_t weight;
+ uint16_t cap;
+ } credit;
+ } u;
+};
+typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t);
+
+
+#define XEN_DOMCTL_setdomainhandle 17
+struct xen_domctl_setdomainhandle {
+ xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t);
+
+
+#define XEN_DOMCTL_setdebugging 18
+struct xen_domctl_setdebugging {
+ uint8_t enable;
+};
+typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t);
+
+
+#define XEN_DOMCTL_irq_permission 19
+struct xen_domctl_irq_permission {
+ uint8_t pirq;
+ uint8_t allow_access; /* flag to specify enable/disable of IRQ access */
+};
+typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
+
+
+#define XEN_DOMCTL_iomem_permission 20
+struct xen_domctl_iomem_permission {
+ uint64_aligned_t first_mfn;/* first page (physical page number) in range */
+ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */
+ uint8_t allow_access; /* allow (!0) or deny (0) access to range? */
+};
+typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
+
+
+#define XEN_DOMCTL_ioport_permission 21
+struct xen_domctl_ioport_permission {
+ uint32_t first_port; /* first port in range */
+ uint32_t nr_ports; /* size of port range */
+ uint8_t allow_access; /* allow or deny access to range? */
+};
+typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
+
+
+#define XEN_DOMCTL_hypercall_init 22
+struct xen_domctl_hypercall_init {
+ uint64_aligned_t gmfn; /* GMFN to be initialised */
+};
+typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
+
+
+#define XEN_DOMCTL_arch_setup 23
+#define _XEN_DOMAINSETUP_hvm_guest 0
+#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest)
+#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */
+#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query)
+#define _XEN_DOMAINSETUP_sioemu_guest 2
+#define XEN_DOMAINSETUP_sioemu_guest (1UL<<_XEN_DOMAINSETUP_sioemu_guest)
+typedef struct xen_domctl_arch_setup {
+ uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */
+#ifdef __ia64__
+ uint64_aligned_t bp; /* mpaddr of boot param area */
+ uint64_aligned_t maxmem; /* Highest memory address for MDT. */
+ uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */
+ uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */
+ int8_t vhpt_size_log2; /* Log2 of VHPT size. */
+#endif
+} xen_domctl_arch_setup_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t);
+
+
+#define XEN_DOMCTL_settimeoffset 24
+struct xen_domctl_settimeoffset {
+ int32_t time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
+
+
+#define XEN_DOMCTL_gethvmcontext 33
+#define XEN_DOMCTL_sethvmcontext 34
+typedef struct xen_domctl_hvmcontext {
+ uint32_t size; /* IN/OUT: size of buffer / bytes filled */
+ XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call
+ * gethvmcontext with NULL
+ * buffer to get size req'd */
+} xen_domctl_hvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
+
+
+#define XEN_DOMCTL_set_address_size 35
+#define XEN_DOMCTL_get_address_size 36
+typedef struct xen_domctl_address_size {
+ uint32_t size;
+} xen_domctl_address_size_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
+
+
+#define XEN_DOMCTL_real_mode_area 26
+struct xen_domctl_real_mode_area {
+ uint32_t log; /* log2 of Real Mode Area size */
+};
+typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
+
+#define XEN_DOMCTL_sendtrigger 28
+#define XEN_DOMCTL_SENDTRIGGER_NMI 0
+#define XEN_DOMCTL_SENDTRIGGER_RESET 1
+#define XEN_DOMCTL_SENDTRIGGER_INIT 2
+struct xen_domctl_sendtrigger {
+ uint32_t trigger; /* IN */
+ uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
+
+
+/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
+#define XEN_DOMCTL_assign_device 37
+#define XEN_DOMCTL_test_assign_device 45
+#define XEN_DOMCTL_deassign_device 47
+struct xen_domctl_assign_device {
+ uint32_t machine_bdf; /* machine PCI ID of assigned device */
+};
+typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
+
+/* Retrieve sibling device information of machine_bdf */
+#define XEN_DOMCTL_get_device_group 50
+struct xen_domctl_get_device_group {
+ uint32_t machine_bdf; /* IN */
+ uint32_t max_sdevs; /* IN */
+ uint32_t num_sdevs; /* OUT */
+ XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
+
+/* Pass-through interrupts: bind real irq -> hvm devfn. */
+#define XEN_DOMCTL_bind_pt_irq 38
+#define XEN_DOMCTL_unbind_pt_irq 48
+typedef enum pt_irq_type_e {
+ PT_IRQ_TYPE_PCI,
+ PT_IRQ_TYPE_ISA,
+ PT_IRQ_TYPE_MSI,
+} pt_irq_type_t;
+struct xen_domctl_bind_pt_irq {
+ uint32_t machine_irq;
+ pt_irq_type_t irq_type;
+ uint32_t hvm_domid;
+
+ union {
+ struct {
+ uint8_t isa_irq;
+ } isa;
+ struct {
+ uint8_t bus;
+ uint8_t device;
+ uint8_t intx;
+ } pci;
+ struct {
+ uint8_t gvec;
+ uint32_t gflags;
+ } msi;
+ } u;
+};
+typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t);
+
+
+/* Bind machine I/O address range -> HVM address range. */
+#define XEN_DOMCTL_memory_mapping 39
+#define DPCI_ADD_MAPPING 1
+#define DPCI_REMOVE_MAPPING 0
+struct xen_domctl_memory_mapping {
+ uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */
+ uint64_aligned_t first_mfn; /* first page (machine page) in range */
+ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */
+ uint32_t add_mapping; /* add or remove mapping */
+ uint32_t padding; /* padding for 64-bit aligned structure */
+};
+typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t);
+
+
+/* Bind machine I/O port range -> HVM I/O port range. */
+#define XEN_DOMCTL_ioport_mapping 40
+struct xen_domctl_ioport_mapping {
+ uint32_t first_gport; /* first guest IO port*/
+ uint32_t first_mport; /* first machine IO port */
+ uint32_t nr_ports; /* size of port range */
+ uint32_t add_mapping; /* add or remove mapping */
+};
+typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
+
+
+/*
+ * Pin caching type of RAM space for x86 HVM domU.
+ */
+#define XEN_DOMCTL_pin_mem_cacheattr 41
+/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */
+#define XEN_DOMCTL_MEM_CACHEATTR_UC 0
+#define XEN_DOMCTL_MEM_CACHEATTR_WC 1
+#define XEN_DOMCTL_MEM_CACHEATTR_WT 4
+#define XEN_DOMCTL_MEM_CACHEATTR_WP 5
+#define XEN_DOMCTL_MEM_CACHEATTR_WB 6
+#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7
+struct xen_domctl_pin_mem_cacheattr {
+ uint64_aligned_t start, end;
+ unsigned int type; /* XEN_DOMCTL_MEM_CACHEATTR_* */
+};
+typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
+
+
+#define XEN_DOMCTL_set_ext_vcpucontext 42
+#define XEN_DOMCTL_get_ext_vcpucontext 43
+struct xen_domctl_ext_vcpucontext {
+ /* IN: VCPU that this call applies to. */
+ uint32_t vcpu;
+ /*
+ * SET: Size of struct (IN)
+ * GET: Size of struct (OUT)
+ */
+ uint32_t size;
+#if defined(__i386__) || defined(__x86_64__)
+ /* SYSCALL from 32-bit mode and SYSENTER callback information. */
+ /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */
+ uint64_aligned_t syscall32_callback_eip;
+ uint64_aligned_t sysenter_callback_eip;
+ uint16_t syscall32_callback_cs;
+ uint16_t sysenter_callback_cs;
+ uint8_t syscall32_disables_events;
+ uint8_t sysenter_disables_events;
+#endif
+};
+typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
+
+/*
+ * Set optimization features for a domain
+ */
+#define XEN_DOMCTL_set_opt_feature 44
+struct xen_domctl_set_opt_feature {
+#if defined(__ia64__)
+ struct xen_ia64_opt_feature optf;
+#else
+ /* Make struct non-empty: do not depend on this field name! */
+ uint64_t dummy;
+#endif
+};
+typedef struct xen_domctl_set_opt_feature xen_domctl_set_opt_feature_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_opt_feature_t);
+
+/*
+ * Set the target domain for a domain
+ */
+#define XEN_DOMCTL_set_target 46
+struct xen_domctl_set_target {
+ domid_t target;
+};
+typedef struct xen_domctl_set_target xen_domctl_set_target_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t);
+
+#if defined(__i386__) || defined(__x86_64__)
+# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF
+# define XEN_DOMCTL_set_cpuid 49
+struct xen_domctl_cpuid {
+ unsigned int input[2];
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+};
+typedef struct xen_domctl_cpuid xen_domctl_cpuid_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
+#endif
+
+#define XEN_DOMCTL_subscribe 29
+struct xen_domctl_subscribe {
+ uint32_t port; /* IN */
+};
+typedef struct xen_domctl_subscribe xen_domctl_subscribe_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t);
+
+/*
+ * Define the maximum machine address size which should be allocated
+ * to a guest.
+ */
+#define XEN_DOMCTL_set_machine_address_size 51
+#define XEN_DOMCTL_get_machine_address_size 52
+
+/*
+ * Do not inject spurious page faults into this domain.
+ */
+#define XEN_DOMCTL_suppress_spurious_page_faults 53
+
+struct xen_domctl {
+ uint32_t cmd;
+ uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
+ domid_t domain;
+ union {
+ struct xen_domctl_createdomain createdomain;
+ struct xen_domctl_getdomaininfo getdomaininfo;
+ struct xen_domctl_getmemlist getmemlist;
+ struct xen_domctl_getpageframeinfo getpageframeinfo;
+ struct xen_domctl_getpageframeinfo2 getpageframeinfo2;
+ struct xen_domctl_vcpuaffinity vcpuaffinity;
+ struct xen_domctl_shadow_op shadow_op;
+ struct xen_domctl_max_mem max_mem;
+ struct xen_domctl_vcpucontext vcpucontext;
+ struct xen_domctl_getvcpuinfo getvcpuinfo;
+ struct xen_domctl_max_vcpus max_vcpus;
+ struct xen_domctl_scheduler_op scheduler_op;
+ struct xen_domctl_setdomainhandle setdomainhandle;
+ struct xen_domctl_setdebugging setdebugging;
+ struct xen_domctl_irq_permission irq_permission;
+ struct xen_domctl_iomem_permission iomem_permission;
+ struct xen_domctl_ioport_permission ioport_permission;
+ struct xen_domctl_hypercall_init hypercall_init;
+ struct xen_domctl_arch_setup arch_setup;
+ struct xen_domctl_settimeoffset settimeoffset;
+ struct xen_domctl_real_mode_area real_mode_area;
+ struct xen_domctl_hvmcontext hvmcontext;
+ struct xen_domctl_address_size address_size;
+ struct xen_domctl_sendtrigger sendtrigger;
+ struct xen_domctl_get_device_group get_device_group;
+ struct xen_domctl_assign_device assign_device;
+ struct xen_domctl_bind_pt_irq bind_pt_irq;
+ struct xen_domctl_memory_mapping memory_mapping;
+ struct xen_domctl_ioport_mapping ioport_mapping;
+ struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr;
+ struct xen_domctl_ext_vcpucontext ext_vcpucontext;
+ struct xen_domctl_set_opt_feature set_opt_feature;
+ struct xen_domctl_set_target set_target;
+ struct xen_domctl_subscribe subscribe;
+#if defined(__i386__) || defined(__x86_64__)
+ struct xen_domctl_cpuid cpuid;
+#endif
+ uint8_t pad[128];
+ } u;
+};
+typedef struct xen_domctl xen_domctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
+
+#endif /* __XEN_PUBLIC_DOMCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
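
A minimal sketch of issuing XEN_DOMCTL_getdomaininfo with the structures above.
The hyp_domctl() wrapper and the availability of memset() are assumptions; a real
control stack would normally go through libxc rather than issuing the hypercall
by hand, and only privileged (__XEN_TOOLS__) code may use this interface at all:

    /* Hypothetical helper: hyp_domctl() wraps the domctl hypercall. */
    static int
    query_domain(domid_t dom, xen_domctl_getdomaininfo_t *info)
    {
        xen_domctl_t op;
        int rc;

        memset(&op, 0, sizeof op);
        op.cmd               = XEN_DOMCTL_getdomaininfo;
        op.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        op.domain            = dom;        /* IN/OUT: echoed back by Xen */

        rc = hyp_domctl(&op);
        if (rc == 0)
            *info = op.u.getdomaininfo;    /* copy out the OUT variables */
        return rc;
    }
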
diff --git a/xen/public/elfnote.h b/xen/public/elfnote.h
new file mode 100644
index 00000000..77be41bb
--- /dev/null
+++ b/xen/public/elfnote.h
@@ -0,0 +1,233 @@
+/******************************************************************************
+ * elfnote.h
+ *
+ * Definitions used for the Xen ELF notes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
+ */
+
+#ifndef __XEN_PUBLIC_ELFNOTE_H__
+#define __XEN_PUBLIC_ELFNOTE_H__
+
+/*
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
+ * name field.
+ *
+ * Numeric types are either 4 or 8 bytes depending on the content of
+ * the desc field.
+ *
+ * LEGACY indicates the fields in the legacy __xen_guest string which
+ * this note type replaces.
+ */
+
+/*
+ * NAME=VALUE pair (string).
+ */
+#define XEN_ELFNOTE_INFO 0
+
+/*
+ * The virtual address of the entry point (numeric).
+ *
+ * LEGACY: VIRT_ENTRY
+ */
+#define XEN_ELFNOTE_ENTRY 1
+
+/* The virtual address of the hypercall transfer page (numeric).
+ *
+ * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page
+ * number not a virtual address)
+ */
+#define XEN_ELFNOTE_HYPERCALL_PAGE 2
+
+/* The virtual address where the kernel image should be mapped (numeric).
+ *
+ * Defaults to 0.
+ *
+ * LEGACY: VIRT_BASE
+ */
+#define XEN_ELFNOTE_VIRT_BASE 3
+
+/*
+ * The offset of the ELF paddr field from the actual required
+ * pseudo-physical address (numeric).
+ *
+ * This is used to maintain backwards compatibility with older kernels
+ * which wrote __PAGE_OFFSET into that field. This field defaults to 0
+ * if not present.
+ *
+ * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE)
+ */
+#define XEN_ELFNOTE_PADDR_OFFSET 4
+
+/*
+ * The version of Xen that we work with (string).
+ *
+ * LEGACY: XEN_VER
+ */
+#define XEN_ELFNOTE_XEN_VERSION 5
+
+/*
+ * The name of the guest operating system (string).
+ *
+ * LEGACY: GUEST_OS
+ */
+#define XEN_ELFNOTE_GUEST_OS 6
+
+/*
+ * The version of the guest operating system (string).
+ *
+ * LEGACY: GUEST_VER
+ */
+#define XEN_ELFNOTE_GUEST_VERSION 7
+
+/*
+ * The loader type (string).
+ *
+ * LEGACY: LOADER
+ */
+#define XEN_ELFNOTE_LOADER 8
+
+/*
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
+ *
+ * LEGACY: PAE (n.b. The legacy interface included a provision to
+ * indicate 'extended-cr3' support allowing L3 page tables to be
+ * placed above 4G. It is assumed that any kernel new enough to use
+ * these ELF notes will include this and therefore "yes" here is
+ * equivalent to "yes[extended-cr3]" in the __xen_guest interface.)
+ */
+#define XEN_ELFNOTE_PAE_MODE 9
+
+/*
+ * The features supported/required by this kernel (string).
+ *
+ * The string must consist of a list of feature names (as given in
+ * features.h, without the "XENFEAT_" prefix) separated by '|'
+ * characters. If a feature is required for the kernel to function
+ * then the feature name must be preceded by a '!' character.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_FEATURES 10
+
+/*
+ * The kernel requires the symbol table to be loaded (string = "yes" or "no")
+ * LEGACY: BSD_SYMTAB (n.b. The legacy interface treated the presence or
+ * absence of this string as a boolean flag rather than requiring "yes"
+ * or "no".)
+ */
+#define XEN_ELFNOTE_BSD_SYMTAB 11
+
+/*
+ * The lowest address the hypervisor hole can begin at (numeric).
+ *
+ * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
+ * also indicates to the hypervisor that the kernel can deal with the
+ * hole starting at a higher address.
+ */
+#define XEN_ELFNOTE_HV_START_LOW 12
+
+/*
+ * List of maddr_t-sized mask/value pairs describing how to recognize
+ * (non-present) L1 page table entries carrying valid MFNs (numeric).
+ */
+#define XEN_ELFNOTE_L1_MFN_VALID 13
+
+/*
+ * Whether or not the guest supports cooperative suspend cancellation.
+ */
+#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+
+/*
+ * The number of the highest elfnote defined.
+ */
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL
+
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is a xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003
+
+#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
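
One possible way to emit a Xen ELF note from C, as a sketch only. A kernel's
real notes are usually emitted from boot-header assembly or the linker script;
the ".note.Xen" section name, the fixed-size desc field and the availability of
uint32_t are all assumptions of this example:

    /* Sketch: a "Xen" note carrying XEN_ELFNOTE_LOADER = "generic". */
    struct xen_elfnote_loader {
        uint32_t namesz, descsz, type;
        char     name[4];                 /* "Xen" plus NUL padding   */
        char     desc[8];                 /* "generic" plus NUL       */
    } __attribute__((packed, aligned(4)));

    static const struct xen_elfnote_loader loader_note
        __attribute__((used, section(".note.Xen"))) =
        { 4, 8, XEN_ELFNOTE_LOADER, "Xen", "generic" };
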
diff --git a/xen/public/elfstructs.h b/xen/public/elfstructs.h
new file mode 100644
index 00000000..77362f3b
--- /dev/null
+++ b/xen/public/elfstructs.h
@@ -0,0 +1,527 @@
+#ifndef __XEN_PUBLIC_ELFSTRUCTS_H__
+#define __XEN_PUBLIC_ELFSTRUCTS_H__ 1
+/*
+ * Copyright (c) 1995, 1996 Erik Theisen. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+typedef uint8_t Elf_Byte;
+
+typedef uint32_t Elf32_Addr; /* Unsigned program address */
+typedef uint32_t Elf32_Off; /* Unsigned file offset */
+typedef int32_t Elf32_Sword; /* Signed large integer */
+typedef uint32_t Elf32_Word; /* Unsigned large integer */
+typedef uint16_t Elf32_Half; /* Unsigned medium integer */
+
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+typedef int32_t Elf64_Shalf;
+
+typedef int32_t Elf64_Sword;
+typedef uint32_t Elf64_Word;
+
+typedef int64_t Elf64_Sxword;
+typedef uint64_t Elf64_Xword;
+
+typedef uint32_t Elf64_Half;
+typedef uint16_t Elf64_Quarter;
+
+/*
+ * e_ident[] identification indexes
+ * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html
+ */
+#define EI_MAG0 0 /* file ID */
+#define EI_MAG1 1 /* file ID */
+#define EI_MAG2 2 /* file ID */
+#define EI_MAG3 3 /* file ID */
+#define EI_CLASS 4 /* file class */
+#define EI_DATA 5 /* data encoding */
+#define EI_VERSION 6 /* ELF header version */
+#define EI_OSABI 7 /* OS/ABI ID */
+#define EI_ABIVERSION 8 /* ABI version */
+#define EI_PAD 9 /* start of pad bytes */
+#define EI_NIDENT 16 /* Size of e_ident[] */
+
+/* e_ident[] magic number */
+#define ELFMAG0 0x7f /* e_ident[EI_MAG0] */
+#define ELFMAG1 'E' /* e_ident[EI_MAG1] */
+#define ELFMAG2 'L' /* e_ident[EI_MAG2] */
+#define ELFMAG3 'F' /* e_ident[EI_MAG3] */
+#define ELFMAG "\177ELF" /* magic */
+#define SELFMAG 4 /* size of magic */
+
+/* e_ident[] file class */
+#define ELFCLASSNONE 0 /* invalid */
+#define ELFCLASS32 1 /* 32-bit objs */
+#define ELFCLASS64 2 /* 64-bit objs */
+#define ELFCLASSNUM 3 /* number of classes */
+
+/* e_ident[] data encoding */
+#define ELFDATANONE 0 /* invalid */
+#define ELFDATA2LSB 1 /* Little-Endian */
+#define ELFDATA2MSB 2 /* Big-Endian */
+#define ELFDATANUM 3 /* number of data encode defines */
+
+/* e_ident[] Operating System/ABI */
+#define ELFOSABI_SYSV 0 /* UNIX System V ABI */
+#define ELFOSABI_HPUX 1 /* HP-UX operating system */
+#define ELFOSABI_NETBSD 2 /* NetBSD */
+#define ELFOSABI_LINUX 3 /* GNU/Linux */
+#define ELFOSABI_HURD 4 /* GNU/Hurd */
+#define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */
+#define ELFOSABI_SOLARIS 6 /* Solaris */
+#define ELFOSABI_MONTEREY 7 /* Monterey */
+#define ELFOSABI_IRIX 8 /* IRIX */
+#define ELFOSABI_FREEBSD 9 /* FreeBSD */
+#define ELFOSABI_TRU64 10 /* TRU64 UNIX */
+#define ELFOSABI_MODESTO 11 /* Novell Modesto */
+#define ELFOSABI_OPENBSD 12 /* OpenBSD */
+#define ELFOSABI_ARM 97 /* ARM */
+#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
+
+/* e_ident */
+#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
+ (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \
+ (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \
+ (ehdr).e_ident[EI_MAG3] == ELFMAG3)
+
+/* ELF Header */
+typedef struct elfhdr {
+ unsigned char e_ident[EI_NIDENT]; /* ELF Identification */
+ Elf32_Half e_type; /* object file type */
+ Elf32_Half e_machine; /* machine */
+ Elf32_Word e_version; /* object file version */
+ Elf32_Addr e_entry; /* virtual entry point */
+ Elf32_Off e_phoff; /* program header table offset */
+ Elf32_Off e_shoff; /* section header table offset */
+ Elf32_Word e_flags; /* processor-specific flags */
+ Elf32_Half e_ehsize; /* ELF header size */
+ Elf32_Half e_phentsize; /* program header entry size */
+ Elf32_Half e_phnum; /* number of program header entries */
+ Elf32_Half e_shentsize; /* section header entry size */
+ Elf32_Half e_shnum; /* number of section header entries */
+ Elf32_Half e_shstrndx; /* section header table's "section
+ header string table" entry offset */
+} Elf32_Ehdr;
+
+typedef struct {
+ unsigned char e_ident[EI_NIDENT]; /* Id bytes */
+ Elf64_Quarter e_type; /* file type */
+ Elf64_Quarter e_machine; /* machine type */
+ Elf64_Half e_version; /* version number */
+ Elf64_Addr e_entry; /* entry point */
+ Elf64_Off e_phoff; /* Program hdr offset */
+ Elf64_Off e_shoff; /* Section hdr offset */
+ Elf64_Half e_flags; /* Processor flags */
+ Elf64_Quarter e_ehsize; /* sizeof ehdr */
+ Elf64_Quarter e_phentsize; /* Program header entry size */
+ Elf64_Quarter e_phnum; /* Number of program headers */
+ Elf64_Quarter e_shentsize; /* Section header entry size */
+ Elf64_Quarter e_shnum; /* Number of section headers */
+ Elf64_Quarter e_shstrndx; /* String table index */
+} Elf64_Ehdr;
+
+/* e_type */
+#define ET_NONE 0 /* No file type */
+#define ET_REL 1 /* relocatable file */
+#define ET_EXEC 2 /* executable file */
+#define ET_DYN 3 /* shared object file */
+#define ET_CORE 4 /* core file */
+#define ET_NUM 5 /* number of types */
+#define ET_LOPROC 0xff00 /* reserved range for processor */
+#define ET_HIPROC 0xffff /* specific e_type */
+
+/* e_machine */
+#define EM_NONE 0 /* No Machine */
+#define EM_M32 1 /* AT&T WE 32100 */
+#define EM_SPARC 2 /* SPARC */
+#define EM_386 3 /* Intel 80386 */
+#define EM_68K 4 /* Motorola 68000 */
+#define EM_88K 5 /* Motorola 88000 */
+#define EM_486 6 /* Intel 80486 - unused? */
+#define EM_860 7 /* Intel 80860 */
+#define EM_MIPS 8 /* MIPS R3000 Big-Endian only */
+/*
+ * Don't know if EM_MIPS_RS4_BE,
+ * EM_SPARC64, EM_PARISC,
+ * or EM_PPC are ABI compliant
+ */
+#define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */
+#define EM_SPARC64 11 /* SPARC v9 64-bit unofficial */
+#define EM_PARISC 15 /* HPPA */
+#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */
+#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC 64-bit */
+#define EM_ARM 40 /* Advanced RISC Machines ARM */
+#define EM_ALPHA 41 /* DEC ALPHA */
+#define EM_SPARCV9 43 /* SPARC version 9 */
+#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */
+#define EM_IA_64 50 /* Intel Merced */
+#define EM_X86_64 62 /* AMD x86-64 architecture */
+#define EM_VAX 75 /* DEC VAX */
+
+/* Version */
+#define EV_NONE 0 /* Invalid */
+#define EV_CURRENT 1 /* Current */
+#define EV_NUM 2 /* number of versions */
+
+/* Section Header */
+typedef struct {
+ Elf32_Word sh_name; /* name - index into section header
+ string table section */
+ Elf32_Word sh_type; /* type */
+ Elf32_Word sh_flags; /* flags */
+ Elf32_Addr sh_addr; /* address */
+ Elf32_Off sh_offset; /* file offset */
+ Elf32_Word sh_size; /* section size */
+ Elf32_Word sh_link; /* section header table index link */
+ Elf32_Word sh_info; /* extra information */
+ Elf32_Word sh_addralign; /* address alignment */
+ Elf32_Word sh_entsize; /* section entry size */
+} Elf32_Shdr;
+
+typedef struct {
+ Elf64_Half sh_name; /* section name */
+ Elf64_Half sh_type; /* section type */
+ Elf64_Xword sh_flags; /* section flags */
+ Elf64_Addr sh_addr; /* virtual address */
+ Elf64_Off sh_offset; /* file offset */
+ Elf64_Xword sh_size; /* section size */
+ Elf64_Half sh_link; /* link to another */
+ Elf64_Half sh_info; /* misc info */
+ Elf64_Xword sh_addralign; /* memory alignment */
+ Elf64_Xword sh_entsize; /* table entry size */
+} Elf64_Shdr;
+
+/* Special Section Indexes */
+#define SHN_UNDEF 0 /* undefined */
+#define SHN_LORESERVE 0xff00 /* lower bounds of reserved indexes */
+#define SHN_LOPROC 0xff00 /* reserved range for processor */
+#define SHN_HIPROC 0xff1f /* specific section indexes */
+#define SHN_ABS 0xfff1 /* absolute value */
+#define SHN_COMMON 0xfff2 /* common symbol */
+#define SHN_HIRESERVE 0xffff /* upper bounds of reserved indexes */
+
+/* sh_type */
+#define SHT_NULL 0 /* inactive */
+#define SHT_PROGBITS 1 /* program defined information */
+#define SHT_SYMTAB 2 /* symbol table section */
+#define SHT_STRTAB 3 /* string table section */
+#define SHT_RELA 4 /* relocation section with addends*/
+#define SHT_HASH 5 /* symbol hash table section */
+#define SHT_DYNAMIC 6 /* dynamic section */
+#define SHT_NOTE 7 /* note section */
+#define SHT_NOBITS 8 /* no space section */
+#define SHT_REL 9 /* relocation section without addends */
+#define SHT_SHLIB 10 /* reserved - purpose unknown */
+#define SHT_DYNSYM 11 /* dynamic symbol table section */
+#define SHT_NUM 12 /* number of section types */
+#define SHT_LOPROC 0x70000000 /* reserved range for processor */
+#define SHT_HIPROC 0x7fffffff /* specific section header types */
+#define SHT_LOUSER 0x80000000 /* reserved range for application */
+#define SHT_HIUSER 0xffffffff /* specific indexes */
+
+/* Section names */
+#define ELF_BSS ".bss" /* uninitialized data */
+#define ELF_DATA ".data" /* initialized data */
+#define ELF_DEBUG ".debug" /* debug */
+#define ELF_DYNAMIC ".dynamic" /* dynamic linking information */
+#define ELF_DYNSTR ".dynstr" /* dynamic string table */
+#define ELF_DYNSYM ".dynsym" /* dynamic symbol table */
+#define ELF_FINI ".fini" /* termination code */
+#define ELF_GOT ".got" /* global offset table */
+#define ELF_HASH ".hash" /* symbol hash table */
+#define ELF_INIT ".init" /* initialization code */
+#define ELF_REL_DATA ".rel.data" /* relocation data */
+#define ELF_REL_FINI ".rel.fini" /* relocation termination code */
+#define ELF_REL_INIT ".rel.init" /* relocation initialization code */
+#define ELF_REL_DYN ".rel.dyn" /* relocation dynamic link info */
+#define ELF_REL_RODATA ".rel.rodata" /* relocation read-only data */
+#define ELF_REL_TEXT ".rel.text" /* relocation code */
+#define ELF_RODATA ".rodata" /* read-only data */
+#define ELF_SHSTRTAB ".shstrtab" /* section header string table */
+#define ELF_STRTAB ".strtab" /* string table */
+#define ELF_SYMTAB ".symtab" /* symbol table */
+#define ELF_TEXT ".text" /* code */
+
+
+/* Section Attribute Flags - sh_flags */
+#define SHF_WRITE 0x1 /* Writable */
+#define SHF_ALLOC 0x2 /* occupies memory */
+#define SHF_EXECINSTR 0x4 /* executable */
+#define SHF_MASKPROC 0xf0000000 /* reserved bits for processor */
+ /* specific section attributes */
+
+/* Symbol Table Entry */
+typedef struct elf32_sym {
+ Elf32_Word st_name; /* name - index into string table */
+ Elf32_Addr st_value; /* symbol value */
+ Elf32_Word st_size; /* symbol size */
+ unsigned char st_info; /* type and binding */
+ unsigned char st_other; /* 0 - no defined meaning */
+ Elf32_Half st_shndx; /* section header index */
+} Elf32_Sym;
+
+typedef struct {
+ Elf64_Half st_name; /* Symbol name index in str table */
+ Elf_Byte st_info; /* type / binding attrs */
+ Elf_Byte st_other; /* unused */
+ Elf64_Quarter st_shndx; /* section index of symbol */
+ Elf64_Xword st_value; /* value of symbol */
+ Elf64_Xword st_size; /* size of symbol */
+} Elf64_Sym;
+
+/* Symbol table index */
+#define STN_UNDEF 0 /* undefined */
+
+/* Extract symbol info - st_info */
+#define ELF32_ST_BIND(x) ((x) >> 4)
+#define ELF32_ST_TYPE(x) (((unsigned int) x) & 0xf)
+#define ELF32_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf))
+
+#define ELF64_ST_BIND(x) ((x) >> 4)
+#define ELF64_ST_TYPE(x) (((unsigned int) x) & 0xf)
+#define ELF64_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf))
+
+/* Symbol Binding - ELF32_ST_BIND - st_info */
+#define STB_LOCAL 0 /* Local symbol */
+#define STB_GLOBAL 1 /* Global symbol */
+#define STB_WEAK 2 /* like global - lower precedence */
+#define STB_NUM 3 /* number of symbol bindings */
+#define STB_LOPROC 13 /* reserved range for processor */
+#define STB_HIPROC 15 /* specific symbol bindings */
+
+/* Symbol type - ELF32_ST_TYPE - st_info */
+#define STT_NOTYPE 0 /* not specified */
+#define STT_OBJECT 1 /* data object */
+#define STT_FUNC 2 /* function */
+#define STT_SECTION 3 /* section */
+#define STT_FILE 4 /* file */
+#define STT_NUM 5 /* number of symbol types */
+#define STT_LOPROC 13 /* reserved range for processor */
+#define STT_HIPROC 15 /* specific symbol types */
+
+/* Relocation entry with implicit addend */
+typedef struct {
+ Elf32_Addr r_offset; /* offset of relocation */
+ Elf32_Word r_info; /* symbol table index and type */
+} Elf32_Rel;
+
+/* Relocation entry with explicit addend */
+typedef struct {
+ Elf32_Addr r_offset; /* offset of relocation */
+ Elf32_Word r_info; /* symbol table index and type */
+ Elf32_Sword r_addend;
+} Elf32_Rela;
+
+/* Extract relocation info - r_info */
+#define ELF32_R_SYM(i) ((i) >> 8)
+#define ELF32_R_TYPE(i) ((unsigned char) (i))
+#define ELF32_R_INFO(s,t) (((s) << 8) + (unsigned char)(t))
+
+typedef struct {
+ Elf64_Xword r_offset; /* where to do it */
+ Elf64_Xword r_info; /* index & type of relocation */
+} Elf64_Rel;
+
+typedef struct {
+ Elf64_Xword r_offset; /* where to do it */
+ Elf64_Xword r_info; /* index & type of relocation */
+ Elf64_Sxword r_addend; /* adjustment value */
+} Elf64_Rela;
+
+#define ELF64_R_SYM(info) ((info) >> 32)
+#define ELF64_R_TYPE(info) ((info) & 0xFFFFFFFF)
+#define ELF64_R_INFO(s,t) (((s) << 32) + (u_int32_t)(t))
+
+/* Program Header */
+typedef struct {
+ Elf32_Word p_type; /* segment type */
+ Elf32_Off p_offset; /* segment offset */
+ Elf32_Addr p_vaddr; /* virtual address of segment */
+ Elf32_Addr p_paddr; /* physical address - ignored? */
+ Elf32_Word p_filesz; /* number of bytes in file for seg. */
+ Elf32_Word p_memsz; /* number of bytes in mem. for seg. */
+ Elf32_Word p_flags; /* flags */
+ Elf32_Word p_align; /* memory alignment */
+} Elf32_Phdr;
+
+typedef struct {
+ Elf64_Half p_type; /* entry type */
+ Elf64_Half p_flags; /* flags */
+ Elf64_Off p_offset; /* offset */
+ Elf64_Addr p_vaddr; /* virtual address */
+ Elf64_Addr p_paddr; /* physical address */
+ Elf64_Xword p_filesz; /* file size */
+ Elf64_Xword p_memsz; /* memory size */
+ Elf64_Xword p_align; /* memory & file alignment */
+} Elf64_Phdr;
+
+/* Segment types - p_type */
+#define PT_NULL 0 /* unused */
+#define PT_LOAD 1 /* loadable segment */
+#define PT_DYNAMIC 2 /* dynamic linking section */
+#define PT_INTERP 3 /* the RTLD */
+#define PT_NOTE 4 /* auxiliary information */
+#define PT_SHLIB 5 /* reserved - purpose undefined */
+#define PT_PHDR 6 /* program header */
+#define PT_NUM 7 /* Number of segment types */
+#define PT_LOPROC 0x70000000 /* reserved range for processor */
+#define PT_HIPROC 0x7fffffff /* specific segment types */
+
+/* Segment flags - p_flags */
+#define PF_X 0x1 /* Executable */
+#define PF_W 0x2 /* Writable */
+#define PF_R 0x4 /* Readable */
+#define PF_MASKPROC 0xf0000000 /* reserved bits for processor */
+ /* specific segment flags */
+
+/* Dynamic structure */
+typedef struct {
+ Elf32_Sword d_tag; /* controls meaning of d_val */
+ union {
+ Elf32_Word d_val; /* Multiple meanings - see d_tag */
+ Elf32_Addr d_ptr; /* program virtual address */
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+ Elf64_Xword d_tag; /* controls meaning of d_val */
+ union {
+ Elf64_Addr d_ptr;
+ Elf64_Xword d_val;
+ } d_un;
+} Elf64_Dyn;
+
+/* Dynamic Array Tags - d_tag */
+#define DT_NULL 0 /* marks end of _DYNAMIC array */
+#define DT_NEEDED 1 /* string table offset of needed lib */
+#define DT_PLTRELSZ 2 /* size of relocation entries in PLT */
+#define DT_PLTGOT 3 /* address PLT/GOT */
+#define DT_HASH 4 /* address of symbol hash table */
+#define DT_STRTAB 5 /* address of string table */
+#define DT_SYMTAB 6 /* address of symbol table */
+#define DT_RELA 7 /* address of relocation table */
+#define DT_RELASZ 8 /* size of relocation table */
+#define DT_RELAENT 9 /* size of relocation entry */
+#define DT_STRSZ 10 /* size of string table */
+#define DT_SYMENT 11 /* size of symbol table entry */
+#define DT_INIT 12 /* address of initialization func. */
+#define DT_FINI 13 /* address of termination function */
+#define DT_SONAME 14 /* string table offset of shared obj */
+#define DT_RPATH 15 /* string table offset of library
+ search path */
+#define DT_SYMBOLIC 16 /* start sym search in shared obj. */
+#define DT_REL 17 /* address of rel. tbl. w addends */
+#define DT_RELSZ 18 /* size of DT_REL relocation table */
+#define DT_RELENT 19 /* size of DT_REL relocation entry */
+#define DT_PLTREL 20 /* PLT referenced relocation entry */
+#define DT_DEBUG 21 /* debugger */
+#define DT_TEXTREL 22 /* Allow rel. mod. to unwritable seg */
+#define DT_JMPREL 23 /* add. of PLT's relocation entries */
+#define DT_BIND_NOW 24 /* Bind now regardless of env setting */
+#define DT_NUM 25 /* Number used. */
+#define DT_LOPROC 0x70000000 /* reserved range for processor */
+#define DT_HIPROC 0x7fffffff /* specific dynamic array tags */
+
+/* Standard ELF hashing function */
+unsigned int elf_hash(const unsigned char *name);
+
+/*
+ * Note Definitions
+ */
+typedef struct {
+ Elf32_Word namesz;
+ Elf32_Word descsz;
+ Elf32_Word type;
+} Elf32_Note;
+
+typedef struct {
+ Elf64_Half namesz;
+ Elf64_Half descsz;
+ Elf64_Half type;
+} Elf64_Note;
+
+
+#if defined(ELFSIZE)
+#define CONCAT(x,y) __CONCAT(x,y)
+#define ELFNAME(x) CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x)))
+#define ELFNAME2(x,y) CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y))))
+#define ELFNAMEEND(x) CONCAT(x,CONCAT(_elf,ELFSIZE))
+#define ELFDEFNNAME(x) CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x)))
+#endif
+
+#if defined(ELFSIZE) && (ELFSIZE == 32)
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Phdr Elf32_Phdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Rel Elf32_Rel
+#define Elf_RelA Elf32_Rela
+#define Elf_Dyn Elf32_Dyn
+#define Elf_Word Elf32_Word
+#define Elf_Sword Elf32_Sword
+#define Elf_Addr Elf32_Addr
+#define Elf_Off Elf32_Off
+#define Elf_Nhdr Elf32_Nhdr
+#define Elf_Note Elf32_Note
+
+#define ELF_R_SYM ELF32_R_SYM
+#define ELF_R_TYPE ELF32_R_TYPE
+#define ELF_R_INFO ELF32_R_INFO
+#define ELFCLASS ELFCLASS32
+
+#define ELF_ST_BIND ELF32_ST_BIND
+#define ELF_ST_TYPE ELF32_ST_TYPE
+#define ELF_ST_INFO ELF32_ST_INFO
+
+#define AuxInfo Aux32Info
+#elif defined(ELFSIZE) && (ELFSIZE == 64)
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Phdr Elf64_Phdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Rel Elf64_Rel
+#define Elf_RelA Elf64_Rela
+#define Elf_Dyn Elf64_Dyn
+#define Elf_Word Elf64_Word
+#define Elf_Sword Elf64_Sword
+#define Elf_Addr Elf64_Addr
+#define Elf_Off Elf64_Off
+#define Elf_Nhdr Elf64_Nhdr
+#define Elf_Note Elf64_Note
+
+#define ELF_R_SYM ELF64_R_SYM
+#define ELF_R_TYPE ELF64_R_TYPE
+#define ELF_R_INFO ELF64_R_INFO
+#define ELFCLASS ELFCLASS64
+
+#define ELF_ST_BIND ELF64_ST_BIND
+#define ELF_ST_TYPE ELF64_ST_TYPE
+#define ELF_ST_INFO ELF64_ST_INFO
+
+#define AuxInfo Aux64Info
+#endif
+
+#endif /* __XEN_PUBLIC_ELFSTRUCTS_H__ */
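
A small sketch of how the structures and macros above are typically used to
validate an image and walk its program headers. The function name is illustrative,
and a real loader would also check e_machine, e_version and the segment bounds:

    /* Count the PT_LOAD segments of a 32-bit ELF image held in memory. */
    static int
    count_load_segments(const void *image)
    {
        const Elf32_Ehdr *eh = image;
        const Elf32_Phdr *ph;
        int i, n = 0;

        if (!IS_ELF(*eh) || eh->e_ident[EI_CLASS] != ELFCLASS32)
            return -1;                          /* not a 32-bit ELF file */

        ph = (const Elf32_Phdr *)((const char *)image + eh->e_phoff);
        for (i = 0; i < eh->e_phnum; i++)
            if (ph[i].p_type == PT_LOAD)
                n++;
        return n;
    }
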
diff --git a/xen/public/event_channel.h b/xen/public/event_channel.h
new file mode 100644
index 00000000..d35cce53
--- /dev/null
+++ b/xen/public/event_channel.h
@@ -0,0 +1,264 @@
+/******************************************************************************
+ * event_channel.h
+ *
+ * Event channels between domains.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+/*
+ * Prototype for this hypercall is:
+ * int event_channel_op(int cmd, void *args)
+ * @cmd == EVTCHNOP_??? (event-channel operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+typedef uint32_t evtchn_port_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);
+
+/*
+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
+ * is allocated in <dom> and returned as <port>.
+ * NOTES:
+ * 1. If the caller is unprivileged then <dom> must be DOMID_SELF.
+ * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_alloc_unbound 6
+struct evtchn_alloc_unbound {
+ /* IN parameters */
+ domid_t dom, remote_dom;
+ /* OUT parameters */
+ evtchn_port_t port;
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
+
+/*
+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
+ * a port that is unbound and marked as accepting bindings from the calling
+ * domain. A fresh port is allocated in the calling domain and returned as
+ * <local_port>.
+ * NOTES:
+ * 1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_bind_interdomain 0
+struct evtchn_bind_interdomain {
+ /* IN parameters. */
+ domid_t remote_dom;
+ evtchn_port_t remote_port;
+ /* OUT parameters. */
+ evtchn_port_t local_port;
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
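A sketch of how the two operations above pair up; event_channel_op() is the entry point named in the comment at the top of this header, and handing the allocated port number to the peer (for instance through xenstore) is assumed to happen out of band:

static int offer_port(domid_t peer, evtchn_port_t *port)
{
    struct evtchn_alloc_unbound op = {
        .dom        = DOMID_SELF,    /* allocate in the calling domain */
        .remote_dom = peer           /* only <peer> may bind to it */
    };
    int rc = event_channel_op(EVTCHNOP_alloc_unbound, &op);

    if (rc == 0)
        *port = op.port;             /* advertise this value to the peer */
    return rc;
}

static int bind_to_peer(domid_t peer, evtchn_port_t remote_port,
                        evtchn_port_t *local_port)
{
    struct evtchn_bind_interdomain op = {
        .remote_dom  = peer,
        .remote_port = remote_port   /* the port the peer advertised */
    };
    int rc = event_channel_op(EVTCHNOP_bind_interdomain, &op);

    if (rc == 0)
        *local_port = op.local_port;
    return rc;
}

Either side can then signal the other on its local port with EVTCHNOP_send.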
+
+/*
+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
+ * vcpu.
+ * NOTES:
+ * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list
+ * in xen.h for the classification of each VIRQ.
+ * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be
+ * re-bound via EVTCHNOP_bind_vcpu.
+ * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu.
+ * The allocated event channel is bound to the specified vcpu and the
+ * binding cannot be changed.
+ */
+#define EVTCHNOP_bind_virq 1
+struct evtchn_bind_virq {
+ /* IN parameters. */
+ uint32_t virq;
+ uint32_t vcpu;
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
+
+/*
+ * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
+ * NOTES:
+ * 1. A physical IRQ may be bound to at most one event channel per domain.
+ * 2. Only a sufficiently-privileged domain may bind to a physical IRQ.
+ */
+#define EVTCHNOP_bind_pirq 2
+struct evtchn_bind_pirq {
+ /* IN parameters. */
+ uint32_t pirq;
+#define BIND_PIRQ__WILL_SHARE 1
+ uint32_t flags; /* BIND_PIRQ__* */
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
+
+/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ * 1. The allocated event channel is bound to the specified vcpu. The binding
+ * may not be changed.
+ */
+#define EVTCHNOP_bind_ipi 7
+struct evtchn_bind_ipi {
+ uint32_t vcpu;
+ /* OUT parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
+
+/*
+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
+ * interdomain then the remote end is placed in the unbound state
+ * (EVTCHNSTAT_unbound), awaiting a new connection.
+ */
+#define EVTCHNOP_close 3
+struct evtchn_close {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_close evtchn_close_t;
+
+/*
+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
+ * endpoint is <port>.
+ */
+#define EVTCHNOP_send 4
+struct evtchn_send {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_send evtchn_send_t;
+
+/*
+ * EVTCHNOP_status: Get the current status of the communication channel which
+ * has an endpoint at <dom, port>.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may obtain the status of an event
+ * channel for which <dom> is not DOMID_SELF.
+ */
+#define EVTCHNOP_status 5
+struct evtchn_status {
+ /* IN parameters */
+ domid_t dom;
+ evtchn_port_t port;
+ /* OUT parameters */
+#define EVTCHNSTAT_closed 0 /* Channel is not in use. */
+#define EVTCHNSTAT_unbound 1 /* Channel is awaiting interdom connection.*/
+#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */
+#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */
+ uint32_t status;
+ uint32_t vcpu; /* VCPU to which this channel is bound. */
+ union {
+ struct {
+ domid_t dom;
+ } unbound; /* EVTCHNSTAT_unbound */
+ struct {
+ domid_t dom;
+ evtchn_port_t port;
+ } interdomain; /* EVTCHNSTAT_interdomain */
+ uint32_t pirq; /* EVTCHNSTAT_pirq */
+ uint32_t virq; /* EVTCHNSTAT_virq */
+ } u;
+};
+typedef struct evtchn_status evtchn_status_t;
+
+/*
+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
+ * event is pending.
+ * NOTES:
+ * 1. IPI-bound channels always notify the vcpu specified at bind time.
+ * This binding cannot be changed.
+ * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time.
+ * This binding cannot be changed.
+ * 3. All other channels notify vcpu0 by default. This default is set when
+ * the channel is allocated (a port that is freed and subsequently reused
+ * has its binding reset to vcpu0).
+ */
+#define EVTCHNOP_bind_vcpu 8
+struct evtchn_bind_vcpu {
+ /* IN parameters. */
+ evtchn_port_t port;
+ uint32_t vcpu;
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
+
+/*
+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
+ * a notification to the appropriate VCPU if an event is pending.
+ */
+#define EVTCHNOP_unmask 9
+struct evtchn_unmask {
+ /* IN parameters. */
+ evtchn_port_t port;
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
+
+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+#define EVTCHNOP_reset 10
+struct evtchn_reset {
+ /* IN parameters. */
+ domid_t dom;
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
+ * Argument to event_channel_op_compat() hypercall. Superseded by new
+ * event_channel_op() hypercall since 0x00030202.
+ */
+struct evtchn_op {
+ uint32_t cmd; /* EVTCHNOP_* */
+ union {
+ struct evtchn_alloc_unbound alloc_unbound;
+ struct evtchn_bind_interdomain bind_interdomain;
+ struct evtchn_bind_virq bind_virq;
+ struct evtchn_bind_pirq bind_pirq;
+ struct evtchn_bind_ipi bind_ipi;
+ struct evtchn_close close;
+ struct evtchn_send send;
+ struct evtchn_status status;
+ struct evtchn_bind_vcpu bind_vcpu;
+ struct evtchn_unmask unmask;
+ } u;
+};
+typedef struct evtchn_op evtchn_op_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
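For illustration, the same EVTCHNOP_send wrapped in the cmd/union structure used by the compat hypercall; the exact prototype of event_channel_op_compat() is an assumption here:

static int notify_remote_compat(evtchn_port_t port)
{
    struct evtchn_op op = { .cmd = EVTCHNOP_send };

    op.u.send.port = port;
    return event_channel_op_compat(&op);
}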
+
+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/features.h b/xen/public/features.h
new file mode 100644
index 00000000..879131cd
--- /dev/null
+++ b/xen/public/features.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ * features.h
+ *
+ * Feature flags, reported by XENVER_get_features.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_FEATURES_H__
+#define __XEN_PUBLIC_FEATURES_H__
+
+/*
+ * If set, the guest does not need to write-protect its pagetables, and can
+ * update them via direct writes.
+ */
+#define XENFEAT_writable_page_tables 0
+
+/*
+ * If set, the guest does not need to write-protect its segment descriptor
+ * tables, and can update them via direct writes.
+ */
+#define XENFEAT_writable_descriptor_tables 1
+
+/*
+ * If set, translation between the guest's 'pseudo-physical' address space
+ * and the host's machine address space are handled by the hypervisor. In this
+ * mode the guest does not need to perform phys-to/from-machine translations
+ * when performing page table operations.
+ */
+#define XENFEAT_auto_translated_physmap 2
+
+/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
+#define XENFEAT_supervisor_mode_kernel 3
+
+/*
+ * If set, the guest does not need to allocate x86 PAE page directories
+ * below 4GB. This flag is usually implied by auto_translated_physmap.
+ */
+#define XENFEAT_pae_pgdir_above_4gb 4
+
+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
+#define XENFEAT_mmu_pt_update_preserve_ad 5
+
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist 6
+
+/*
+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
+ * available pte bits.
+ */
+#define XENFEAT_gnttab_map_avail_bits 7
+
+#define XENFEAT_NR_SUBMAPS 1
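The flag numbers above index a bitmap that XENVER_get_features returns one 32-bit submap at a time; a guest typically caches the submaps once at boot and then tests individual bits. A sketch, in which the cached array and the helper name are assumptions:

static uint32_t xen_features[XENFEAT_NR_SUBMAPS]; /* filled from XENVER_get_features */

static int xen_feature(unsigned int nr)
{
    return (xen_features[nr / 32] >> (nr % 32)) & 1;
}

/* e.g. if (xen_feature(XENFEAT_auto_translated_physmap)) ... */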
+
+#endif /* __XEN_PUBLIC_FEATURES_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/grant_table.h b/xen/public/grant_table.h
new file mode 100644
index 00000000..ad116e71
--- /dev/null
+++ b/xen/public/grant_table.h
@@ -0,0 +1,438 @@
+/******************************************************************************
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+
+/***********************************
+ * GRANT TABLE REPRESENTATION
+ */
+
+/* Some rough guidelines on accessing and updating grant-table entries
+ * in a concurrency-safe manner. For more information, Linux contains a
+ * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
+ *
+ * NB. WMB is a no-op on current-generation x86 processors. However, a
+ * compiler barrier will still be required.
+ *
+ * Introducing a valid entry into the grant table:
+ * 1. Write ent->domid.
+ * 2. Write ent->frame:
+ * GTF_permit_access: Frame to which access is permitted.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ * frame, or zero if none.
+ * 3. Write memory barrier (WMB).
+ * 4. Write ent->flags, inc. valid type.
+ *
+ * Invalidating an unused GTF_permit_access entry:
+ * 1. flags = ent->flags.
+ * 2. Observe that !(flags & (GTF_reading|GTF_writing)).
+ * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ * NB. No need for WMB as reuse of entry is control-dependent on success of
+ * step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ * This cannot be done directly. Request assistance from the domain controller
+ * which can set a timeout on the use of a grant entry and take necessary
+ * action. (NB. This is not yet implemented!).
+ *
+ * Invalidating an unused GTF_accept_transfer entry:
+ * 1. flags = ent->flags.
+ * 2. Observe that !(flags & GTF_transfer_committed). [*]
+ * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ * NB. No need for WMB as reuse of entry is control-dependent on success of
+ * step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ * [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ * The guest must /not/ modify the grant entry until the address of the
+ * transferred frame is written. It is safe for the guest to spin waiting
+ * for this to occur (detect by observing GTF_transfer_completed in
+ * ent->flags).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ * 1. Wait for (ent->flags & GTF_transfer_completed).
+ *
+ * Changing a GTF_permit_access from writable to read-only:
+ * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
+ *
+ * Changing a GTF_permit_access from read-only to writable:
+ * Use SMP-safe bit-setting instruction.
+ */
+
+/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+struct grant_entry {
+ /* GTF_xxx: various type and flag information. [XEN,GST] */
+ uint16_t flags;
+ /* The domain being granted foreign privileges. [GST] */
+ domid_t domid;
+ /*
+ * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
+ * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
+ */
+ uint32_t frame;
+};
+typedef struct grant_entry grant_entry_t;
+
+/*
+ * Type of grant entry.
+ * GTF_invalid: This grant entry grants no privileges.
+ * GTF_permit_access: Allow @domid to map/access @frame.
+ * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ * to this guest. Xen writes the page number to @frame.
+ */
+#define GTF_invalid (0U<<0)
+#define GTF_permit_access (1U<<0)
+#define GTF_accept_transfer (2U<<0)
+#define GTF_type_mask (3U<<0)
+
+/*
+ * Subflags for GTF_permit_access.
+ * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
+ * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST]
+ */
+#define _GTF_readonly (2)
+#define GTF_readonly (1U<<_GTF_readonly)
+#define _GTF_reading (3)
+#define GTF_reading (1U<<_GTF_reading)
+#define _GTF_writing (4)
+#define GTF_writing (1U<<_GTF_writing)
+#define _GTF_PWT (5)
+#define GTF_PWT (1U<<_GTF_PWT)
+#define _GTF_PCD (6)
+#define GTF_PCD (1U<<_GTF_PCD)
+#define _GTF_PAT (7)
+#define GTF_PAT (1U<<_GTF_PAT)
+
+/*
+ * Subflags for GTF_accept_transfer:
+ * GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ * to transferring ownership of a page frame. When a guest sees this flag
+ * it must /not/ modify the grant entry until GTF_transfer_completed is
+ * set by Xen.
+ * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ * after reading GTF_transfer_committed. Xen will always write the frame
+ * address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed (1U<<_GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed (1U<<_GTF_transfer_completed)
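A sketch of the two most common sequences from the guidelines at the top of this header, with wmb() and cmpxchg16() standing in for whatever write barrier and 16-bit compare-and-swap (returning the previous value) the guest provides:

/* Introducing a valid GTF_permit_access entry (steps 1-4 above). */
static void grant_access(grant_entry_t *ent, domid_t domid,
                         uint32_t frame, int readonly)
{
    ent->domid = domid;                               /* step 1 */
    ent->frame = frame;                               /* step 2 */
    wmb();                                            /* step 3 */
    ent->flags = GTF_permit_access |
                 (readonly ? GTF_readonly : 0);       /* step 4 */
}

/* Invalidating an unused GTF_permit_access entry (steps 1-3 above). */
static int end_access(grant_entry_t *ent)
{
    uint16_t flags = ent->flags;                      /* step 1 */

    if (flags & (GTF_reading | GTF_writing))          /* step 2: still in use */
        return 0;
    return cmpxchg16(&ent->flags, flags, 0) == flags; /* step 3 */
}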
+
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef uint32_t grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <handle>
+ * is a negative status code.
+ * NOTES:
+ * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ * via which I/O devices may access the granted frame.
+ * 2. If GNTMAP_host_map is specified then a mapping will be added at
+ * either a host virtual address in the current address space, or at
+ * a PTE at the specified machine address. The type of mapping to
+ * perform is selected through the GNTMAP_contains_pte flag, and the
+ * address is specified in <host_addr>.
+ * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ * host mapping is destroyed by other means then it is *NOT* guaranteed
+ * to be accounted to the correct grant reference!
+ */
+#define GNTTABOP_map_grant_ref 0
+struct gnttab_map_grant_ref {
+ /* IN parameters. */
+ uint64_t host_addr;
+ uint32_t flags; /* GNTMAP_* */
+ grant_ref_t ref;
+ domid_t dom;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+ grant_handle_t handle;
+ uint64_t dev_bus_addr;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>.
+ * NOTES:
+ * 1. The call may fail in an undefined manner if either mapping is not
+ * tracked by <handle>.
+ * 2. After executing a batch of unmaps, it is guaranteed that no stale
+ * mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_grant_ref 1
+struct gnttab_unmap_grant_ref {
+ /* IN parameters. */
+ uint64_t host_addr;
+ uint64_t dev_bus_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
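A sketch of a host-CPU mapping and its teardown; grant_table_op() stands in for the guest's GNTTABOP_* hypercall wrapper (one operation per call), and GNTMAP_host_map and the GNTST_* codes are the flags and status values defined further down in this header:

static int map_foreign_frame(domid_t dom, grant_ref_t ref,
                             uint64_t vaddr, grant_handle_t *handle)
{
    struct gnttab_map_grant_ref map = {
        .host_addr = vaddr,              /* where to map it in our space */
        .flags     = GNTMAP_host_map,    /* host CPU access only */
        .ref       = ref,
        .dom       = dom
    };

    if (grant_table_op(GNTTABOP_map_grant_ref, &map, 1) != 0 ||
        map.status != GNTST_okay)
        return -1;
    *handle = map.handle;                /* needed later to unmap */
    return 0;
}

static int unmap_foreign_frame(uint64_t vaddr, grant_handle_t handle)
{
    struct gnttab_unmap_grant_ref unmap = {
        .host_addr    = vaddr,
        .dev_bus_addr = 0,               /* no device mapping to tear down */
        .handle       = handle
    };

    if (grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1) != 0 ||
        unmap.status != GNTST_okay)
        return -1;
    return 0;
}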
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ * 3. Xen may not support more than a single grant-table page per domain.
+ */
+#define GNTTABOP_setup_table 2
+struct gnttab_setup_table {
+ /* IN parameters. */
+ domid_t dom;
+ uint32_t nr_frames;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+ XEN_GUEST_HANDLE(ulong) frame_list;
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+#define GNTTABOP_dump_table 3
+struct gnttab_dump_table {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
+
+/*
+ * GNTTABOP_transfer: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <domid, ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+#define GNTTABOP_transfer 4
+struct gnttab_transfer {
+ /* IN parameters. */
+ xen_pfn_t mfn;
+ domid_t domid;
+ grant_ref_t ref;
+ /* OUT parameters. */
+ int16_t status;
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+
+
+/*
+ * GNTTABOP_copy: Hypervisor-based copy.
+ * Source and destination can be either MFNs or, for foreign domains,
+ * grant references. The foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify what type source and destinations are (either MFN
+ * or grant reference).
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame and len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref (0)
+#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref (1)
+#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref)
+
+#define GNTTABOP_copy 5
+typedef struct gnttab_copy {
+ /* IN parameters. */
+ struct {
+ union {
+ grant_ref_t ref;
+ xen_pfn_t gmfn;
+ } u;
+ domid_t domid;
+ uint16_t offset;
+ } source, dest;
+ uint16_t len;
+ uint16_t flags; /* GNTCOPY_* */
+ /* OUT parameters. */
+ int16_t status;
+} gnttab_copy_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
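A sketch of filling one copy descriptor that pulls <len> bytes out of a foreign grant into a local frame; submitting it via the GNTTABOP_copy hypercall and checking op->status afterwards is left to the caller:

static void fill_copy_from_gref(gnttab_copy_t *op, domid_t src_dom,
                                grant_ref_t src_ref, xen_pfn_t dst_gmfn,
                                uint16_t len)
{
    op->source.u.ref  = src_ref;
    op->source.domid  = src_dom;
    op->source.offset = 0;
    op->dest.u.gmfn   = dst_gmfn;
    op->dest.domid    = DOMID_SELF;
    op->dest.offset   = 0;
    op->len           = len;                 /* must not cross a page boundary */
    op->flags         = GNTCOPY_source_gref; /* source is a grant, dest an MFN */
}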
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_query_size 6
+struct gnttab_query_size {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ uint32_t nr_frames;
+ uint32_t max_nr_frames;
+ int16_t status; /* GNTST_* */
+};
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>. <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ * 1. The call may fail in an undefined manner if either mapping is not
+ * tracked by <handle>.
+ * 2. After executing a batch of unmaps, it is guaranteed that no stale
+ * mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace 7
+struct gnttab_unmap_and_replace {
+ /* IN parameters. */
+ uint64_t host_addr;
+ uint64_t new_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
+
+/*
+ * Bitfield values for gnttab_map_grant_ref.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map (0)
+#define GNTMAP_device_map (1<<_GNTMAP_device_map)
+ /* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map (1)
+#define GNTMAP_host_map (1<<_GNTMAP_host_map)
+ /* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly (2)
+#define GNTMAP_readonly (1<<_GNTMAP_readonly)
+ /*
+ * GNTMAP_host_map subflag:
+ * 0 => The host mapping is usable only by the guest OS.
+ * 1 => The host mapping is usable by guest OS + current application.
+ */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map (1<<_GNTMAP_application_map)
+
+ /*
+ * GNTMAP_contains_pte subflag:
+ * 0 => This map request contains a host virtual address.
+ * 1 => This map request contains the machine address of the PTE to update.
+ */
+#define _GNTMAP_contains_pte (4)
+#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0 (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+#define GNTST_okay (0) /* Normal return. */
+#define GNTST_general_error (-1) /* General undefined error. */
+#define GNTST_bad_domain (-2) /* Unrecognised domain id. */
+#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
+#define GNTST_bad_page (-9) /* Specified page was invalid for op. */
+#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */
+#define GNTST_address_too_big (-11) /* transfer page address too large. */
+
+#define GNTTABOP_error_msgs { \
+ "okay", \
+ "undefined error", \
+ "unrecognised domain id", \
+ "invalid grant reference", \
+ "invalid mapping handle", \
+ "invalid virtual address", \
+ "invalid device address", \
+ "no spare translation slot in the I/O MMU", \
+ "permission denied", \
+ "bad page", \
+ "copy arguments cross page boundary", \
+ "page address size too large" \
+}
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/blkif.h b/xen/public/io/blkif.h
new file mode 100644
index 00000000..2380066b
--- /dev/null
+++ b/xen/public/io/blkif.h
@@ -0,0 +1,141 @@
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ 0
+#define BLKIF_OP_WRITE 1
+/*
+ * Recognised only if "feature-barrier" is present in backend xenbus info.
+ * The "feature-barrier" node contains a boolean indicating whether barrier
+ * requests are likely to succeed or fail. Either way, a barrier request
+ * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
+ * the underlying block-device hardware. The boolean simply indicates whether
+ * or not it is worthwhile for the frontend to attempt barrier requests.
+ * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
+ * create the "feature-barrier" node!
+ */
+#define BLKIF_OP_WRITE_BARRIER 2
+/*
+ * Recognised if "feature-flush-cache" is present in backend xenbus
+ * info. A flush will ask the underlying storage hardware to flush its
+ * non-volatile caches as appropriate. The "feature-flush-cache" node
+ * contains a boolean indicating whether flush requests are likely to
+ * succeed or fail. Either way, a flush request may fail at any time
+ * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
+ * block-device hardware. The boolean simply indicates whether or not it
+ * is worthwhile for the frontend to attempt flushes. If a backend does
+ * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
+ * "feature-flush-cache" node!
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE 3
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+struct blkif_request_segment {
+ grant_ref_t gref; /* reference to I/O buffer frame */
+ /* @first_sect: first sector in frame to transfer (inclusive). */
+ /* @last_sect: last sector in frame to transfer (inclusive). */
+ uint8_t first_sect, last_sect;
+};
+
+struct blkif_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct blkif_request blkif_request_t;
+
+struct blkif_response {
+ uint64_t id; /* copied from request */
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (only happens on barrier writes). */
+#define BLKIF_RSP_EOPNOTSUPP -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY 0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
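A sketch of queuing a single-segment read on an already initialised front ring; blkif_front_ring_t and the RING_* helpers are assumed to be those generated by the DEFINE_RING_TYPES() invocation above (see ring.h), and 512-byte sectors in a 4 KiB frame are assumed for the segment bounds:

static void enqueue_read(blkif_front_ring_t *ring, blkif_vdev_t handle,
                         blkif_sector_t sector, grant_ref_t gref, uint64_t id)
{
    int notify;
    blkif_request_t *req = RING_GET_REQUEST(ring, ring->req_prod_pvt);

    req->operation         = BLKIF_OP_READ;
    req->nr_segments       = 1;
    req->handle            = handle;
    req->id                = id;        /* echoed back in the response */
    req->sector_number     = sector;
    req->seg[0].gref       = gref;      /* granted I/O buffer frame */
    req->seg[0].first_sect = 0;
    req->seg[0].last_sect  = 7;         /* whole frame: sectors 0..7 */

    ring->req_prod_pvt++;
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
    if (notify) {
        /* kick the backend over the event channel (not shown) */
    }
}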
+
+#define VDISK_CDROM 0x1
+#define VDISK_REMOVABLE 0x2
+#define VDISK_READONLY 0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/console.h b/xen/public/io/console.h
new file mode 100644
index 00000000..4b8c01a4
--- /dev/null
+++ b/xen/public/io/console.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ * console.h
+ *
+ * Console I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef uint32_t XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
+
+struct xencons_interface {
+ char in[1024];
+ char out[2048];
+ XENCONS_RING_IDX in_cons, in_prod;
+ XENCONS_RING_IDX out_cons, out_prod;
+};
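A sketch of pushing output through the shared page; wmb() is a placeholder for the guest's write barrier, and notifying the backend over the console event channel is left to the caller:

static int xencons_put(struct xencons_interface *intf, const char *buf, int len)
{
    int sent = 0;
    XENCONS_RING_IDX cons = intf->out_cons, prod = intf->out_prod;

    while (sent < len && (prod - cons) < sizeof(intf->out))
        intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = buf[sent++];

    wmb();                   /* ring contents before producer index */
    intf->out_prod = prod;
    return sent;
}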
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/fbif.h b/xen/public/io/fbif.h
new file mode 100644
index 00000000..95377a0e
--- /dev/null
+++ b/xen/public/io/fbif.h
@@ -0,0 +1,176 @@
+/*
+ * fbif.h -- Xen virtual frame buffer device
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_FBIF_H__
+#define __XEN_PUBLIC_IO_FBIF_H__
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+ uint8_t type; /* XENFB_TYPE_UPDATE */
+ int32_t x; /* source x */
+ int32_t y; /* source y */
+ int32_t width; /* rect width */
+ int32_t height; /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+ uint8_t type; /* XENFB_TYPE_RESIZE */
+ int32_t width; /* width in pixels */
+ int32_t height; /* height in pixels */
+ int32_t stride; /* stride in bytes */
+ int32_t depth; /* depth in bits */
+ int32_t offset; /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+ uint8_t type;
+ struct xenfb_update update;
+ struct xenfb_resize resize;
+ char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend their preferred period of
+ * refresh. Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it. Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the update frequency to guide
+ * their periodic refreshes.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+ uint8_t type; /* XENFB_TYPE_REFRESH_PERIOD */
+ uint32_t period; /* period of refresh, in ms,
+ * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+ uint8_t type;
+ struct xenfb_refresh_period refresh_period;
+ char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+ ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+ (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+ ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+ (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+
+struct xenfb_page
+{
+ uint32_t in_cons, in_prod;
+ uint32_t out_cons, out_prod;
+
+ int32_t width; /* the width of the framebuffer (in pixels) */
+ int32_t height; /* the height of the framebuffer (in pixels) */
+ uint32_t line_length; /* the length of a row of pixels (in bytes) */
+ uint32_t mem_length; /* the length of the framebuffer (in bytes) */
+ uint8_t depth; /* the depth of a pixel (in bits) */
+
+ /*
+ * Framebuffer page directory
+ *
+ * Each directory page holds PAGE_SIZE / sizeof(*pd)
+ * framebuffer pages, and can thus map up to PAGE_SIZE *
+ * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
+ * sizeof(unsigned long) == 4/8, that's 4 MiB on 32 bit and 2 MiB on
+ * 64 bit. 256 directories give enough room for a 512 MiB
+ * framebuffer with a max resolution of 12,800x10,240. Should
+ * be enough for a while with room leftover for expansion.
+ */
+ unsigned long pd[256];
+};
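A sketch of publishing an update event through the out ring defined above; checking for ring space against out_cons and notifying the backend are only hinted at, and wmb() is a placeholder for the guest's write barrier:

static void xenfb_send_update(struct xenfb_page *page,
                              int x, int y, int w, int h)
{
    uint32_t prod = page->out_prod;  /* caller ensures prod - out_cons < XENFB_OUT_RING_LEN */
    union xenfb_out_event *ev = &XENFB_OUT_RING_REF(page, prod);

    ev->update.type   = XENFB_TYPE_UPDATE;
    ev->update.x      = x;
    ev->update.y      = y;
    ev->update.width  = w;
    ev->update.height = h;

    wmb();                           /* event body before producer index */
    page->out_prod = prod + 1;
    /* then notify the backend over the event channel (not shown) */
}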
+
+/*
+ * Wart: xenkbd needs to know default resolution. Put it here until a
+ * better solution is found, but don't leak it to the backend.
+ */
+#ifdef __KERNEL__
+#define XENFB_WIDTH 800
+#define XENFB_HEIGHT 600
+#define XENFB_DEPTH 32
+#endif
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/fsif.h b/xen/public/io/fsif.h
new file mode 100644
index 00000000..04ef9284
--- /dev/null
+++ b/xen/public/io/fsif.h
@@ -0,0 +1,191 @@
+/******************************************************************************
+ * fsif.h
+ *
+ * Interface to FS level split device drivers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007, Grzegorz Milos, <gm281@cam.ac.uk>.
+ */
+
+#ifndef __XEN_PUBLIC_IO_FSIF_H__
+#define __XEN_PUBLIC_IO_FSIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+#define REQ_FILE_OPEN 1
+#define REQ_FILE_CLOSE 2
+#define REQ_FILE_READ 3
+#define REQ_FILE_WRITE 4
+#define REQ_STAT 5
+#define REQ_FILE_TRUNCATE 6
+#define REQ_REMOVE 7
+#define REQ_RENAME 8
+#define REQ_CREATE 9
+#define REQ_DIR_LIST 10
+#define REQ_CHMOD 11
+#define REQ_FS_SPACE 12
+#define REQ_FILE_SYNC 13
+
+struct fsif_open_request {
+ grant_ref_t gref;
+};
+
+struct fsif_close_request {
+ uint32_t fd;
+};
+
+struct fsif_read_request {
+ uint32_t fd;
+ int32_t pad;
+ uint64_t len;
+ uint64_t offset;
+ grant_ref_t grefs[1]; /* Variable length */
+};
+
+struct fsif_write_request {
+ uint32_t fd;
+ int32_t pad;
+ uint64_t len;
+ uint64_t offset;
+ grant_ref_t grefs[1]; /* Variable length */
+};
+
+struct fsif_stat_request {
+ uint32_t fd;
+};
+
+/* This structure is a copy of some fields from stat structure, returned
+ * via the ring. */
+struct fsif_stat_response {
+ int32_t stat_mode;
+ uint32_t stat_uid;
+ uint32_t stat_gid;
+ int32_t stat_ret;
+ int64_t stat_size;
+ int64_t stat_atime;
+ int64_t stat_mtime;
+ int64_t stat_ctime;
+};
+
+struct fsif_truncate_request {
+ uint32_t fd;
+ int32_t pad;
+ int64_t length;
+};
+
+struct fsif_remove_request {
+ grant_ref_t gref;
+};
+
+struct fsif_rename_request {
+ uint16_t old_name_offset;
+ uint16_t new_name_offset;
+ grant_ref_t gref;
+};
+
+struct fsif_create_request {
+ int8_t directory;
+ int8_t pad;
+ int16_t pad2;
+ int32_t mode;
+ grant_ref_t gref;
+};
+
+struct fsif_list_request {
+ uint32_t offset;
+ grant_ref_t gref;
+};
+
+#define NR_FILES_SHIFT 0
+#define NR_FILES_SIZE 16 /* 16 bits for the number of files mask */
+#define NR_FILES_MASK (((1ULL << NR_FILES_SIZE) - 1) << NR_FILES_SHIFT)
+#define ERROR_SIZE 32 /* 32 bits for the error mask */
+#define ERROR_SHIFT (NR_FILES_SIZE + NR_FILES_SHIFT)
+#define ERROR_MASK (((1ULL << ERROR_SIZE) - 1) << ERROR_SHIFT)
+#define HAS_MORE_SHIFT (ERROR_SHIFT + ERROR_SIZE)
+#define HAS_MORE_FLAG (1ULL << HAS_MORE_SHIFT)
+
+struct fsif_chmod_request {
+ uint32_t fd;
+ int32_t mode;
+};
+
+struct fsif_space_request {
+ grant_ref_t gref;
+};
+
+struct fsif_sync_request {
+ uint32_t fd;
+};
+
+
+/* FS operation request */
+struct fsif_request {
+ uint8_t type; /* Type of the request */
+ uint8_t pad;
+ uint16_t id; /* Request ID, copied to the response */
+ uint32_t pad2;
+ union {
+ struct fsif_open_request fopen;
+ struct fsif_close_request fclose;
+ struct fsif_read_request fread;
+ struct fsif_write_request fwrite;
+ struct fsif_stat_request fstat;
+ struct fsif_truncate_request ftruncate;
+ struct fsif_remove_request fremove;
+ struct fsif_rename_request frename;
+ struct fsif_create_request fcreate;
+ struct fsif_list_request flist;
+ struct fsif_chmod_request fchmod;
+ struct fsif_space_request fspace;
+ struct fsif_sync_request fsync;
+ } u;
+};
+typedef struct fsif_request fsif_request_t;
+
+/* FS operation response */
+struct fsif_response {
+ uint16_t id;
+ uint16_t pad1;
+ uint32_t pad2;
+ union {
+ uint64_t ret_val;
+ struct fsif_stat_response fstat;
+ };
+};
+
+typedef struct fsif_response fsif_response_t;
+
+#define FSIF_RING_ENTRY_SIZE 64
+
+#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \
+ sizeof(grant_ref_t) + 1)
+#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \
+ sizeof(grant_ref_t) + 1)
+
+DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response);
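As a sketch of how a request slots into the 64-byte ring entry, filling a READ with its destination grant references; the caller is assumed to have granted the buffer frames and to enqueue the request with the ring helpers generated above:

static void fsif_fill_read(struct fsif_request *req, uint16_t id, uint32_t fd,
                           uint64_t offset, uint64_t len,
                           const grant_ref_t *grefs, unsigned int nr_grefs)
{
    unsigned int i;

    req->type           = REQ_FILE_READ;
    req->id             = id;             /* copied into the response */
    req->u.fread.fd     = fd;
    req->u.fread.offset = offset;
    req->u.fread.len    = len;
    for (i = 0; i < nr_grefs && i < FSIF_NR_READ_GNTS; i++)
        req->u.fread.grefs[i] = grefs[i]; /* destination buffer frames */
}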
+
+#define STATE_INITIALISED "init"
+#define STATE_READY "ready"
+
+
+
+#endif
diff --git a/xen/public/io/kbdif.h b/xen/public/io/kbdif.h
new file mode 100644
index 00000000..e1d66a51
--- /dev/null
+++ b/xen/public/io/kbdif.h
@@ -0,0 +1,132 @@
+/*
+ * kbdif.h -- Xen virtual keyboard/mouse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/* Pointer movement event */
+#define XENKBD_TYPE_MOTION 1
+/* Event type 2 currently not used */
+/* Key event (includes pointer buttons) */
+#define XENKBD_TYPE_KEY 3
+/*
+ * Pointer position event
+ * Capable backend sets feature-abs-pointer in xenstore.
+ * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
+ * request-abs-update in xenstore.
+ */
+#define XENKBD_TYPE_POS 4
+
+struct xenkbd_motion
+{
+ uint8_t type; /* XENKBD_TYPE_MOTION */
+ int32_t rel_x; /* relative X motion */
+ int32_t rel_y; /* relative Y motion */
+ int32_t rel_z; /* relative Z motion (wheel) */
+};
+
+struct xenkbd_key
+{
+ uint8_t type; /* XENKBD_TYPE_KEY */
+ uint8_t pressed; /* 1 if pressed; 0 otherwise */
+ uint32_t keycode; /* KEY_* from linux/input.h */
+};
+
+struct xenkbd_position
+{
+ uint8_t type; /* XENKBD_TYPE_POS */
+ int32_t abs_x; /* absolute X position (in FB pixels) */
+ int32_t abs_y; /* absolute Y position (in FB pixels) */
+ int32_t rel_z; /* relative Z motion (wheel) */
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+ uint8_t type;
+ struct xenkbd_motion motion;
+ struct xenkbd_key key;
+ struct xenkbd_position pos;
+ char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event
+{
+ uint8_t type;
+ char pad[XENKBD_OUT_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+ ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+ (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+ ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+ (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page
+{
+ uint32_t in_cons, in_prod;
+ uint32_t out_cons, out_prod;
+};
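A sketch of draining pending input events from the shared page above; rmb()/mb() are placeholders for the guest's read and full barriers:

static void xenkbd_poll(struct xenkbd_page *page,
                        void (*handle)(const union xenkbd_in_event *))
{
    uint32_t cons = page->in_cons, prod = page->in_prod;

    rmb();                   /* producer index before event bodies */
    while (cons != prod) {
        handle(&XENKBD_IN_RING_REF(page, cons));
        cons++;
    }
    mb();                    /* finish reading before publishing in_cons */
    page->in_cons = cons;
}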
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/netif.h b/xen/public/io/netif.h
new file mode 100644
index 00000000..fbb5c272
--- /dev/null
+++ b/xen/public/io/netif.h
@@ -0,0 +1,205 @@
+/******************************************************************************
+ * netif.h
+ *
+ * Unified network-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Notifications after enqueuing any type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
+ * If the client sends notification for rx requests then it should specify
+ * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
+ * that it cannot safely queue packets (as it may not be kicked to send them).
+ */
+
+/*
+ * This is the 'wire' format for packets:
+ * Request 1: netif_tx_request -- NETTXF_* (any flags)
+ * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info)
+ * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE)
+ * Request 4: netif_tx_request -- NETTXF_more_data
+ * Request 5: netif_tx_request -- NETTXF_more_data
+ * ...
+ * Request N: netif_tx_request -- 0
+ */
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETTXF_csum_blank (0)
+#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETTXF_data_validated (1)
+#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data (2)
+#define NETTXF_more_data (1U<<_NETTXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETTXF_extra_info (3)
+#define NETTXF_extra_info (1U<<_NETTXF_extra_info)
+
+struct netif_tx_request {
+ grant_ref_t gref; /* Reference to buffer page */
+ uint16_t offset; /* Offset within buffer page */
+ uint16_t flags; /* NETTXF_* */
+ uint16_t id; /* Echoed in response message. */
+ uint16_t size; /* Packet size in bytes. */
+};
+typedef struct netif_tx_request netif_tx_request_t;
+
+/* Types of netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MAX (4)
+
+/* netif_extra_info flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+
+/* GSO types - only TCPv4 currently supported. */
+#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
+
+/*
+ * This structure needs to fit within both netif_tx_request and
+ * netif_rx_response for compatibility.
+ */
+struct netif_extra_info {
+ uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */
+ uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
+
+ union {
+ /*
+ * XEN_NETIF_EXTRA_TYPE_GSO:
+ */
+ struct {
+ /*
+ * Maximum payload size of each segment. For example, for TCP this
+ * is just the path MSS.
+ */
+ uint16_t size;
+
+ /*
+ * GSO type. This determines the protocol of the packet and any
+ * extra features required to segment the packet properly.
+ */
+ uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
+
+ /* Future expansion. */
+ uint8_t pad;
+
+ /*
+ * GSO features. This specifies any extra GSO features required
+ * to process this packet, such as ECN support for TCPv4.
+ */
+ uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
+ } gso;
+
+ /*
+ * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+ * Backend advertises availability via 'feature-multicast-control'
+ * xenbus node containing value '1'.
+ * Frontend requests this feature by advertising
+ * 'request-multicast-control' xenbus node containing value '1'.
+ * If multicast control is requested then multicast flooding is
+ * disabled and the frontend must explicitly register its interest
+ * in multicast groups using dummy transmit requests containing
+ * MCAST_{ADD,DEL} extra-info fragments.
+ */
+ struct {
+ uint8_t addr[6]; /* Address to add/remove. */
+ } mcast;
+
+ uint16_t pad[3];
+ } u;
+};
+typedef struct netif_extra_info netif_extra_info_t;
+
+struct netif_tx_response {
+ uint16_t id;
+ int16_t status; /* NETIF_RSP_* */
+};
+typedef struct netif_tx_response netif_tx_response_t;
+
+struct netif_rx_request {
+ uint16_t id; /* Echoed in response message. */
+ grant_ref_t gref; /* Reference to incoming granted frame */
+};
+typedef struct netif_rx_request netif_rx_request_t;
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETRXF_data_validated (0)
+#define NETRXF_data_validated (1U<<_NETRXF_data_validated)
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETRXF_csum_blank (1)
+#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
+
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data (2)
+#define NETRXF_more_data (1U<<_NETRXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETRXF_extra_info (3)
+#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+
+struct netif_rx_response {
+ uint16_t id;
+ uint16_t offset; /* Offset in page of start of received packet */
+ uint16_t flags; /* NETRXF_* */
+ int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
+};
+typedef struct netif_rx_response netif_rx_response_t;
+
+/*
+ * Generate netif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
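+
+/*
+ * Illustrative note (not part of the ABI): per ring.h, the two invocations
+ * above generate the shared-page and per-end ring types used by network
+ * frontends and backends, e.g. for the TX direction:
+ *
+ *     netif_tx_sring_t      -- the page shared between the two ends
+ *     netif_tx_front_ring_t -- frontend-private producer/consumer state
+ *     netif_tx_back_ring_t  -- backend-private producer/consumer state
+ */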
+
+#define NETIF_RSP_DROPPED -2
+#define NETIF_RSP_ERROR -1
+#define NETIF_RSP_OKAY 0
+/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
+#define NETIF_RSP_NULL 1
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/pciif.h b/xen/public/io/pciif.h
new file mode 100644
index 00000000..0a0ffcc6
--- /dev/null
+++ b/xen/public/io/pciif.h
@@ -0,0 +1,101 @@
+/*
+ * PCI Backend/Frontend Common Data Structures & Macros
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#ifndef __XEN_PCI_COMMON_H__
+#define __XEN_PCI_COMMON_H__
+
+/* Be sure to bump this number if you change this file */
+#define XEN_PCI_MAGIC "7"
+
+/* xen_pci_sharedinfo flags */
+#define _XEN_PCIF_active (0)
+#define XEN_PCIF_active (1<<_XEN_PCIF_active)
+
+/* xen_pci_op commands */
+#define XEN_PCI_OP_conf_read (0)
+#define XEN_PCI_OP_conf_write (1)
+#define XEN_PCI_OP_enable_msi (2)
+#define XEN_PCI_OP_disable_msi (3)
+#define XEN_PCI_OP_enable_msix (4)
+#define XEN_PCI_OP_disable_msix (5)
+
+/* xen_pci_op error numbers */
+#define XEN_PCI_ERR_success (0)
+#define XEN_PCI_ERR_dev_not_found (-1)
+#define XEN_PCI_ERR_invalid_offset (-2)
+#define XEN_PCI_ERR_access_denied (-3)
+#define XEN_PCI_ERR_not_implemented (-4)
+/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */
+#define XEN_PCI_ERR_op_failed (-5)
+
+/*
+ * Nominally (PAGE_SIZE - sizeof(struct xen_pci_op)) / sizeof(struct xen_msix_entry),
+ * and should not exceed 128.
+ */
+#define SH_INFO_MAX_VEC 128
+
+struct xen_msix_entry {
+ uint16_t vector;
+ uint16_t entry;
+};
+struct xen_pci_op {
+ /* IN: what action to perform: XEN_PCI_OP_* */
+ uint32_t cmd;
+
+ /* OUT: will contain an error number (if any) from errno.h */
+ int32_t err;
+
+ /* IN: which device to touch */
+ uint32_t domain; /* PCI Domain/Segment */
+ uint32_t bus;
+ uint32_t devfn;
+
+ /* IN: which configuration registers to touch */
+ int32_t offset;
+ int32_t size;
+
+ /* IN/OUT: Contains the result after a READ or the value to WRITE */
+ uint32_t value;
+ /* IN: Contains extra info for this operation */
+ uint32_t info;
+ /* IN: Parameters for MSI-X */
+ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
+};
+
+struct xen_pci_sharedinfo {
+ /* flags - XEN_PCIF_* */
+ uint32_t flags;
+ struct xen_pci_op op;
+};
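+
+/*
+ * Illustrative sketch (not part of this interface): to read the 16-bit
+ * vendor ID of PCI device 0000:00:01.0 through the backend, a frontend
+ * could fill the shared op roughly as follows, then signal the backend
+ * (e.g. over an event channel) and wait for it to clear XEN_PCIF_active.
+ * Here 'sharedinfo' is a hypothetical pointer to the shared
+ * struct xen_pci_sharedinfo page:
+ *
+ *     struct xen_pci_op *op = &sharedinfo->op;
+ *     op->cmd    = XEN_PCI_OP_conf_read;
+ *     op->domain = 0;
+ *     op->bus    = 0;
+ *     op->devfn  = (1 << 3) | 0;   -- slot 1, function 0
+ *     op->offset = 0x00;           -- config-space offset of the vendor ID
+ *     op->size   = 2;
+ *
+ * The result (or an XEN_PCI_ERR_* code) comes back in op->value and op->err.
+ */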
+
+#endif /* __XEN_PCI_COMMON_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/protocols.h b/xen/public/io/protocols.h
new file mode 100644
index 00000000..77bd1bdd
--- /dev/null
+++ b/xen/public/io/protocols.h
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
+#define XEN_IO_PROTO_ABI_IA64 "ia64-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__ia64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/xen/public/io/ring.h b/xen/public/io/ring.h
new file mode 100644
index 00000000..6ce1d0d4
--- /dev/null
+++ b/xen/public/io/ring.h
@@ -0,0 +1,307 @@
+/******************************************************************************
+ * ring.h
+ *
+ * Shared producer-consumer ring macros.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+#include "../xen-compat.h"
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030208
+#define xen_mb() mb()
+#define xen_rmb() rmb()
+#define xen_wmb() wmb()
+#endif
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1))
+#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x))
+#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __RING_SIZE(_s, _sz) \
+ (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
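+
+/*
+ * Worked example (illustrative, not normative): with 4-byte RING_IDX fields
+ * the sring header defined below occupies 64 bytes (four indexes plus 48
+ * bytes of padding), so on a 4096-byte page a ring whose union entry is
+ * 12 bytes holds (4096 - 64) / 12 = 336 slots, which __RD32 rounds down
+ * to a ring size of 256.
+ */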
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ * DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ *
+ * mytag_sring_t - The shared ring.
+ * mytag_front_ring_t - The 'front' half of the ring.
+ * mytag_back_ring_t - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ * mytag_front_ring_t front_ring;
+ * SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ * mytag_back_ring_t back_ring;
+ * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
+ \
+/* Shared ring entry */ \
+union __name##_sring_entry { \
+ __req_t req; \
+ __rsp_t rsp; \
+}; \
+ \
+/* Shared ring page */ \
+struct __name##_sring { \
+ RING_IDX req_prod, req_event; \
+ RING_IDX rsp_prod, rsp_event; \
+ uint8_t pad[48]; \
+ union __name##_sring_entry ring[1]; /* variable-length */ \
+}; \
+ \
+/* "Front" end's private variables */ \
+struct __name##_front_ring { \
+ RING_IDX req_prod_pvt; \
+ RING_IDX rsp_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* "Back" end's private variables */ \
+struct __name##_back_ring { \
+ RING_IDX rsp_prod_pvt; \
+ RING_IDX req_cons; \
+ unsigned int nr_ents; \
+ struct __name##_sring *sring; \
+}; \
+ \
+/* Syntactic sugar */ \
+typedef struct __name##_sring __name##_sring_t; \
+typedef struct __name##_front_ring __name##_front_ring_t; \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do { \
+ (_s)->req_prod = (_s)->rsp_prod = 0; \
+ (_s)->req_event = (_s)->rsp_event = 1; \
+ (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \
+} while(0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do { \
+ (_r)->req_prod_pvt = 0; \
+ (_r)->rsp_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do { \
+ (_r)->rsp_prod_pvt = 0; \
+ (_r)->req_cons = 0; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+ (_r)->sring = (_s); \
+} while (0)
+
+/* Initialize to existing shared indexes -- for recovery */
+#define FRONT_RING_ATTACH(_r, _s, __size) do { \
+ (_r)->sring = (_s); \
+ (_r)->req_prod_pvt = (_s)->req_prod; \
+ (_r)->rsp_cons = (_s)->rsp_prod; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+#define BACK_RING_ATTACH(_r, _s, __size) do { \
+ (_r)->sring = (_s); \
+ (_r)->rsp_prod_pvt = (_s)->rsp_prod; \
+ (_r)->req_cons = (_s)->req_prod; \
+ (_r)->nr_ents = __RING_SIZE(_s, __size); \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r) \
+ ((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r) \
+ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front.)
+ */
+#define RING_FULL(_r) \
+ (RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+ ((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \
+ unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \
+ unsigned int rsp = RING_SIZE(_r) - \
+ ((_r)->req_cons - (_r)->rsp_prod_pvt); \
+ req < rsp ? req : rsp; \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
+ ((((_r)->sring->req_prod - (_r)->req_cons) < \
+ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \
+ ((_r)->sring->req_prod - (_r)->req_cons) : \
+ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+#define RING_GET_RESPONSE(_r, _idx) \
+ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
+ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do { \
+ xen_wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = (_r)->req_prod_pvt; \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do { \
+ xen_wmb(); /* front sees resps /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ *
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ *
+ * When enqueuing requests or responses:
+ *
+ * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ * is a boolean return value. True indicates that the receiver requires an
+ * asynchronous notification.
+ *
+ * After dequeuing requests or responses (before sleeping the connection):
+ *
+ * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ * The second argument is a boolean return value. True indicates that there
+ * are pending messages on the ring (i.e., the connection should not be put
+ * to sleep).
+ *
+ * These macros will set the req_event/rsp_event field to trigger a
+ * notification on the very next message that is enqueued. If you want to
+ * create batches of work (i.e., only receive a notification after several
+ * messages have been enqueued) then you will need to create a customised
+ * version of the FINAL_CHECK macro in your own code, which sets the event
+ * field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->req_prod; \
+ RING_IDX __new = (_r)->req_prod_pvt; \
+ xen_wmb(); /* back sees requests /before/ updated producer index */ \
+ (_r)->sring->req_prod = __new; \
+ xen_mb(); /* back sees new requests /before/ we check req_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \
+ (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \
+ RING_IDX __old = (_r)->sring->rsp_prod; \
+ RING_IDX __new = (_r)->rsp_prod_pvt; \
+ xen_wmb(); /* front sees resps /before/ updated producer index */ \
+ (_r)->sring->rsp_prod = __new; \
+ xen_mb(); /* front sees new resps /before/ we check rsp_event */ \
+ (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \
+ (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+ if (_work_to_do) break; \
+ (_r)->sring->req_event = (_r)->req_cons + 1; \
+ xen_mb(); \
+ (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+ if (_work_to_do) break; \
+ (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \
+ xen_mb(); \
+ (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+} while (0)
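+
+/*
+ * Usage sketch (illustrative only; everything other than the macros above
+ * is hypothetical).  A backend service loop might consume requests and
+ * push responses roughly as follows, with 'back' being its
+ * mytag_back_ring_t:
+ *
+ *     int notify, more;
+ *     do {
+ *         while (RING_HAS_UNCONSUMED_REQUESTS(&back)) {
+ *             req = RING_GET_REQUEST(&back, back.req_cons++);
+ *             rsp = RING_GET_RESPONSE(&back, back.rsp_prod_pvt++);
+ *             handle_request(req, rsp);      -- hypothetical handler
+ *         }
+ *         RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&back, notify);
+ *         if (notify)
+ *             notify_remote(evtchn);         -- e.g. an event-channel kick
+ *         RING_FINAL_CHECK_FOR_REQUESTS(&back, more);
+ *     } while (more);
+ */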
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/tpmif.h b/xen/public/io/tpmif.h
new file mode 100644
index 00000000..02ccdab4
--- /dev/null
+++ b/xen/public/io/tpmif.h
@@ -0,0 +1,77 @@
+/******************************************************************************
+ * tpmif.h
+ *
+ * TPM I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, IBM Corporation
+ *
+ * Author: Stefan Berger, stefanb@us.ibm.com
+ * Grant table support: Mahadevan Gomathisankaran
+ *
+ * This code has been derived from tools/libxc/xen/io/netif.h
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+#include "../grant_table.h"
+
+struct tpmif_tx_request {
+ unsigned long addr; /* Machine address of packet. */
+ grant_ref_t ref; /* grant table access reference */
+ uint16_t unused;
+ uint16_t size; /* Packet size in bytes. */
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
+
+/*
+ * The TPMIF_TX_RING_SIZE defines the number of pages the
+ * front-end and backend can exchange (= size of array).
+ */
+typedef uint32_t TPMIF_RING_IDX;
+
+#define TPMIF_TX_RING_SIZE 1
+
+/* This structure must fit in a memory page. */
+
+struct tpmif_ring {
+ struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
+
+struct tpmif_tx_interface {
+ struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/xenbus.h b/xen/public/io/xenbus.h
new file mode 100644
index 00000000..4a053df2
--- /dev/null
+++ b/xen/public/io/xenbus.h
@@ -0,0 +1,80 @@
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus. States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+ XenbusStateUnknown = 0,
+
+ XenbusStateInitialising = 1,
+
+ /*
+ * InitWait: Finished early initialisation but waiting for information
+ * from the peer or hotplug scripts.
+ */
+ XenbusStateInitWait = 2,
+
+ /*
+ * Initialised: Waiting for a connection from the peer.
+ */
+ XenbusStateInitialised = 3,
+
+ XenbusStateConnected = 4,
+
+ /*
+ * Closing: The device is being closed due to an error or an unplug event.
+ */
+ XenbusStateClosing = 5,
+
+ XenbusStateClosed = 6,
+
+ /*
+ * Reconfiguring: The device is being reconfigured.
+ */
+ XenbusStateReconfiguring = 7,
+
+ XenbusStateReconfigured = 8
+};
+typedef enum xenbus_state XenbusState;
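+
+/*
+ * Informative note (not part of the protocol definition): in the common
+ * handshake, each end watches the other's state node in xenstore and
+ * typically advances as
+ *
+ *     frontend: Initialising -> Initialised -> Connected
+ *     backend:  Initialising -> InitWait    -> Connected
+ *
+ * while teardown normally passes through Closing to Closed.
+ */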
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/io/xs_wire.h b/xen/public/io/xs_wire.h
new file mode 100644
index 00000000..dd2d9667
--- /dev/null
+++ b/xen/public/io/xs_wire.h
@@ -0,0 +1,132 @@
+/*
+ * Details of the "wire" protocol between Xen Store Daemon and client
+ * library or guest kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ */
+
+#ifndef _XS_WIRE_H
+#define _XS_WIRE_H
+
+enum xsd_sockmsg_type
+{
+ XS_DEBUG,
+ XS_DIRECTORY,
+ XS_READ,
+ XS_GET_PERMS,
+ XS_WATCH,
+ XS_UNWATCH,
+ XS_TRANSACTION_START,
+ XS_TRANSACTION_END,
+ XS_INTRODUCE,
+ XS_RELEASE,
+ XS_GET_DOMAIN_PATH,
+ XS_WRITE,
+ XS_MKDIR,
+ XS_RM,
+ XS_SET_PERMS,
+ XS_WATCH_EVENT,
+ XS_ERROR,
+ XS_IS_DOMAIN_INTRODUCED,
+ XS_RESUME,
+ XS_SET_TARGET
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+#ifdef linux_specific
+/* We hand errors as strings, for portability. */
+struct xsd_errors
+{
+ int errnum;
+ const char *errstring;
+};
+#define XSD_ERROR(x) { x, #x }
+/* LINTED: static unused */
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+ = {
+ XSD_ERROR(EINVAL),
+ XSD_ERROR(EACCES),
+ XSD_ERROR(EEXIST),
+ XSD_ERROR(EISDIR),
+ XSD_ERROR(ENOENT),
+ XSD_ERROR(ENOMEM),
+ XSD_ERROR(ENOSPC),
+ XSD_ERROR(EIO),
+ XSD_ERROR(ENOTEMPTY),
+ XSD_ERROR(ENOSYS),
+ XSD_ERROR(EROFS),
+ XSD_ERROR(EBUSY),
+ XSD_ERROR(EAGAIN),
+ XSD_ERROR(EISCONN)
+};
+#endif
+
+struct xsd_sockmsg
+{
+ uint32_t type; /* XS_??? */
+ uint32_t req_id;/* Request identifier, echoed in daemon's response. */
+ uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
+ uint32_t len; /* Length of data following this. */
+
+ /* Generally followed by nul-terminated string(s). */
+};
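+
+/*
+ * Illustrative wire layout (not normative): an XS_READ of the hypothetical
+ * path "/vm/name" in transaction 0 is sent as an xsd_sockmsg header
+ * followed immediately by the nul-terminated path:
+ *
+ *     struct xsd_sockmsg hdr = {
+ *         .type   = XS_READ,
+ *         .req_id = 1,                     -- echoed back in the reply
+ *         .tx_id  = 0,
+ *         .len    = sizeof("/vm/name"),    -- payload bytes incl. the nul
+ *     };
+ *
+ * i.e. write hdr, then the 9 bytes of "/vm/name\0", into the req ring.
+ */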
+
+enum xs_watch_type
+{
+ XS_WATCH_PATH = 0,
+ XS_WATCH_TOKEN
+};
+
+/* Inter-domain shared memory communications. */
+#define XENSTORE_RING_SIZE 1024
+typedef uint32_t XENSTORE_RING_IDX;
+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
+struct xenstore_domain_interface {
+ char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
+ char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
+ XENSTORE_RING_IDX req_cons, req_prod;
+ XENSTORE_RING_IDX rsp_cons, rsp_prod;
+};
+
+/* Violating this is very bad. See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/kexec.h b/xen/public/kexec.h
new file mode 100644
index 00000000..fc19f2fe
--- /dev/null
+++ b/xen/public/kexec.h
@@ -0,0 +1,189 @@
+/******************************************************************************
+ * kexec.h - Public portion
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@verge.net.au>
+ * - Magnus Damm <magnus@valinux.co.jp>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+
+/* This file describes the Kexec / Kdump hypercall interface for Xen.
+ *
+ * Kexec under vanilla Linux allows a user to reboot the physical machine
+ * into a new user-specified kernel. The Xen port extends this idea
+ * to allow rebooting of the machine from dom0. When kexec for dom0
+ * is used to reboot, both the hypervisor and the domains get replaced
+ * with some other kernel. It is possible to kexec between vanilla
+ * Linux and Xen and back again. Xen to Xen works well too.
+ *
+ * The hypercall interface for kexec can be divided into three main
+ * types of hypercall operations:
+ *
+ * 1) Range information:
+ * This is used by the dom0 kernel to ask the hypervisor about various
+ * address information. This information is needed to allow kexec-tools
+ * to fill in the ELF headers for /proc/vmcore properly.
+ *
+ * 2) Load and unload of images:
+ * There are no big surprises here, the kexec binary from kexec-tools
+ * runs in userspace in dom0. The tool loads/unloads data into the
+ * dom0 kernel such as new kernel, initramfs and hypervisor. When
+ * loaded the dom0 kernel performs a load hypercall operation, and
+ * before releasing all page references the dom0 kernel calls unload.
+ *
+ * 3) Kexec operation:
+ * This is used to start a previously loaded kernel.
+ */
+
+#include "xen.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#define KEXEC_XEN_NO_PAGES 17
+#endif
+
+/*
+ * Prototype for this hypercall is:
+ * int kexec_op(int cmd, void *args)
+ * @cmd == KEXEC_CMD_...
+ * KEXEC operation to perform
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Kexec supports two types of operation:
+ * - kexec into a regular kernel, very similar to a standard reboot
+ * - KEXEC_TYPE_DEFAULT is used to specify this type
+ * - kexec into a special "crash kernel", aka kexec-on-panic
+ * - KEXEC_TYPE_CRASH is used to specify this type
+ * - parts of our system may be broken at kexec-on-panic time
+ * - the code should be kept as simple and self-contained as possible
+ */
+
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH 1
+
+
+/* The kexec implementation for Xen allows the user to load two
+ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH.
+ * All data needed for a kexec reboot is kept in one xen_kexec_image_t
+ * per "instance". The data mainly consists of machine address lists to pages
+ * together with destination addresses. The data in xen_kexec_image_t
+ * is passed to the "code page" which is one page of code that performs
+ * the final relocations before jumping to the new kernel.
+ */
+
+typedef struct xen_kexec_image {
+#if defined(__i386__) || defined(__x86_64__)
+ unsigned long page_list[KEXEC_XEN_NO_PAGES];
+#endif
+#if defined(__ia64__)
+ unsigned long reboot_code_buffer;
+#endif
+ unsigned long indirection_page;
+ unsigned long start_address;
+} xen_kexec_image_t;
+
+/*
+ * Perform kexec having previously loaded a kexec or kdump kernel
+ * as appropriate.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ */
+#define KEXEC_CMD_kexec 0
+typedef struct xen_kexec_exec {
+ int type;
+} xen_kexec_exec_t;
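+
+/*
+ * Illustrative only: assuming a guest-side hypercall wrapper matching the
+ * prototype documented above (int kexec_op(int cmd, void *args)), kicking
+ * off a previously loaded default image could look like:
+ *
+ *     xen_kexec_exec_t exec = { .type = KEXEC_TYPE_DEFAULT };
+ *     int rc = kexec_op(KEXEC_CMD_kexec, &exec);
+ */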
+
+/*
+ * Load/Unload kernel image for kexec or kdump.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ * image == relocation information for kexec (ignored for unload) [in]
+ */
+#define KEXEC_CMD_kexec_load 1
+#define KEXEC_CMD_kexec_unload 2
+typedef struct xen_kexec_load {
+ int type;
+ xen_kexec_image_t image;
+} xen_kexec_load_t;
+
+#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */
+#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */
+#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */
+#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap
+ * Note that although this is adjacent
+ * to Xen it exists in a separate EFI
+ * region on ia64, and thus needs to be
+ * inserted into iomem_machine separately */
+#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of
+ * the ia64_boot_param */
+#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size
+ * of the EFI Memory Map */
+#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */
+
+/*
+ * Find the address and size of certain memory areas
+ * range == KEXEC_RANGE_... [in]
+ * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in]
+ * size == number of bytes reserved in window [out]
+ * start == address of the first byte in the window [out]
+ */
+#define KEXEC_CMD_kexec_get_range 3
+typedef struct xen_kexec_range {
+ int range;
+ int nr;
+ unsigned long size;
+ unsigned long start;
+} xen_kexec_range_t;
+
+/* vmcoreinfo stuff */
+#define VMCOREINFO_BYTES (4096)
+#define VMCOREINFO_NOTE_NAME "VMCOREINFO_XEN"
+void arch_crash_save_vmcoreinfo(void);
+void vmcoreinfo_append_str(const char *fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+#define VMCOREINFO_PAGESIZE(value) \
+ vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
+#define VMCOREINFO_SYMBOL(name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ALIAS(alias, name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #alias, (unsigned long)&name)
+#define VMCOREINFO_STRUCT_SIZE(name) \
+ vmcoreinfo_append_str("SIZE(%s)=%zu\n", #name, sizeof(struct name))
+#define VMCOREINFO_OFFSET(name, field) \
+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+ (unsigned long)offsetof(struct name, field))
+#define VMCOREINFO_OFFSET_ALIAS(name, field, alias) \
+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #alias, \
+ (unsigned long)offsetof(struct name, field))
+
+#endif /* _XEN_PUBLIC_KEXEC_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/libelf.h b/xen/public/libelf.h
new file mode 100644
index 00000000..d2383301
--- /dev/null
+++ b/xen/public/libelf.h
@@ -0,0 +1,265 @@
+/******************************************************************************
+ * libelf.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XC_LIBELF__
+#define __XC_LIBELF__ 1
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
+#define XEN_ELF_LITTLE_ENDIAN
+#else
+#error define architectural endianness
+#endif
+
+#undef ELFSIZE
+#include "elfnote.h"
+#include "elfstructs.h"
+#include "features.h"
+
+/* ------------------------------------------------------------------------ */
+
+typedef union {
+ Elf32_Ehdr e32;
+ Elf64_Ehdr e64;
+} elf_ehdr;
+
+typedef union {
+ Elf32_Phdr e32;
+ Elf64_Phdr e64;
+} elf_phdr;
+
+typedef union {
+ Elf32_Shdr e32;
+ Elf64_Shdr e64;
+} elf_shdr;
+
+typedef union {
+ Elf32_Sym e32;
+ Elf64_Sym e64;
+} elf_sym;
+
+typedef union {
+ Elf32_Rel e32;
+ Elf64_Rel e64;
+} elf_rel;
+
+typedef union {
+ Elf32_Rela e32;
+ Elf64_Rela e64;
+} elf_rela;
+
+typedef union {
+ Elf32_Note e32;
+ Elf64_Note e64;
+} elf_note;
+
+struct elf_binary {
+ /* elf binary */
+ const char *image;
+ size_t size;
+ char class;
+ char data;
+
+ const elf_ehdr *ehdr;
+ const char *sec_strtab;
+ const elf_shdr *sym_tab;
+ const char *sym_strtab;
+
+ /* loaded to */
+ char *dest;
+ uint64_t pstart;
+ uint64_t pend;
+ uint64_t reloc_offset;
+
+ uint64_t bsd_symtab_pstart;
+ uint64_t bsd_symtab_pend;
+
+#ifndef __XEN__
+ /* misc */
+ FILE *log;
+#endif
+ int verbose;
+};
+
+/* ------------------------------------------------------------------------ */
+/* accessing elf header fields */
+
+#ifdef XEN_ELF_BIG_ENDIAN
+# define NATIVE_ELFDATA ELFDATA2MSB
+#else
+# define NATIVE_ELFDATA ELFDATA2LSB
+#endif
+
+#define elf_32bit(elf) (ELFCLASS32 == (elf)->class)
+#define elf_64bit(elf) (ELFCLASS64 == (elf)->class)
+#define elf_msb(elf) (ELFDATA2MSB == (elf)->data)
+#define elf_lsb(elf) (ELFDATA2LSB == (elf)->data)
+#define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data)
+
+#define elf_uval(elf, str, elem) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? elf_access_unsigned((elf), (str), \
+ offsetof(typeof(*(str)),e64.elem), \
+ sizeof((str)->e64.elem)) \
+ : elf_access_unsigned((elf), (str), \
+ offsetof(typeof(*(str)),e32.elem), \
+ sizeof((str)->e32.elem)))
+
+#define elf_sval(elf, str, elem) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? elf_access_signed((elf), (str), \
+ offsetof(typeof(*(str)),e64.elem), \
+ sizeof((str)->e64.elem)) \
+ : elf_access_signed((elf), (str), \
+ offsetof(typeof(*(str)),e32.elem), \
+ sizeof((str)->e32.elem)))
+
+#define elf_size(elf, str) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? sizeof((str)->e64) : sizeof((str)->e32))
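+
+/*
+ * For example (illustrative), elf_uval(elf, elf->ehdr, e_phnum) yields the
+ * program-header count of either a 32-bit or a 64-bit image, with any
+ * byte-swapping handled by elf_access_unsigned(), so callers need not know
+ * the ELF class or data encoding.
+ */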
+
+uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr,
+ uint64_t offset, size_t size);
+int64_t elf_access_signed(struct elf_binary *elf, const void *ptr,
+ uint64_t offset, size_t size);
+
+uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_tools.c */
+
+int elf_shdr_count(struct elf_binary *elf);
+int elf_phdr_count(struct elf_binary *elf);
+
+const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name);
+const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index);
+const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index);
+
+const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr);
+const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr);
+const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr);
+
+const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr);
+const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr);
+
+const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol);
+const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index);
+
+const char *elf_note_name(struct elf_binary *elf, const elf_note * note);
+const void *elf_note_desc(struct elf_binary *elf, const elf_note * note);
+uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note);
+const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note);
+
+int elf_is_elfbinary(const void *image);
+int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_loader.c */
+
+int elf_init(struct elf_binary *elf, const char *image, size_t size);
+#ifdef __XEN__
+void elf_set_verbose(struct elf_binary *elf);
+#else
+void elf_set_logfile(struct elf_binary *elf, FILE * log, int verbose);
+#endif
+
+void elf_parse_binary(struct elf_binary *elf);
+void elf_load_binary(struct elf_binary *elf);
+
+void *elf_get_ptr(struct elf_binary *elf, unsigned long addr);
+uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol);
+
+void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart); /* private */
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_relocate.c */
+
+int elf_reloc(struct elf_binary *elf);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_dominfo.c */
+
+#define UNSET_ADDR ((uint64_t)-1)
+
+enum xen_elfnote_type {
+ XEN_ENT_NONE = 0,
+ XEN_ENT_LONG = 1,
+ XEN_ENT_STR = 2
+};
+
+struct xen_elfnote {
+ enum xen_elfnote_type type;
+ const char *name;
+ union {
+ const char *str;
+ uint64_t num;
+ } data;
+};
+
+struct elf_dom_parms {
+ /* raw */
+ const char *guest_info;
+ const void *elf_note_start;
+ const void *elf_note_end;
+ struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1];
+
+ /* parsed */
+ char guest_os[16];
+ char guest_ver[16];
+ char xen_ver[16];
+ char loader[16];
+ int pae;
+ int bsd_symtab;
+ uint64_t virt_base;
+ uint64_t virt_entry;
+ uint64_t virt_hypercall;
+ uint64_t virt_hv_start_low;
+ uint64_t elf_paddr_offset;
+ uint32_t f_supported[XENFEAT_NR_SUBMAPS];
+ uint32_t f_required[XENFEAT_NR_SUBMAPS];
+
+ /* calculated */
+ uint64_t virt_offset;
+ uint64_t virt_kstart;
+ uint64_t virt_kend;
+};
+
+static inline void elf_xen_feature_set(int nr, uint32_t * addr)
+{
+ addr[nr >> 5] |= 1 << (nr & 31);
+}
+static inline int elf_xen_feature_get(int nr, uint32_t * addr)
+{
+ return !!(addr[nr >> 5] & (1 << (nr & 31)));
+}
+
+int elf_xen_parse_features(const char *features,
+ uint32_t *supported,
+ uint32_t *required);
+int elf_xen_parse_note(struct elf_binary *elf,
+ struct elf_dom_parms *parms,
+ const elf_note *note);
+int elf_xen_parse_guest_info(struct elf_binary *elf,
+ struct elf_dom_parms *parms);
+int elf_xen_parse(struct elf_binary *elf,
+ struct elf_dom_parms *parms);
+
+#endif /* __XC_LIBELF__ */
diff --git a/xen/public/memory.h b/xen/public/memory.h
new file mode 100644
index 00000000..d7b9fff9
--- /dev/null
+++ b/xen/public/memory.h
@@ -0,0 +1,312 @@
+/******************************************************************************
+ * memory.h
+ *
+ * Memory reservation and information.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns the
+ * number of extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap 6
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+/*
+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O
+ * devices often have a 32-bit limitation even in 64-bit systems). If zero
+ * then the user has no addressing restriction. This field is not used by
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x) (x)
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
+/* NUMA node to allocate from. */
+#define XENMEMF_node(x) (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+#endif
+
+struct xen_memory_reservation {
+
+ /*
+ * XENMEM_increase_reservation:
+ * OUT: MFN (*not* GMFN) bases of extents that were allocated
+ * XENMEM_decrease_reservation:
+ * IN: GMFN bases of extents to free
+ * XENMEM_populate_physmap:
+ * IN: GPFN bases of extents to populate with memory
+ * OUT: GMFN bases of extents that were allocated
+ * (NB. This command also updates the mach_to_phys translation table)
+ */
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+ /* Number of extents, and size/alignment of each (2^extent_order pages). */
+ xen_ulong_t nr_extents;
+ unsigned int extent_order;
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+ /* XENMEMF flags. */
+ unsigned int mem_flags;
+#else
+ unsigned int address_bits;
+#endif
+
+ /*
+ * Domain whose reservation is being changed.
+ * Unprivileged domains can specify only DOMID_SELF.
+ */
+ domid_t domid;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
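+
+/*
+ * Illustrative sketch (not normative): a balloon-style driver returning one
+ * page to Xen might issue the following, where memory_op() is an assumed
+ * guest-side hypercall wrapper and set_xen_guest_handle() an assumed helper
+ * for filling a XEN_GUEST_HANDLE (neither is defined in this header):
+ *
+ *     xen_pfn_t gmfn = page_to_release;   -- hypothetical GMFN to free
+ *     struct xen_memory_reservation r = {
+ *         .nr_extents   = 1,
+ *         .extent_order = 0,              -- a single 2^0 = 1 page extent
+ *         .domid        = DOMID_SELF,
+ *     };
+ *     set_xen_guest_handle(r.extent_start, &gmfn);
+ *     memory_op(XENMEM_decrease_reservation, &r);
+ */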
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange 11
+struct xen_memory_exchange {
+ /*
+ * [IN] Details of memory extents to be exchanged (GMFN bases).
+ * Note that @in.address_bits is ignored and unused.
+ */
+ struct xen_memory_reservation in;
+
+ /*
+ * [IN/OUT] Details of new memory extents.
+ * We require that:
+ * 1. @in.domid == @out.domid
+ * 2. @in.nr_extents << @in.extent_order ==
+ * @out.nr_extents << @out.extent_order
+ * 3. @in.extent_start and @out.extent_start lists must not overlap
+ * 4. @out.extent_start lists GPFN bases to be populated
+ * 5. @out.extent_start is overwritten with allocated GMFN bases
+ */
+ struct xen_memory_reservation out;
+
+ /*
+ * [OUT] Number of input extents that were successfully exchanged:
+ * 1. The first @nr_exchanged input extents were successfully
+ * deallocated.
+ * 2. The corresponding first entries in the output extent list correctly
+ * indicate the GMFNs that were successfully exchanged.
+ * 3. All other input and output extents are untouched.
+ * 4. If not all input extents are exchanged then the return code of this
+ * command will be non-zero.
+ * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+ */
+ xen_ulong_t nr_exchanged;
+};
+typedef struct xen_memory_exchange xen_memory_exchange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
+
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page 2
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation 3
+#define XENMEM_maximum_reservation 4
+
+/*
+ * Returns the maximum GPFN in use by the guest, or -ve errcode on failure.
+ */
+#define XENMEM_maximum_gpfn 14
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have an m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list 5
+struct xen_machphys_mfn_list {
+ /*
+ * Size of the 'extent_start' array. Fewer entries will be filled if the
+ * machphys table is smaller than max_extents * 2MB.
+ */
+ unsigned int max_extents;
+
+ /*
+ * Pointer to buffer to fill with list of extent starts. If there are
+ * any large discontiguities in the machine address space, 2MB gaps in
+ * the machphys table will be represented by an MFN base of zero.
+ */
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+ /*
+ * Number of extents written to the above array. This will be smaller
+ * than 'max_extents' if the machphys table is smaller than max_extents * 2MB.
+ */
+ unsigned int nr_extents;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have an m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping 12
+struct xen_machphys_mapping {
+ xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */
+ xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap 7
+struct xen_add_to_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* Source mapping space. */
+#define XENMAPSPACE_shared_info 0 /* shared info page */
+#define XENMAPSPACE_grant_table 1 /* grant table page */
+#define XENMAPSPACE_mfn 2 /* usual MFN */
+ unsigned int space;
+
+ /* Index into source mapping space. */
+ xen_ulong_t idx;
+
+ /* GPFN where the source mapping page should appear. */
+ xen_pfn_t gpfn;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap 15
+struct xen_remove_from_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* GPFN of the current mapping of the page. */
+ xen_pfn_t gpfn;
+};
+typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
+
+/*
+ * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
+ * code on failure. This call only works for auto-translated guests.
+ */
+#define XENMEM_translate_gpfn_list 8
+struct xen_translate_gpfn_list {
+ /* Which domain to translate for? */
+ domid_t domid;
+
+ /* Length of list. */
+ xen_ulong_t nr_gpfns;
+
+ /* List of GPFNs to translate. */
+ XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
+
+ /*
+ * Output list to contain MFN translations. May be the same as the input
+ * list (in which case each input GPFN is overwritten with the output MFN).
+ */
+ XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
+};
+typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_memory_map 9
+struct xen_memory_map {
+ /*
+ * On call the number of entries which can be stored in buffer. On
+ * return the number of entries which have been stored in
+ * buffer.
+ */
+ unsigned int nr_entries;
+
+ /*
+ * Entries in the buffer are in the same format as returned by the
+ * BIOS INT 0x15 EAX=0xE820 call.
+ */
+ XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_machine_memory_map 10
+
+/*
+ * Set the pseudo-physical memory map of a domain, as returned by
+ * XENMEM_memory_map.
+ * arg == addr of xen_foreign_memory_map_t.
+ */
+#define XENMEM_set_memory_map 13
+struct xen_foreign_memory_map {
+ domid_t domid;
+ struct xen_memory_map map;
+};
+typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+
+#endif /* __XEN_PUBLIC_MEMORY_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/nmi.h b/xen/public/nmi.h
new file mode 100644
index 00000000..b2b84018
--- /dev/null
+++ b/xen/public/nmi.h
@@ -0,0 +1,78 @@
+/******************************************************************************
+ * nmi.h
+ *
+ * NMI callback registration and reason codes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error 0
+#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error)
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error)
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown 2
+#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0. All other callers will receive EINVAL.
+ * arg == pointer to xennmi_callback structure.
+ */
+#define XENNMI_register_callback 0
+struct xennmi_callback {
+ unsigned long handler_address;
+ unsigned long pad;
+};
+typedef struct xennmi_callback xennmi_callback_t;
+DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
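+
+/*
+ * Illustrative only: with a guest-side wrapper matching the prototype
+ * documented above (long nmi_op(unsigned int cmd, void *arg)), dom0's
+ * vcpu0 could register a handler like this (do_nmi_callback is a
+ * hypothetical guest routine):
+ *
+ *     struct xennmi_callback cb = {
+ *         .handler_address = (unsigned long)do_nmi_callback,
+ *         .pad             = 0,
+ *     };
+ *     nmi_op(XENNMI_register_callback, &cb);
+ */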
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/physdev.h b/xen/public/physdev.h
new file mode 100644
index 00000000..8057277b
--- /dev/null
+++ b/xen/public/physdev.h
@@ -0,0 +1,219 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_PHYSDEV_H__
+#define __XEN_PUBLIC_PHYSDEV_H__
+
+/*
+ * Prototype for this hypercall is:
+ * int physdev_op(int cmd, void *args)
+ * @cmd == PHYSDEVOP_??? (physdev operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Notify end-of-interrupt (EOI) for the specified IRQ.
+ * @arg == pointer to physdev_eoi structure.
+ */
+#define PHYSDEVOP_eoi 12
+struct physdev_eoi {
+ /* IN */
+ uint32_t irq;
+};
+typedef struct physdev_eoi physdev_eoi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
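+
+/*
+ * Illustrative only: assuming a guest-side wrapper matching the prototype
+ * documented above (int physdev_op(int cmd, void *args)), acknowledging
+ * IRQ 9 would look roughly like:
+ *
+ *     struct physdev_eoi eoi = { .irq = 9 };
+ *     physdev_op(PHYSDEVOP_eoi, &eoi);
+ */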
+
+/*
+ * Query the status of an IRQ line.
+ * @arg == pointer to physdev_irq_status_query structure.
+ */
+#define PHYSDEVOP_irq_status_query 5
+struct physdev_irq_status_query {
+ /* IN */
+ uint32_t irq;
+ /* OUT */
+ uint32_t flags; /* XENIRQSTAT_* */
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
+
+/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
+#define _XENIRQSTAT_needs_eoi (0)
+#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi)
+
+/* IRQ shared by multiple guests? */
+#define _XENIRQSTAT_shared (1)
+#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared)
+
+/*
+ * Set the current VCPU's I/O privilege level.
+ * @arg == pointer to physdev_set_iopl structure.
+ */
+#define PHYSDEVOP_set_iopl 6
+struct physdev_set_iopl {
+ /* IN */
+ uint32_t iopl;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
+
+/*
+ * Set the current VCPU's I/O-port permissions bitmap.
+ * @arg == pointer to physdev_set_iobitmap structure.
+ */
+#define PHYSDEVOP_set_iobitmap 7
+struct physdev_set_iobitmap {
+ /* IN */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+ XEN_GUEST_HANDLE(uint8) bitmap;
+#else
+ uint8_t *bitmap;
+#endif
+ uint32_t nr_ports;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
+
+/*
+ * Read or write an IO-APIC register.
+ * @arg == pointer to physdev_apic structure.
+ */
+#define PHYSDEVOP_apic_read 8
+#define PHYSDEVOP_apic_write 9
+struct physdev_apic {
+ /* IN */
+ unsigned long apic_physbase;
+ uint32_t reg;
+ /* IN or OUT */
+ uint32_t value;
+};
+typedef struct physdev_apic physdev_apic_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
+
+/*
+ * Allocate or free a physical upcall vector for the specified IRQ line.
+ * @arg == pointer to physdev_irq structure.
+ */
+#define PHYSDEVOP_alloc_irq_vector 10
+#define PHYSDEVOP_free_irq_vector 11
+struct physdev_irq {
+ /* IN */
+ uint32_t irq;
+ /* IN or OUT */
+ uint32_t vector;
+};
+typedef struct physdev_irq physdev_irq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
+
+#define MAP_PIRQ_TYPE_MSI 0x0
+#define MAP_PIRQ_TYPE_GSI 0x1
+#define MAP_PIRQ_TYPE_UNKNOWN 0x2
+
+#define PHYSDEVOP_map_pirq 13
+struct physdev_map_pirq {
+ domid_t domid;
+ /* IN */
+ int type;
+ /* IN */
+ int index;
+ /* IN or OUT */
+ int pirq;
+ /* IN */
+ int bus;
+ /* IN */
+ int devfn;
+ /* IN */
+ int entry_nr;
+ /* IN */
+ uint64_t table_base;
+};
+typedef struct physdev_map_pirq physdev_map_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
+
+#define PHYSDEVOP_unmap_pirq 14
+struct physdev_unmap_pirq {
+ domid_t domid;
+ /* IN */
+ int pirq;
+};
+
+typedef struct physdev_unmap_pirq physdev_unmap_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t);
+
+#define PHYSDEVOP_manage_pci_add 15
+#define PHYSDEVOP_manage_pci_remove 16
+struct physdev_manage_pci {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+};
+
+typedef struct physdev_manage_pci physdev_manage_pci_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+
+/*
+ * Argument to physdev_op_compat() hypercall. Superseded by the new physdev_op()
+ * hypercall since 0x00030202.
+ */
+struct physdev_op {
+ uint32_t cmd;
+ union {
+ struct physdev_irq_status_query irq_status_query;
+ struct physdev_set_iopl set_iopl;
+ struct physdev_set_iobitmap set_iobitmap;
+ struct physdev_apic apic_op;
+ struct physdev_irq irq_op;
+ } u;
+};
+typedef struct physdev_op physdev_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
+
+/*
+ * Notify that some PIRQ-bound event channels have been unmasked.
+ * ** This command is obsolete since interface version 0x00030202 and is **
+ * ** unsupported by newer versions of Xen. **
+ */
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4
+
+/*
+ * These all-capitals physdev operation names are superseded by the new names
+ * (defined above) since interface version 0x00030202.
+ */
+#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query
+#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl
+#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap
+#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read
+#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write
+#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector
+#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
+#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared
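+
+/*
+ * Illustrative sketch, not part of the interface: issuing PHYSDEVOP_set_iopl
+ * through the legacy physdev_op_compat() packaging, where the command and
+ * its argument travel together in one struct physdev_op.  The wrapper name
+ * hypothetical_physdev_op_compat() is an assumption made for this example.
+ */
+extern int hypothetical_physdev_op_compat(struct physdev_op *op); /* assumed */
+
+static inline int example_set_iopl_compat(uint32_t iopl)
+{
+    struct physdev_op op;
+
+    op.cmd = PHYSDEVOP_SET_IOPL;     /* alias for PHYSDEVOP_set_iopl */
+    op.u.set_iopl.iopl = iopl;
+    return hypothetical_physdev_op_compat(&op);
+}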
+
+#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/platform.h b/xen/public/platform.h
new file mode 100644
index 00000000..eee047be
--- /dev/null
+++ b/xen/public/platform.h
@@ -0,0 +1,346 @@
+/******************************************************************************
+ * platform.h
+ *
+ * Hardware platform operations. Intended for use by domain-0 kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PLATFORM_H__
+#define __XEN_PUBLIC_PLATFORM_H__
+
+#include "xen.h"
+
+#define XENPF_INTERFACE_VERSION 0x03000001
+
+/*
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+#define XENPF_settime 17
+struct xenpf_settime {
+ /* IN variables. */
+ uint32_t secs;
+ uint32_t nsecs;
+ uint64_t system_time;
+};
+typedef struct xenpf_settime xenpf_settime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t);
+
+/*
+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
+ * On x86, @type is an architecture-defined MTRR memory type.
+ * On success, returns the MTRR that was used (@reg) and a handle that can
+ * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting.
+ * (x86-specific).
+ */
+#define XENPF_add_memtype 31
+struct xenpf_add_memtype {
+ /* IN variables. */
+ xen_pfn_t mfn;
+ uint64_t nr_mfns;
+ uint32_t type;
+ /* OUT variables. */
+ uint32_t handle;
+ uint32_t reg;
+};
+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t);
+
+/*
+ * Tear down an existing memory-range type. If @handle is remembered then it
+ * should be passed in to accurately tear down the correct setting (in case
+ * of overlapping memory regions with differing types). If it is not known
+ * then @handle should be set to zero. In all cases @reg must be set.
+ * (x86-specific).
+ */
+#define XENPF_del_memtype 32
+struct xenpf_del_memtype {
+ /* IN variables. */
+ uint32_t handle;
+ uint32_t reg;
+};
+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t);
+
+/* Read current type of an MTRR (x86-specific). */
+#define XENPF_read_memtype 33
+struct xenpf_read_memtype {
+ /* IN variables. */
+ uint32_t reg;
+ /* OUT variables. */
+ xen_pfn_t mfn;
+ uint64_t nr_mfns;
+ uint32_t type;
+};
+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t);
+
+#define XENPF_microcode_update 35
+struct xenpf_microcode_update {
+ /* IN variables. */
+    XEN_GUEST_HANDLE(const_void) data; /* Pointer to microcode data. */
+ uint32_t length; /* Length of microcode data. */
+};
+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t);
+
+#define XENPF_platform_quirk 39
+#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */
+#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */
+#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */
+struct xenpf_platform_quirk {
+ /* IN variables. */
+ uint32_t quirk_id;
+};
+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
+
+#define XENPF_firmware_info 50
+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+ /* IN variables. */
+ uint32_t type;
+ uint32_t index;
+ /* OUT variables. */
+ union {
+ struct {
+ /* Int13, Fn48: Check Extensions Present. */
+ uint8_t device; /* %dl: bios device number */
+ uint8_t version; /* %ah: major version */
+ uint16_t interface_support; /* %cx: support bitmap */
+ /* Int13, Fn08: Legacy Get Device Parameters. */
+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */
+ uint8_t legacy_max_head; /* %dh: max head # */
+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */
+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+ /* NB. First uint16_t of buffer must be set to buffer size. */
+ XEN_GUEST_HANDLE(void) edd_params;
+ } disk_info; /* XEN_FW_DISK_INFO */
+ struct {
+ uint8_t device; /* bios device number */
+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */
+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+ struct {
+ /* Int10, AX=4F15: Get EDID info. */
+ uint8_t capabilities;
+ uint8_t edid_transfer_time;
+ /* must refer to 128-byte buffer */
+ XEN_GUEST_HANDLE(uint8) edid;
+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+ } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
+#define XENPF_enter_acpi_sleep 51
+struct xenpf_enter_acpi_sleep {
+ /* IN variables */
+ uint16_t pm1a_cnt_val; /* PM1a control value. */
+ uint16_t pm1b_cnt_val; /* PM1b control value. */
+ uint32_t sleep_state; /* Which state to enter (Sn). */
+ uint32_t flags; /* Must be zero. */
+};
+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t);
+
+#define XENPF_change_freq 52
+struct xenpf_change_freq {
+ /* IN variables */
+ uint32_t flags; /* Must be zero. */
+ uint32_t cpu; /* Physical cpu. */
+ uint64_t freq; /* New frequency (Hz). */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t);
+
+/*
+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
+ * cleared.
+ */
+#define XENPF_getidletime 53
+struct xenpf_getidletime {
+ /* IN/OUT variables */
+ /* IN: CPUs to interrogate; OUT: subset of IN which are present */
+ XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
+ /* IN variables */
+ /* Size of cpumap bitmap. */
+ uint32_t cpumap_nr_cpus;
+ /* Must be indexable for every cpu in cpumap_bitmap. */
+ XEN_GUEST_HANDLE(uint64) idletime;
+ /* OUT variables */
+ /* System time when the idletime snapshots were taken. */
+ uint64_t now;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
+
+#define XENPF_set_processor_pminfo 54
+
+/* ability bits */
+#define XEN_PROCESSOR_PM_CX 1
+#define XEN_PROCESSOR_PM_PX 2
+#define XEN_PROCESSOR_PM_TX 4
+
+/* cmd type */
+#define XEN_PM_CX 0
+#define XEN_PM_PX 1
+#define XEN_PM_TX 2
+
+/* Px sub info type */
+#define XEN_PX_PCT 1
+#define XEN_PX_PSS 2
+#define XEN_PX_PPC 4
+#define XEN_PX_PSD 8
+
+struct xen_power_register {
+ uint32_t space_id;
+ uint32_t bit_width;
+ uint32_t bit_offset;
+ uint32_t access_size;
+ uint64_t address;
+};
+
+struct xen_processor_csd {
+ uint32_t domain; /* domain number of one dependent group */
+ uint32_t coord_type; /* coordination type */
+ uint32_t num; /* number of processors in same domain */
+};
+typedef struct xen_processor_csd xen_processor_csd_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t);
+
+struct xen_processor_cx {
+ struct xen_power_register reg; /* GAS for Cx trigger register */
+ uint8_t type; /* cstate value, c0: 0, c1: 1, ... */
+ uint32_t latency; /* worst latency (ms) to enter/exit this cstate */
+    uint32_t power; /* average power consumption (mW) */
+ uint32_t dpcnt; /* number of dependency entries */
+ XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
+};
+typedef struct xen_processor_cx xen_processor_cx_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
+
+struct xen_processor_flags {
+ uint32_t bm_control:1;
+ uint32_t bm_check:1;
+ uint32_t has_cst:1;
+ uint32_t power_setup_done:1;
+ uint32_t bm_rld_set:1;
+};
+
+struct xen_processor_power {
+ uint32_t count; /* number of C state entries in array below */
+ struct xen_processor_flags flags; /* global flags of this processor */
+ XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */
+};
+
+struct xen_pct_register {
+ uint8_t descriptor;
+ uint16_t length;
+ uint8_t space_id;
+ uint8_t bit_width;
+ uint8_t bit_offset;
+ uint8_t reserved;
+ uint64_t address;
+};
+
+struct xen_processor_px {
+ uint64_t core_frequency; /* megahertz */
+ uint64_t power; /* milliWatts */
+ uint64_t transition_latency; /* microseconds */
+ uint64_t bus_master_latency; /* microseconds */
+ uint64_t control; /* control value */
+ uint64_t status; /* success indicator */
+};
+typedef struct xen_processor_px xen_processor_px_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
+
+struct xen_psd_package {
+ uint64_t num_entries;
+ uint64_t revision;
+ uint64_t domain;
+ uint64_t coord_type;
+ uint64_t num_processors;
+};
+
+struct xen_processor_performance {
+ uint32_t flags; /* flag for Px sub info type */
+ uint32_t platform_limit; /* Platform limitation on freq usage */
+ struct xen_pct_register control_register;
+ struct xen_pct_register status_register;
+ uint32_t state_count; /* total available performance states */
+ XEN_GUEST_HANDLE(xen_processor_px_t) states;
+ struct xen_psd_package domain_info;
+ uint32_t shared_type; /* coordination type of this processor */
+};
+typedef struct xen_processor_performance xen_processor_performance_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t);
+
+struct xenpf_set_processor_pminfo {
+ /* IN variables */
+ uint32_t id; /* ACPI CPU ID */
+ uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */
+ union {
+ struct xen_processor_power power;/* Cx: _CST/_CSD */
+ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+ };
+};
+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
+
+struct xen_platform_op {
+ uint32_t cmd;
+ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+ union {
+ struct xenpf_settime settime;
+ struct xenpf_add_memtype add_memtype;
+ struct xenpf_del_memtype del_memtype;
+ struct xenpf_read_memtype read_memtype;
+ struct xenpf_microcode_update microcode;
+ struct xenpf_platform_quirk platform_quirk;
+ struct xenpf_firmware_info firmware_info;
+ struct xenpf_enter_acpi_sleep enter_acpi_sleep;
+ struct xenpf_change_freq change_freq;
+ struct xenpf_getidletime getidletime;
+ struct xenpf_set_processor_pminfo set_pminfo;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct xen_platform_op xen_platform_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t);
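+
+/*
+ * Illustrative sketch, not part of the interface: setting the wallclock via
+ * XENPF_settime.  Every request travels inside a struct xen_platform_op whose
+ * interface_version must match XENPF_INTERFACE_VERSION.
+ * hypothetical_platform_op() is an assumed name for the domain-0 kernel's
+ * platform_op hypercall wrapper.
+ */
+extern long hypothetical_platform_op(struct xen_platform_op *op); /* assumed */
+
+static inline long example_settime(uint32_t secs, uint32_t nsecs,
+                                   uint64_t system_time)
+{
+    struct xen_platform_op op;
+
+    op.cmd = XENPF_settime;
+    op.interface_version = XENPF_INTERFACE_VERSION;
+    op.u.settime.secs = secs;
+    op.u.settime.nsecs = nsecs;
+    op.u.settime.system_time = system_time;
+    return hypothetical_platform_op(&op);
+}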
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/sched.h b/xen/public/sched.h
new file mode 100644
index 00000000..2227a95b
--- /dev/null
+++ b/xen/public/sched.h
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * sched.h
+ *
+ * Scheduler state interactions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_SCHED_H__
+#define __XEN_PUBLIC_SCHED_H__
+
+#include "event_channel.h"
+
+/*
+ * The prototype for this hypercall is:
+ * long sched_op(int cmd, void *arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == Operation-specific extra argument(s), as described below.
+ *
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
+ * of this hypercall, supporting only the commands yield, block and shutdown:
+ * long sched_op(int cmd, unsigned long arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == 0 (SCHEDOP_yield and SCHEDOP_block)
+ * == SHUTDOWN_* code (SCHEDOP_shutdown)
+ * This legacy version is available to new guests as sched_op_compat().
+ */
+
+/*
+ * Voluntarily yield the CPU.
+ * @arg == NULL.
+ */
+#define SCHEDOP_yield 0
+
+/*
+ * Block execution of this VCPU until an event is received for processing.
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block 1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown structure.
+ */
+#define SCHEDOP_shutdown 2
+struct sched_shutdown {
+ unsigned int reason; /* SHUTDOWN_* */
+};
+typedef struct sched_shutdown sched_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll structure.
+ */
+#define SCHEDOP_poll 3
+struct sched_poll {
+ XEN_GUEST_HANDLE(evtchn_port_t) ports;
+ unsigned int nr_ports;
+ uint64_t timeout;
+};
+typedef struct sched_poll sched_poll_t;
+DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains. A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown structure.
+ */
+#define SCHEDOP_remote_shutdown 4
+struct sched_remote_shutdown {
+ domid_t domain_id; /* Remote domain ID */
+ unsigned int reason; /* SHUTDOWN_xxx reason */
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
+
+/*
+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
+ * software to determine the appropriate action. For the most part, Xen does
+ * not care about the shutdown code.
+ */
+#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */
+#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
+#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
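+
+/*
+ * Illustrative sketch, not part of the interface: a clean power-off through
+ * SCHEDOP_shutdown using the reason codes above.  hypothetical_sched_op() is
+ * an assumed name for the guest's sched_op hypercall wrapper
+ * (long sched_op(int cmd, void *arg), as documented at the top of this file).
+ */
+extern long hypothetical_sched_op(int cmd, void *arg); /* assumed wrapper */
+
+static inline long example_poweroff(void)
+{
+    struct sched_shutdown shutdown;
+
+    shutdown.reason = SHUTDOWN_poweroff;
+    return hypothetical_sched_op(SCHEDOP_shutdown, &shutdown);
+}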
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/sysctl.h b/xen/public/sysctl.h
new file mode 100644
index 00000000..6b10954a
--- /dev/null
+++ b/xen/public/sysctl.h
@@ -0,0 +1,308 @@
+/******************************************************************************
+ * sysctl.h
+ *
+ * System management operations. For use by node control stack.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_SYSCTL_H__
+#define __XEN_PUBLIC_SYSCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "sysctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+#include "domctl.h"
+
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000006
+
+/*
+ * Read console content from Xen buffer ring.
+ */
+#define XEN_SYSCTL_readconsole 1
+struct xen_sysctl_readconsole {
+ /* IN: Non-zero -> clear after reading. */
+ uint8_t clear;
+ /* IN: Non-zero -> start index specified by @index field. */
+ uint8_t incremental;
+ uint8_t pad0, pad1;
+ /*
+ * IN: Start index for consuming from ring buffer (if @incremental);
+ * OUT: End index after consuming from ring buffer.
+ */
+ uint32_t index;
+ /* IN: Virtual address to write console data. */
+ XEN_GUEST_HANDLE_64(char) buffer;
+ /* IN: Size of buffer; OUT: Bytes written to buffer. */
+ uint32_t count;
+};
+typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t);
+
+/* Get trace buffers machine base address */
+#define XEN_SYSCTL_tbuf_op 2
+struct xen_sysctl_tbuf_op {
+ /* IN variables */
+#define XEN_SYSCTL_TBUFOP_get_info 0
+#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1
+#define XEN_SYSCTL_TBUFOP_set_evt_mask 2
+#define XEN_SYSCTL_TBUFOP_set_size 3
+#define XEN_SYSCTL_TBUFOP_enable 4
+#define XEN_SYSCTL_TBUFOP_disable 5
+ uint32_t cmd;
+ /* IN/OUT variables */
+ struct xenctl_cpumap cpu_mask;
+ uint32_t evt_mask;
+ /* OUT variables */
+ uint64_aligned_t buffer_mfn;
+ uint32_t size;
+};
+typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
+
+/*
+ * Get physical information about the host machine
+ */
+#define XEN_SYSCTL_physinfo 3
+ /* (x86) The platform supports HVM guests. */
+#define _XEN_SYSCTL_PHYSCAP_hvm 0
+#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm)
+ /* (x86) The platform supports HVM-guest direct access to I/O devices. */
+#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1
+#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio)
+struct xen_sysctl_physinfo {
+ uint32_t threads_per_core;
+ uint32_t cores_per_socket;
+ uint32_t nr_cpus;
+ uint32_t nr_nodes;
+ uint32_t cpu_khz;
+ uint64_aligned_t total_pages;
+ uint64_aligned_t free_pages;
+ uint64_aligned_t scrub_pages;
+ uint32_t hw_cap[8];
+
+ /*
+ * IN: maximum addressable entry in the caller-provided cpu_to_node array.
+ * OUT: largest cpu identifier in the system.
+ * If OUT is greater than IN then the cpu_to_node array is truncated!
+ */
+ uint32_t max_cpu_id;
+ /*
+     * If not NULL, this array is filled with the node identifier of each cpu.
+ * If a cpu has no node information (e.g., cpu not present) then the
+ * sentinel value ~0u is written.
+ * The size of this array is specified by the caller in @max_cpu_id.
+ * If the actual @max_cpu_id is smaller than the array then the trailing
+ * elements of the array will not be written by the sysctl.
+ */
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
+
+ /* XEN_SYSCTL_PHYSCAP_??? */
+ uint32_t capabilities;
+};
+typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
+
+/*
+ * Get the ID of the current scheduler.
+ */
+#define XEN_SYSCTL_sched_id 4
+struct xen_sysctl_sched_id {
+ /* OUT variable */
+ uint32_t sched_id;
+};
+typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t);
+
+/* Interface for controlling Xen software performance counters. */
+#define XEN_SYSCTL_perfc_op 5
+/* Sub-operations: */
+#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */
+#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */
+struct xen_sysctl_perfc_desc {
+ char name[80]; /* name of perf counter */
+ uint32_t nr_vals; /* number of values for this counter */
+};
+typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t);
+typedef uint32_t xen_sysctl_perfc_val_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t);
+
+struct xen_sysctl_perfc_op {
+ /* IN variables. */
+ uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */
+ /* OUT variables. */
+    uint32_t nr_counters; /* number of counter descriptions */
+ uint32_t nr_vals; /* number of values */
+ /* counter information (or NULL) */
+ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
+ /* counter values (or NULL) */
+ XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
+};
+typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
+
+#define XEN_SYSCTL_getdomaininfolist 6
+struct xen_sysctl_getdomaininfolist {
+ /* IN variables. */
+ domid_t first_domain;
+ uint32_t max_domains;
+ XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
+ /* OUT variables. */
+ uint32_t num_domains;
+};
+typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
+
+/* Inject debug keys into Xen. */
+#define XEN_SYSCTL_debug_keys 7
+struct xen_sysctl_debug_keys {
+ /* IN variables. */
+ XEN_GUEST_HANDLE_64(char) keys;
+ uint32_t nr_keys;
+};
+typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+#define XEN_SYSCTL_getcpuinfo 8
+struct xen_sysctl_cpuinfo {
+ uint64_aligned_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t);
+struct xen_sysctl_getcpuinfo {
+ /* IN variables. */
+ uint32_t max_cpus;
+ XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+ /* OUT variables. */
+ uint32_t nr_cpus;
+};
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t);
+
+#define XEN_SYSCTL_availheap 9
+struct xen_sysctl_availheap {
+ /* IN variables. */
+ uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */
+ uint32_t max_bitwidth; /* Largest address width (zero if don't care). */
+ int32_t node; /* NUMA node of interest (-1 for all nodes). */
+ /* OUT variables. */
+ uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */
+};
+typedef struct xen_sysctl_availheap xen_sysctl_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t);
+
+#define XEN_SYSCTL_get_pmstat 10
+struct pm_px_val {
+ uint64_aligned_t freq; /* Px core frequency */
+ uint64_aligned_t residency; /* Px residency time */
+ uint64_aligned_t count; /* Px transition count */
+};
+typedef struct pm_px_val pm_px_val_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_val_t);
+
+struct pm_px_stat {
+ uint8_t total; /* total Px states */
+ uint8_t usable; /* usable Px states */
+ uint8_t last; /* last Px state */
+ uint8_t cur; /* current Px state */
+ XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */
+ XEN_GUEST_HANDLE_64(pm_px_val_t) pt;
+};
+typedef struct pm_px_stat pm_px_stat_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t);
+
+struct pm_cx_stat {
+    uint32_t nr; /* number of entries in triggers & residencies, incl. C0 */
+ uint32_t last; /* last Cx state */
+ uint64_aligned_t idle_time; /* idle time from boot */
+ XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */
+ XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
+};
+
+struct xen_sysctl_get_pmstat {
+#define PMSTAT_CATEGORY_MASK 0xf0
+#define PMSTAT_PX 0x10
+#define PMSTAT_CX 0x20
+#define PMSTAT_get_max_px (PMSTAT_PX | 0x1)
+#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2)
+#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3)
+#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1)
+#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2)
+#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3)
+ uint32_t type;
+ uint32_t cpuid;
+ union {
+ struct pm_px_stat getpx;
+ struct pm_cx_stat getcx;
+ /* other struct for tx, etc */
+ } u;
+};
+typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t);
+
+#define XEN_SYSCTL_cpu_hotplug 11
+struct xen_sysctl_cpu_hotplug {
+ /* IN variables */
+ uint32_t cpu; /* Physical cpu. */
+#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0
+#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1
+ uint32_t op; /* hotplug opcode */
+};
+typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t);
+
+
+struct xen_sysctl {
+ uint32_t cmd;
+ uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
+ union {
+ struct xen_sysctl_readconsole readconsole;
+ struct xen_sysctl_tbuf_op tbuf_op;
+ struct xen_sysctl_physinfo physinfo;
+ struct xen_sysctl_sched_id sched_id;
+ struct xen_sysctl_perfc_op perfc_op;
+ struct xen_sysctl_getdomaininfolist getdomaininfolist;
+ struct xen_sysctl_debug_keys debug_keys;
+ struct xen_sysctl_getcpuinfo getcpuinfo;
+ struct xen_sysctl_availheap availheap;
+ struct xen_sysctl_get_pmstat get_pmstat;
+ struct xen_sysctl_cpu_hotplug cpu_hotplug;
+ uint8_t pad[128];
+ } u;
+};
+typedef struct xen_sysctl xen_sysctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
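+
+/*
+ * Illustrative sketch, not part of the interface: querying free heap memory
+ * with XEN_SYSCTL_availheap.  Requests are wrapped in a struct xen_sysctl
+ * whose interface_version must be XEN_SYSCTL_INTERFACE_VERSION.
+ * hypothetical_sysctl() is an assumed name for the control stack's sysctl
+ * hypercall wrapper.
+ */
+extern long hypothetical_sysctl(struct xen_sysctl *op); /* assumed wrapper */
+
+static inline long example_availheap_bytes(uint64_t *bytes)
+{
+    struct xen_sysctl op;
+    long ret;
+
+    op.cmd = XEN_SYSCTL_availheap;
+    op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
+    op.u.availheap.min_bitwidth = 0;  /* no minimum address width */
+    op.u.availheap.max_bitwidth = 0;  /* no maximum address width */
+    op.u.availheap.node = -1;         /* all NUMA nodes */
+    ret = hypothetical_sysctl(&op);
+    if (ret == 0)
+        *bytes = op.u.availheap.avail_bytes;
+    return ret;
+}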
+
+#endif /* __XEN_PUBLIC_SYSCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/trace.h b/xen/public/trace.h
new file mode 100644
index 00000000..0fc864dd
--- /dev/null
+++ b/xen/public/trace.h
@@ -0,0 +1,206 @@
+/******************************************************************************
+ * include/public/trace.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Mark Williamson, (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2005 Bin Ren
+ */
+
+#ifndef __XEN_PUBLIC_TRACE_H__
+#define __XEN_PUBLIC_TRACE_H__
+
+#define TRACE_EXTRA_MAX 7
+#define TRACE_EXTRA_SHIFT 28
+
+/* Trace classes */
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN 0x0001f000 /* General trace */
+#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */
+#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */
+#define TRC_HVM 0x0008f000 /* Xen HVM trace */
+#define TRC_MEM 0x0010f000 /* Xen memory trace */
+#define TRC_PV 0x0020f000 /* Xen PV traces */
+#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */
+#define TRC_PM 0x0080f000 /* Xen power management trace */
+#define TRC_ALL 0x0ffff000
+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
+#define TRC_HD_CYCLE_FLAG (1UL<<31)
+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
+#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+
+/* trace subclasses for SVM */
+#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */
+#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */
+
+#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */
+#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */
+
+/* Trace events per class */
+#define TRC_LOST_RECORDS (TRC_GEN + 1)
+#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2)
+#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3)
+
+#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1)
+#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2)
+#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3)
+#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4)
+#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5)
+#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6)
+#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7)
+#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8)
+#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9)
+#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10)
+#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11)
+#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12)
+#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
+
+#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1)
+#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2)
+#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
+
+#define TRC_PV_HYPERCALL (TRC_PV + 1)
+#define TRC_PV_TRAP (TRC_PV + 3)
+#define TRC_PV_PAGE_FAULT (TRC_PV + 4)
+#define TRC_PV_FORCED_INVALID_OP (TRC_PV + 5)
+#define TRC_PV_EMULATE_PRIVOP (TRC_PV + 6)
+#define TRC_PV_EMULATE_4GB (TRC_PV + 7)
+#define TRC_PV_MATH_STATE_RESTORE (TRC_PV + 8)
+#define TRC_PV_PAGING_FIXUP (TRC_PV + 9)
+#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV + 10)
+#define TRC_PV_PTWR_EMULATION (TRC_PV + 11)
+#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV + 12)
+ /* Indicates that addresses in trace record are 64 bits */
+#define TRC_64_FLAG (0x100)
+
+#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1)
+#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2)
+#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3)
+#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4)
+#define TRC_SHADOW_MMIO (TRC_SHADOW + 5)
+#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6)
+#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7)
+#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_WRMAP_BF (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14)
+#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15)
+
+/* trace events per subclass */
+#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01)
+#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02)
+#define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02)
+#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01)
+#define TRC_HVM_PF_XEN64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01)
+#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02)
+#define TRC_HVM_PF_INJECT64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02)
+#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03)
+#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04)
+#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05)
+#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06)
+#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07)
+#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08)
+#define TRC_HVM_CR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08)
+#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09)
+#define TRC_HVM_CR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09)
+#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A)
+#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B)
+#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C)
+#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D)
+#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E)
+#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F)
+#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10)
+#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11)
+#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12)
+#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13)
+#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
+#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)
+#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_IO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x16)
+#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_MMIO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x17)
+#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18)
+#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19)
+#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
+
+/* trace subclasses for power management */
+#define TRC_PM_FREQ 0x00801000 /* xen cpu freq events */
+#define TRC_PM_IDLE 0x00802000 /* xen cpu idle events */
+
+/* trace events per subclass */
+#define TRC_PM_FREQ_CHANGE (TRC_PM_FREQ + 0x01)
+#define TRC_PM_IDLE_ENTRY (TRC_PM_IDLE + 0x01)
+#define TRC_PM_IDLE_EXIT (TRC_PM_IDLE + 0x02)
+
+/* This structure represents a single trace buffer record. */
+struct t_rec {
+ uint32_t event:28;
+ uint32_t extra_u32:3; /* # entries in trailing extra_u32[] array */
+ uint32_t cycles_included:1; /* u.cycles or u.no_cycles? */
+ union {
+ struct {
+ uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
+ uint32_t extra_u32[7]; /* event data items */
+ } cycles;
+ struct {
+ uint32_t extra_u32[7]; /* event data items */
+ } nocycles;
+ } u;
+};
+
+/*
+ * This structure contains the metadata for a single trace buffer; an array
+ * of struct t_rec records follows immediately after it.
+ */
+struct t_buf {
+ /* Assume the data buffer size is X. X is generally not a power of 2.
+ * CONS and PROD are incremented modulo (2*X):
+ * 0 <= cons < 2*X
+ * 0 <= prod < 2*X
+ * This is done because addition modulo X breaks at 2^32 when X is not a
+ * power of 2:
+ * (((2^32 - 1) % X) + 1) % X != (2^32) % X
+ */
+ uint32_t cons; /* Offset of next item to be consumed by control tools. */
+ uint32_t prod; /* Offset of next item to be produced by Xen. */
+ /* Records follow immediately after the meta-data header. */
+};
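+
+/*
+ * Illustrative sketch, not part of the interface: recovering real positions
+ * from the modulo-2X counters above.  Because cons and prod both stay in
+ * [0, 2*X), the amount of unconsumed trace data and the true offset within
+ * the data area can be computed without any modulo-X arithmetic.
+ */
+static inline uint32_t example_t_buf_offset(uint32_t counter, uint32_t x)
+{
+    /* counter is t_buf.cons or t_buf.prod; x is the data area size. */
+    return (counter >= x) ? (counter - x) : counter;
+}
+
+static inline uint32_t example_t_buf_unconsumed(const struct t_buf *buf,
+                                                uint32_t x)
+{
+    /* At most one wrap of the 2*x ring can separate producer and consumer. */
+    return (buf->prod >= buf->cons) ? (buf->prod - buf->cons)
+                                    : (buf->prod + 2 * x - buf->cons);
+}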
+
+#endif /* __XEN_PUBLIC_TRACE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/vcpu.h b/xen/public/vcpu.h
new file mode 100644
index 00000000..ab654937
--- /dev/null
+++ b/xen/public/vcpu.h
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * vcpu.h
+ *
+ * VCPU initialisation, query, and hotplug.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_VCPU_H__
+#define __XEN_PUBLIC_VCPU_H__
+
+/*
+ * Prototype for this hypercall is:
+ * int vcpu_op(int cmd, int vcpuid, void *extra_args)
+ * @cmd == VCPUOP_??? (VCPU operation).
+ * @vcpuid == VCPU to operate on.
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Initialise a VCPU. Each VCPU can be initialised only once. A
+ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
+ *
+ * @extra_arg == pointer to vcpu_guest_context structure containing initial
+ * state for the VCPU.
+ */
+#define VCPUOP_initialise 0
+
+/*
+ * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
+ * if the VCPU has not been initialised (VCPUOP_initialise).
+ */
+#define VCPUOP_up 1
+
+/*
+ * Bring down a VCPU (i.e., make it non-runnable).
+ * There are a few caveats that callers should observe:
+ * 1. This operation may return, and VCPU_is_up may return false, before the
+ * VCPU stops running (i.e., the command is asynchronous). It is a good
+ * idea to ensure that the VCPU has entered a non-critical loop before
+ * bringing it down. Alternatively, this operation is guaranteed
+ * synchronous if invoked by the VCPU itself.
+ * 2. After a VCPU is initialised, there is currently no way to drop all its
+ * references to domain memory. Even a VCPU that is down still holds
+ * memory references via its pagetable base pointer and GDT. It is good
+ *    practice to move a VCPU onto an 'idle' or default page table, LDT and
+ * GDT before bringing it down.
+ */
+#define VCPUOP_down 2
+
+/* Returns 1 if the given VCPU is up. */
+#define VCPUOP_is_up 3
+
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info 4
+struct vcpu_runstate_info {
+ /* VCPU's current state (RUNSTATE_*). */
+ int state;
+ /* When was current state entered (system time, ns)? */
+ uint64_t state_entry_time;
+ /*
+ * Time spent in each RUNSTATE_* (ns). The sum of these times is
+ * guaranteed not to drift from system time.
+ */
+ uint64_t time[4];
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
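+
+/*
+ * Illustrative sketch, not part of the interface: sampling a VCPU's runstate
+ * with VCPUOP_get_runstate_info.  hypothetical_vcpu_op() is an assumed name
+ * for the guest's vcpu_op hypercall wrapper, following the prototype
+ * documented at the top of this header.
+ */
+extern int hypothetical_vcpu_op(int cmd, int vcpuid, void *extra_args);
+
+static inline int example_get_runstate(int vcpuid,
+                                       struct vcpu_runstate_info *info)
+{
+    /* On success, info->state holds one of the RUNSTATE_* values below. */
+    return hypothetical_vcpu_op(VCPUOP_get_runstate_info, vcpuid, info);
+}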
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running 0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked 2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline 3
+
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ * 1. The registered address may be a virtual address, a physical address,
+ *    or a guest handle, depending on the platform. On x86 systems a virtual
+ *    address or a guest handle should be registered.
+ * 2. Only one shared area may be registered per VCPU. The shared area is
+ * updated by the hypervisor each time the VCPU is scheduled. Thus
+ * runstate.state will always be RUNSTATE_running and
+ * runstate.state_entry_time will indicate the system time at which the
+ * VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+struct vcpu_register_runstate_memory_area {
+ union {
+ XEN_GUEST_HANDLE(vcpu_runstate_info_t) h;
+ struct vcpu_runstate_info *v;
+ uint64_t p;
+ } addr;
+};
+typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
+
+/*
+ * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
+ * which can be set via these commands. Periods smaller than one millisecond
+ * may not be supported.
+ */
+#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */
+#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */
+struct vcpu_set_periodic_timer {
+ uint64_t period_ns;
+};
+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
+
+/*
+ * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
+ * timer which can be set via these commands.
+ */
+#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */
+#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
+struct vcpu_set_singleshot_timer {
+ uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */
+ uint32_t flags; /* VCPU_SSHOTTMR_??? */
+};
+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+/* Flags to VCPUOP_set_singleshot_timer. */
+ /* Require the timeout to be in the future (return -ETIME if it has passed). */
+#define _VCPU_SSHOTTMR_future (0)
+#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future)
+
+/*
+ * Register a memory location in the guest address space for the
+ * vcpu_info structure. This allows the guest to place the vcpu_info
+ * structure in a convenient place, such as in a per-cpu data area.
+ * The pointer need not be page aligned, but the structure must not
+ * cross a page boundary.
+ *
+ * This may be called only once per vcpu.
+ */
+#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */
+struct vcpu_register_vcpu_info {
+ uint64_t mfn; /* mfn of page to place vcpu_info */
+ uint32_t offset; /* offset within page */
+ uint32_t rsvd; /* unused */
+};
+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
+
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi 11
+
+/*
+ * Get the physical ID information for a pinned vcpu's underlying physical
+ * processor. The physical ID information is architecture-specific.
+ * On x86: id[31:0]=apic_id, id[63:32]=acpi_id, and all values 0xff and
+ * greater are reserved.
+ * This command returns -EINVAL if it is not a valid operation for this VCPU.
+ */
+#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */
+struct vcpu_get_physid {
+ uint64_t phys_id;
+};
+typedef struct vcpu_get_physid vcpu_get_physid_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t);
+#define xen_vcpu_physid_to_x86_apicid(physid) \
+ ((((uint32_t)(physid)) >= 0xff) ? 0xff : ((uint8_t)(physid)))
+#define xen_vcpu_physid_to_x86_acpiid(physid) \
+ ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32)))
+
+#endif /* __XEN_PUBLIC_VCPU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/version.h b/xen/public/version.h
new file mode 100644
index 00000000..944ca620
--- /dev/null
+++ b/xen/public/version.h
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * version.h
+ *
+ * Xen version, type, and compile information.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com>
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_VERSION_H__
+#define __XEN_PUBLIC_VERSION_H__
+
+/* NB. All ops return zero on success, except XENVER_{version,pagesize} */
+
+/* arg == NULL; returns major:minor (16:16). */
+#define XENVER_version 0
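+
+/*
+ * Illustrative sketch, not part of the interface: decoding the packed result
+ * of XENVER_version.  hypothetical_xen_version() is an assumed name for the
+ * guest's xen_version hypercall wrapper (command number plus an optional
+ * argument buffer, unused here).
+ */
+extern int hypothetical_xen_version(int cmd, void *arg); /* assumed wrapper */
+
+static inline void example_get_version(unsigned int *major,
+                                       unsigned int *minor)
+{
+    unsigned int packed =
+        (unsigned int)hypothetical_xen_version(XENVER_version, (void *)0);
+
+    *major = packed >> 16;     /* upper 16 bits */
+    *minor = packed & 0xffff;  /* lower 16 bits */
+}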
+
+/* arg == xen_extraversion_t. */
+#define XENVER_extraversion 1
+typedef char xen_extraversion_t[16];
+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
+
+/* arg == xen_compile_info_t. */
+#define XENVER_compile_info 2
+struct xen_compile_info {
+ char compiler[64];
+ char compile_by[16];
+ char compile_domain[32];
+ char compile_date[32];
+};
+typedef struct xen_compile_info xen_compile_info_t;
+
+#define XENVER_capabilities 3
+typedef char xen_capabilities_info_t[1024];
+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
+
+#define XENVER_changeset 4
+typedef char xen_changeset_info_t[64];
+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
+
+#define XENVER_platform_parameters 5
+struct xen_platform_parameters {
+ unsigned long virt_start;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
+
+#define XENVER_get_features 6
+struct xen_feature_info {
+ unsigned int submap_idx; /* IN: which 32-bit submap to return */
+ uint32_t submap; /* OUT: 32-bit submap */
+};
+typedef struct xen_feature_info xen_feature_info_t;
+
+/* Declares the features reported by XENVER_get_features. */
+#include "features.h"
+
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
+/* arg == xen_domain_handle_t. */
+#define XENVER_guest_handle 8
+
+#endif /* __XEN_PUBLIC_VERSION_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/xen-compat.h b/xen/public/xen-compat.h
new file mode 100644
index 00000000..329be076
--- /dev/null
+++ b/xen/public/xen-compat.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * xen-compat.h
+ *
+ * Guest OS interface to Xen. Compatibility layer.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Christian Limpach
+ */
+
+#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
+#define __XEN_PUBLIC_XEN_COMPAT_H__
+
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+/* Xen is built with matching headers and implements the latest interface. */
+#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
+#elif !defined(__XEN_INTERFACE_VERSION__)
+/* Guests which do not specify a version get the legacy interface. */
+#define __XEN_INTERFACE_VERSION__ 0x00000000
+#endif
+
+#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__
+#error "These header files do not support the requested interface version."
+#endif
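+
+/*
+ * Illustrative sketch, not part of the interface: a guest that wants the
+ * newer hypercall numbering pins the interface version before including any
+ * public header (the include path shown is only an example):
+ *
+ *     #define __XEN_INTERFACE_VERSION__ 0x00030203
+ *     #include <xen/public/xen.h>
+ *
+ * Leaving the macro undefined selects the legacy (0x00000000) interface.
+ */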
+
+#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */
diff --git a/xen/public/xen.h b/xen/public/xen.h
new file mode 100644
index 00000000..084bb90a
--- /dev/null
+++ b/xen/public/xen.h
@@ -0,0 +1,657 @@
+/******************************************************************************
+ * xen.h
+ *
+ * Guest OS interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include <sys/types.h>
+
+#include "xen-compat.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch-x86/xen.h"
+#elif defined(__ia64__)
+#include "arch-ia64.h"
+#else
+#error "Unsupported architecture"
+#endif
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+DEFINE_XEN_GUEST_HANDLE(char);
+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
+DEFINE_XEN_GUEST_HANDLE(int);
+__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int);
+DEFINE_XEN_GUEST_HANDLE(long);
+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+DEFINE_XEN_GUEST_HANDLE(void);
+
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+#endif
+
+/*
+ * HYPERCALLS
+ */
+
+#define __HYPERVISOR_set_trap_table 0
+#define __HYPERVISOR_mmu_update 1
+#define __HYPERVISOR_set_gdt 2
+#define __HYPERVISOR_stack_switch 3
+#define __HYPERVISOR_set_callbacks 4
+#define __HYPERVISOR_fpu_taskswitch 5
+#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */
+#define __HYPERVISOR_platform_op 7
+#define __HYPERVISOR_set_debugreg 8
+#define __HYPERVISOR_get_debugreg 9
+#define __HYPERVISOR_update_descriptor 10
+#define __HYPERVISOR_memory_op 12
+#define __HYPERVISOR_multicall 13
+#define __HYPERVISOR_update_va_mapping 14
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */
+#define __HYPERVISOR_xen_version 17
+#define __HYPERVISOR_console_io 18
+#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */
+#define __HYPERVISOR_grant_table_op 20
+#define __HYPERVISOR_vm_assist 21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op 24
+#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op 26
+#define __HYPERVISOR_xsm_op 27
+#define __HYPERVISOR_nmi_op 28
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_callback_op 30
+#define __HYPERVISOR_xenoprof_op 31
+#define __HYPERVISOR_event_channel_op 32
+#define __HYPERVISOR_physdev_op 33
+#define __HYPERVISOR_hvm_op 34
+#define __HYPERVISOR_sysctl 35
+#define __HYPERVISOR_domctl 36
+#define __HYPERVISOR_kexec_op 37
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0 48
+#define __HYPERVISOR_arch_1 49
+#define __HYPERVISOR_arch_2 50
+#define __HYPERVISOR_arch_3 51
+#define __HYPERVISOR_arch_4 52
+#define __HYPERVISOR_arch_5 53
+#define __HYPERVISOR_arch_6 54
+#define __HYPERVISOR_arch_7 55
+
+/*
+ * HYPERCALL COMPATIBILITY.
+ */
+
+/* New sched_op hypercall introduced in 0x00030101. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030101
+#undef __HYPERVISOR_sched_op
+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
+#endif
+
+/* New event-channel and physdev hypercalls introduced in 0x00030202. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030202
+#undef __HYPERVISOR_event_channel_op
+#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat
+#undef __HYPERVISOR_physdev_op
+#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat
+#endif
+
+/* New platform_op hypercall introduced in 0x00030204. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030204
+#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
+#endif
+
+/*
+ * VIRTUAL INTERRUPTS
+ *
+ * Virtual interrupts that a guest OS may receive from Xen.
+ *
+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
+ * The latter can be allocated only once per guest: they must initially be
+ * allocated to VCPU0 but can subsequently be re-bound.
+ */
+#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */
+#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */
+#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */
+#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */
+#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
+#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
+#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
+
+/* Architecture-specific VIRQ definitions. */
+#define VIRQ_ARCH_0 16
+#define VIRQ_ARCH_1 17
+#define VIRQ_ARCH_2 18
+#define VIRQ_ARCH_3 19
+#define VIRQ_ARCH_4 20
+#define VIRQ_ARCH_5 21
+#define VIRQ_ARCH_6 22
+#define VIRQ_ARCH_7 23
+
+#define NR_VIRQS 24
+
+/*
+ * MMU-UPDATE REQUESTS
+ *
+ * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
+ * ptr[1:0] specifies the appropriate MMU_* command.
+ *
+ * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
+ * Updates an entry in a page table. If updating an L1 table, and the new
+ * table entry is valid/present, the mapped frame must belong to the FD, if
+ * an FD has been specified. If attempting to map an I/O page then the
+ * caller assumes the privilege of the FD.
+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
+ * ptr[:2] -- Machine address of the page-table entry to modify.
+ * val -- Value to write.
+ *
+ * ptr[1:0] == MMU_MACHPHYS_UPDATE:
+ * Updates an entry in the machine->pseudo-physical mapping table.
+ * ptr[:2] -- Machine address within the frame whose mapping to modify.
+ * The frame must belong to the FD, if one is specified.
+ * val -- Value to write into the mapping entry.
+ *
+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
+ * with those in @val.
+ */
+#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
+#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
+#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */
+
+/*
+ * MMU EXTENDED OPERATIONS
+ *
+ * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
+ *
+ * cmd: MMUEXT_(UN)PIN_*_TABLE
+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
+ * The frame must belong to the FD, if one is specified.
+ *
+ * cmd: MMUEXT_NEW_BASEPTR
+ * mfn: Machine frame number of new page-table base to install in MMU.
+ *
+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ * when in user space.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
+ * No additional arguments. Flushes local TLB.
+ *
+ * cmd: MMUEXT_INVLPG_LOCAL
+ * linear_addr: Linear address to be flushed from the local TLB.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_MULTI
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_INVLPG_MULTI
+ * linear_addr: Linear address to be flushed.
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ *
+ * cmd: MMUEXT_TLB_FLUSH_ALL
+ * No additional arguments. Flushes all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_INVLPG_ALL
+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
+ *
+ * cmd: MMUEXT_FLUSH_CACHE
+ * No additional arguments. Writes back and flushes cache contents.
+ *
+ * cmd: MMUEXT_SET_LDT
+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
+ * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
+ */
+#define MMUEXT_PIN_L1_TABLE 0
+#define MMUEXT_PIN_L2_TABLE 1
+#define MMUEXT_PIN_L3_TABLE 2
+#define MMUEXT_PIN_L4_TABLE 3
+#define MMUEXT_UNPIN_TABLE 4
+#define MMUEXT_NEW_BASEPTR 5
+#define MMUEXT_TLB_FLUSH_LOCAL 6
+#define MMUEXT_INVLPG_LOCAL 7
+#define MMUEXT_TLB_FLUSH_MULTI 8
+#define MMUEXT_INVLPG_MULTI 9
+#define MMUEXT_TLB_FLUSH_ALL 10
+#define MMUEXT_INVLPG_ALL 11
+#define MMUEXT_FLUSH_CACHE 12
+#define MMUEXT_SET_LDT 13
+#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE 16
+#define MMUEXT_COPY_PAGE 17
+
+#ifndef __ASSEMBLY__
+struct mmuext_op {
+ unsigned int cmd;
+ union {
+ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+ * CLEAR_PAGE, COPY_PAGE */
+ xen_pfn_t mfn;
+        /* INVLPG_LOCAL, INVLPG_MULTI, INVLPG_ALL, SET_LDT */
+ unsigned long linear_addr;
+ } arg1;
+ union {
+ /* SET_LDT */
+ unsigned int nr_ents;
+ /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+ XEN_GUEST_HANDLE(void) vcpumask;
+#else
+ void *vcpumask;
+#endif
+ /* COPY_PAGE */
+ xen_pfn_t src_mfn;
+ } arg2;
+};
+typedef struct mmuext_op mmuext_op_t;
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
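+
+/*
+ * Illustrative example (not part of the interface): flushing one linear
+ * address from the local TLB with HYPERVISOR_mmuext_op().  The hypercall
+ * wrapper is assumed to come from machine-specific headers, and DOMID_SELF
+ * is defined further below in this header.
+ */
+#if 0
+static inline int example_invlpg_local(unsigned long va)
+{
+    struct mmuext_op op;
+
+    op.cmd              = MMUEXT_INVLPG_LOCAL;
+    op.arg1.linear_addr = va;
+    return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+}
+#endif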
+#endif
+
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */
+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */
+#define UVMF_NONE (0UL<<0) /* No flushing at all. */
+#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */
+#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */
+#define UVMF_FLUSHTYPE_MASK (3UL<<0)
+#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */
+#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */
+#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */
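+
+/*
+ * Illustrative example: the flags argument of HYPERVISOR_update_va_mapping()
+ * is the OR of a flush type and a flush scope, e.g. flushing just the
+ * updated entry from the local TLB:
+ *
+ *     HYPERVISOR_update_va_mapping(va, new_pte, UVMF_INVLPG | UVMF_LOCAL);
+ *
+ * The wrapper itself is assumed to come from machine-specific headers.
+ */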
+
+/*
+ * Commands to HYPERVISOR_console_io().
+ */
+#define CONSOLEIO_write 0
+#define CONSOLEIO_read 1
+
+/*
+ * Commands to HYPERVISOR_vm_assist().
+ */
+#define VMASST_CMD_enable 0
+#define VMASST_CMD_disable 1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
+#define VMASST_TYPE_4gb_segments 0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
+#define VMASST_TYPE_4gb_segments_notify 1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
+#define VMASST_TYPE_writable_pagetables 2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3 3
+
+#define MAX_VMASST_TYPE 3
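+
+/*
+ * Illustrative example: a guest enables an assist with, e.g.,
+ *
+ *     HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+ *
+ * The wrapper itself is assumed to come from machine-specific headers.
+ */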
+
+#ifndef __ASSEMBLY__
+
+typedef uint16_t domid_t;
+
+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
+#define DOMID_FIRST_RESERVED (0x7FF0U)
+
+/* DOMID_SELF is used in certain contexts to refer to oneself. */
+#define DOMID_SELF (0x7FF0U)
+
+/*
+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
+ * is useful to ensure that no mappings to the OS's own heap are accidentally
+ * installed. (e.g., in Linux this could cause havoc as reference counts
+ * aren't adjusted on the I/O-mapping code path).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
+ * be specified by any calling domain.
+ */
+#define DOMID_IO (0x7FF1U)
+
+/*
+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
+ * Xen's heap space (e.g., the machine_to_phys table).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
+ * the caller is privileged.
+ */
+#define DOMID_XEN (0x7FF2U)
+
+/*
+ * Send an array of these to HYPERVISOR_mmu_update().
+ * NB. The fields are natural pointer/address size for this architecture.
+ */
+struct mmu_update {
+ uint64_t ptr; /* Machine address of PTE. */
+ uint64_t val; /* New contents of PTE. */
+};
+typedef struct mmu_update mmu_update_t;
+DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
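+
+/*
+ * Illustrative example (not part of the interface): one request for
+ * HYPERVISOR_mmu_update().  The wrapper and its (reqs, count, done_out,
+ * foreigndom) signature are assumed from machine-specific headers.
+ */
+#if 0
+static inline int example_set_pte(uint64_t pte_machine_addr, uint64_t new_pte)
+{
+    struct mmu_update req;
+
+    /* ptr[1:0] selects the command; the rest is the machine address of the
+     * page-table entry to modify (see MMU_NORMAL_PT_UPDATE above). */
+    req.ptr = (pte_machine_addr & ~(uint64_t) 3) | MMU_NORMAL_PT_UPDATE;
+    req.val = new_pte;
+    return HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
+}
+#endif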
+
+/*
+ * Send an array of these to HYPERVISOR_multicall().
+ * NB. The fields are natural register size for this architecture.
+ */
+struct multicall_entry {
+ unsigned long op, result;
+ unsigned long args[6];
+};
+typedef struct multicall_entry multicall_entry_t;
+DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
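+
+/*
+ * Illustrative example (not part of the interface): batching two hypercalls
+ * with HYPERVISOR_multicall().  Each entry names a __HYPERVISOR_* operation,
+ * its arguments go in args[], and the per-call return value comes back in
+ * 'result'.  The wrapper itself is assumed from machine-specific headers.
+ */
+#if 0
+static inline void example_multicall(void)
+{
+    struct multicall_entry calls[2];
+
+    calls[0].op      = __HYPERVISOR_console_io;
+    calls[0].args[0] = CONSOLEIO_write;
+    calls[0].args[1] = 6;                          /* strlen("hello\n") */
+    calls[0].args[2] = (unsigned long) "hello\n";
+
+    calls[1].op      = __HYPERVISOR_fpu_taskswitch;
+    calls[1].args[0] = 1;                          /* set CR0.TS */
+
+    HYPERVISOR_multicall(calls, 2);
+    /* calls[0].result and calls[1].result now hold the return codes. */
+}
+#endif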
+
+/*
+ * Event channel endpoints per domain:
+ * 1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ */
+#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+
+struct vcpu_time_info {
+ /*
+ * Updates to the following values are preceded and followed by an
+ * increment of 'version'. The guest can therefore detect updates by
+ * looking for changes to 'version'. If the least-significant bit of
+ * the version number is set then an update is in progress and the guest
+ * must wait to read a consistent set of values.
+ * The correct way to interact with the version number is similar to
+ * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
+ */
+ uint32_t version;
+ uint32_t pad0;
+ uint64_t tsc_timestamp; /* TSC at last update of time vals. */
+ uint64_t system_time; /* Time, in nanosecs, since boot. */
+ /*
+ * Current system time:
+ * system_time +
+ * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
+ * CPU frequency (Hz):
+ * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+ */
+ uint32_t tsc_to_system_mul;
+ int8_t tsc_shift;
+ int8_t pad1[3];
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
+
+struct vcpu_info {
+ /*
+ * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+ * a pending notification for a particular VCPU. It is then cleared
+ * by the guest OS /before/ checking for pending work, thus avoiding
+ * a set-and-check race. Note that the mask is only accessed by Xen
+ * on the CPU that is currently hosting the VCPU. This means that the
+ * pending and mask flags can be updated by the guest without special
+ * synchronisation (i.e., no need for the x86 LOCK prefix).
+ * This may seem suboptimal because if the pending flag is set by
+ * a different CPU then an IPI may be scheduled even when the mask
+ * is set. However, note:
+ * 1. The task of 'interrupt holdoff' is covered by the per-event-
+ * channel mask bits. A 'noisy' event that is continually being
+ * triggered can be masked at source at this very precise
+ * granularity.
+ * 2. The main purpose of the per-VCPU mask is therefore to restrict
+ * reentrant execution: whether for concurrency control, or to
+ * prevent unbounded stack usage. Whatever the purpose, we expect
+ * that the mask will be asserted only for short periods at a time,
+ * and so the likelihood of a 'spurious' IPI is suitably small.
+ * The mask is read before making an event upcall to the guest: a
+ * non-zero mask therefore guarantees that the VCPU will not receive
+ * an upcall activation. The mask is cleared when the VCPU requests
+ * to block: this avoids wakeup-waiting races.
+ */
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+#ifndef __XEN__
+typedef struct vcpu_info vcpu_info_t;
+#endif
+
+/*
+ * Xen/kernel shared data -- pointer provided in start_info.
+ *
+ * This structure is defined to be both smaller than a page, and the
+ * only data on the shared page, but may vary in actual size even within
+ * compatible Xen versions; guests should not rely on the size
+ * of this structure remaining constant.
+ */
+struct shared_info {
+ struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
+
+ /*
+ * A domain can create "event channels" on which it can send and receive
+ * asynchronous event notifications. There are three classes of event that
+ * are delivered by this mechanism:
+ * 1. Bi-directional inter- and intra-domain connections. Domains must
+ * arrange out-of-band to set up a connection (usually by allocating
+ * an unbound 'listener' port and advertising that via a storage service
+ * such as xenstore).
+ * 2. Physical interrupts. A domain with suitable hardware-access
+ * privileges can bind an event-channel port to a physical interrupt
+ * source.
+ * 3. Virtual interrupts ('events'). A domain can bind an event-channel
+ * port to a virtual interrupt source, such as the virtual-timer
+ * device or the emergency console.
+ *
+ * Event channels are addressed by a "port index". Each channel is
+ * associated with two bits of information:
+ * 1. PENDING -- notifies the domain that there is a pending notification
+ * to be processed. This bit is cleared by the guest.
+ * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+ * will cause an asynchronous upcall to be scheduled. This bit is only
+ * updated by the guest. It is read-only within Xen. If a channel
+ * becomes pending while the channel is masked then the 'edge' is lost
+ * (i.e., when the channel is unmasked, the guest must manually handle
+ * pending notifications as no upcall will be scheduled by Xen).
+ *
+ * To expedite scanning of pending notifications, any 0->1 pending
+ * transition on an unmasked channel causes a corresponding bit in a
+ * per-vcpu selector word to be set. Each bit in the selector covers a
+ * 'C long' in the PENDING bitfield array.
+ */
+ unsigned long evtchn_pending[sizeof(unsigned long) * 8];
+ unsigned long evtchn_mask[sizeof(unsigned long) * 8];
+
+ /*
+ * Wallclock time: updated only by control software. Guests should base
+ * their gettimeofday() syscall on this wallclock-base value.
+ */
+ uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */
+ uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
+ uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
+
+ struct arch_shared_info arch;
+
+};
+#ifndef __XEN__
+typedef struct shared_info shared_info_t;
+#endif
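+
+/*
+ * Illustrative example (not part of the interface): scanning the two-level
+ * pending bitmap described above.  The __sync/__builtin helpers are GCC
+ * builtins used only for illustration; dispatching the port is left out.
+ */
+#if 0
+static void example_scan_events(volatile struct shared_info *s,
+                                volatile struct vcpu_info *v)
+{
+    unsigned long sel, pend;
+    unsigned int l1, l2, port;
+
+    v->evtchn_upcall_pending = 0;
+    /* Atomically grab and clear the per-VCPU selector word. */
+    sel = __sync_lock_test_and_set(&v->evtchn_pending_sel, 0);
+    while (sel) {
+        l1 = __builtin_ctzl(sel);
+        sel &= ~(1UL << l1);
+        /* Only look at ports that are pending and not masked. */
+        pend = s->evtchn_pending[l1] & ~s->evtchn_mask[l1];
+        while (pend) {
+            l2 = __builtin_ctzl(pend);
+            pend &= ~(1UL << l2);
+            /* The guest clears the PENDING bit, then handles the port. */
+            __sync_fetch_and_and(&s->evtchn_pending[l1], ~(1UL << l2));
+            port = l1 * sizeof(unsigned long) * 8 + l2;
+            (void) port;                      /* dispatch to its handler */
+        }
+    }
+}
+#endif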
+
+/*
+ * Start-of-day memory layout:
+ * 1. The domain is started within a contiguous virtual-memory region.
+ * 2. The contiguous region ends on an aligned 4MB boundary.
+ * 3. This is the order of bootstrap elements in the initial virtual region:
+ * a. relocated kernel image
+ * b. initial ram disk [mod_start, mod_len]
+ * c. list of allocated page frames [mfn_list, nr_pages]
+ * d. start_info_t structure [register ESI (x86)]
+ * e. bootstrap page tables [pt_base, CR3 (x86)]
+ * f. bootstrap stack [register ESP (x86)]
+ * 4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ * 5. The initial ram disk may be omitted.
+ * 6. The list of page frames forms a contiguous 'pseudo-physical' memory
+ * layout for the domain. In particular, the bootstrap virtual-memory
+ * region is a 1:1 mapping to the first section of the pseudo-physical map.
+ * 7. All bootstrap elements are mapped read-writable for the guest OS. The
+ * only exception is the bootstrap page table, which is mapped read-only.
+ * 8. There is guaranteed to be at least 512kB padding after the final
+ * bootstrap element. If necessary, the bootstrap virtual region is
+ * extended by an extra 4MB to ensure this.
+ */
+
+#define MAX_GUEST_CMDLINE 1024
+struct start_info {
+ /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
+ char magic[32]; /* "xen-<version>-<platform>". */
+ unsigned long nr_pages; /* Total pages allocated to this domain. */
+ unsigned long shared_info; /* MACHINE address of shared info struct. */
+ uint32_t flags; /* SIF_xxx flags. */
+ xen_pfn_t store_mfn; /* MACHINE page number of shared page. */
+ uint32_t store_evtchn; /* Event channel for store communication. */
+ union {
+ struct {
+ xen_pfn_t mfn; /* MACHINE page number of console page. */
+ uint32_t evtchn; /* Event channel for console page. */
+ } domU;
+ struct {
+ uint32_t info_off; /* Offset of console_info struct. */
+ uint32_t info_size; /* Size of console_info struct from start.*/
+ } dom0;
+ } console;
+ /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
+ unsigned long pt_base; /* VIRTUAL address of page directory. */
+ unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */
+ unsigned long mfn_list; /* VIRTUAL address of page-frame list. */
+ unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
+ unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
+ int8_t cmd_line[MAX_GUEST_CMDLINE];
+
+ /* hackish, for multiboot compatibility */
+ unsigned mods_count;
+};
+typedef struct start_info start_info_t;
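+
+/*
+ * Illustrative example (not part of the interface): early boot code
+ * typically validates the magic string and then picks up the
+ * pseudo-physical page list.  memcmp() is assumed from kernel string
+ * headers; 'si' is the structure whose address arrives in ESI on x86.
+ */
+#if 0
+static inline int example_check_start_info(start_info_t *si)
+{
+    if (memcmp(si->magic, "xen-", 4))
+        return 0;                      /* not started by Xen */
+    /* si->nr_pages frames are listed as MFNs at virtual address
+     * si->mfn_list; si->pt_base points at the bootstrap page tables. */
+    return 1;
+}
+#endif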
+
+/* New console union for dom0 introduced in 0x00030203. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+#define console_mfn console.domU.mfn
+#define console_evtchn console.domU.evtchn
+#endif
+
+/* These flags are passed in the 'flags' field of start_info_t. */
+#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
+#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
+#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module list? */
+#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
+
+typedef struct dom0_vga_console_info {
+ uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB 0x23
+
+ union {
+ struct {
+ /* Font height, in pixels. */
+ uint16_t font_height;
+ /* Cursor location (column, row). */
+ uint16_t cursor_x, cursor_y;
+ /* Number of rows and columns (dimensions in characters). */
+ uint16_t rows, columns;
+ } text_mode_3;
+
+ struct {
+ /* Width and height, in pixels. */
+ uint16_t width, height;
+ /* Bytes per scan line. */
+ uint16_t bytes_per_line;
+ /* Bits per pixel. */
+ uint16_t bits_per_pixel;
+ /* LFB physical address, and size (in units of 64kB). */
+ uint32_t lfb_base;
+ uint32_t lfb_size;
+ /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
+ uint8_t red_pos, red_size;
+ uint8_t green_pos, green_size;
+ uint8_t blue_pos, blue_size;
+ uint8_t rsvd_pos, rsvd_size;
+#if __XEN_INTERFACE_VERSION__ >= 0x00030206
+ /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
+ uint32_t gbl_caps;
+ /* Mode attributes (offset 0x0, VESA command 0x4f01). */
+ uint16_t mode_attrs;
+#endif
+ } vesa_lfb;
+ } u;
+} dom0_vga_console_info_t;
+#define xen_vga_console_info dom0_vga_console_info
+#define xen_vga_console_info_t dom0_vga_console_info_t
+
+typedef uint8_t xen_domain_handle_t[16];
+
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t);
+__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
+__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
+__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
+#endif /* !__ASSEMBLY__ */
+
+/* Default definitions for macros used by domctl/sysctl. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+#ifndef XEN_GUEST_HANDLE_64
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
+#endif
+#endif
+
+#endif /* __XEN_PUBLIC_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/public/xencomm.h b/xen/public/xencomm.h
new file mode 100644
index 00000000..ac45e071
--- /dev/null
+++ b/xen/public/xencomm.h
@@ -0,0 +1,41 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) IBM Corp. 2006
+ */
+
+#ifndef _XEN_XENCOMM_H_
+#define _XEN_XENCOMM_H_
+
+/* A xencomm descriptor is a scatter/gather list containing physical
+ * addresses corresponding to a virtually contiguous memory area. The
+ * hypervisor translates these physical addresses to machine addresses to copy
+ * to and from the virtually contiguous area.
+ */
+
+#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */
+#define XENCOMM_INVALID (~0UL)
+
+struct xencomm_desc {
+ uint32_t magic;
+ uint32_t nr_addrs; /* the number of entries in address[] */
+ uint64_t address[0];
+};
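+
+/*
+ * Illustrative example (not part of the interface): describing a two-page,
+ * virtually contiguous buffer.  The descriptor must have been allocated
+ * with room for nr_addrs entries; virt_to_phys() is an assumed helper
+ * returning the physical address of a page.
+ */
+#if 0
+static inline void example_fill_xencomm(struct xencomm_desc *desc,
+                                        void *page0, void *page1)
+{
+    desc->magic      = XENCOMM_MAGIC;
+    desc->nr_addrs   = 2;
+    desc->address[0] = virt_to_phys(page0);
+    desc->address[1] = virt_to_phys(page1);
+}
+#endif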
+
+#endif /* _XEN_XENCOMM_H_ */
diff --git a/xen/public/xenoprof.h b/xen/public/xenoprof.h
new file mode 100644
index 00000000..183078d8
--- /dev/null
+++ b/xen/public/xenoprof.h
@@ -0,0 +1,138 @@
+/******************************************************************************
+ * xenoprof.h
+ *
+ * Interface for enabling system wide profiling based on hardware performance
+ * counters
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_PUBLIC_XENOPROF_H__
+#define __XEN_PUBLIC_XENOPROF_H__
+
+#include "xen.h"
+
+/*
+ * Commands to HYPERVISOR_xenoprof_op().
+ */
+#define XENOPROF_init 0
+#define XENOPROF_reset_active_list 1
+#define XENOPROF_reset_passive_list 2
+#define XENOPROF_set_active 3
+#define XENOPROF_set_passive 4
+#define XENOPROF_reserve_counters 5
+#define XENOPROF_counter 6
+#define XENOPROF_setup_events 7
+#define XENOPROF_enable_virq 8
+#define XENOPROF_start 9
+#define XENOPROF_stop 10
+#define XENOPROF_disable_virq 11
+#define XENOPROF_release_counters 12
+#define XENOPROF_shutdown 13
+#define XENOPROF_get_buffer 14
+#define XENOPROF_set_backtrace 15
+#define XENOPROF_last_op 15
+
+#define MAX_OPROF_EVENTS 32
+#define MAX_OPROF_DOMAINS 25
+#define XENOPROF_CPU_TYPE_SIZE 64
+
+/* Xenoprof performance events (not Xen events) */
+struct event_log {
+ uint64_t eip;
+ uint8_t mode;
+ uint8_t event;
+};
+
+/* PC value that indicates a special code */
+#define XENOPROF_ESCAPE_CODE ~0UL
+/* Transient events for the xenoprof->oprofile cpu buf */
+#define XENOPROF_TRACE_BEGIN 1
+
+/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
+struct xenoprof_buf {
+ uint32_t event_head;
+ uint32_t event_tail;
+ uint32_t event_size;
+ uint32_t vcpu_id;
+ uint64_t xen_samples;
+ uint64_t kernel_samples;
+ uint64_t user_samples;
+ uint64_t lost_samples;
+ struct event_log event_log[1];
+};
+#ifndef __XEN__
+typedef struct xenoprof_buf xenoprof_buf_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t);
+#endif
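+
+/*
+ * Illustrative example (not part of the interface): draining samples from a
+ * per-VCPU buffer.  Xen advances event_head as it produces samples, the
+ * domain advances event_tail as it consumes them, and event_size is the
+ * number of event_log[] slots.
+ */
+#if 0
+static inline void example_drain_samples(struct xenoprof_buf *buf)
+{
+    uint32_t tail = buf->event_tail;
+
+    while (tail != buf->event_head) {
+        struct event_log *s = &buf->event_log[tail];
+        /* consume s->eip, s->mode and s->event here */
+        (void) s;
+        tail = (tail + 1) % buf->event_size;
+    }
+    buf->event_tail = tail;
+}
+#endif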
+
+struct xenoprof_init {
+ int32_t num_events;
+ int32_t is_primary;
+ char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+};
+typedef struct xenoprof_init xenoprof_init_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t);
+
+struct xenoprof_get_buffer {
+ int32_t max_samples;
+ int32_t nbuf;
+ int32_t bufsize;
+ uint64_t buf_gmaddr;
+};
+typedef struct xenoprof_get_buffer xenoprof_get_buffer_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t);
+
+struct xenoprof_counter {
+ uint32_t ind;
+ uint64_t count;
+ uint32_t enabled;
+ uint32_t event;
+ uint32_t hypervisor;
+ uint32_t kernel;
+ uint32_t user;
+ uint64_t unit_mask;
+};
+typedef struct xenoprof_counter xenoprof_counter_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
+
+typedef struct xenoprof_passive {
+ uint16_t domain_id;
+ int32_t max_samples;
+ int32_t nbuf;
+ int32_t bufsize;
+ uint64_t buf_gmaddr;
+} xenoprof_passive_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t);
+
+
+#endif /* __XEN_PUBLIC_XENOPROF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/ring.c b/xen/ring.c
new file mode 100644
index 00000000..8644059e
--- /dev/null
+++ b/xen/ring.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include "ring.h"
+
+/* Copy size bytes from src into the ring at dest, wrapping from the ring's
+ * end back to its start if needed. */
+void hyp_ring_store(void *dest, const void *src, size_t size, void *start, void *end)
+{
+ if (dest + size > end) {
+ size_t first_size = end - dest;
+ memcpy(dest, src, first_size);
+ src += first_size;
+ dest = start;
+ size -= first_size;
+ }
+ memcpy(dest, src, size);
+}
+
+/* Copy size bytes from the ring at src into dest, wrapping from the ring's
+ * end back to its start if needed. */
+void hyp_ring_fetch(void *dest, const void *src, size_t size, void *start, void *end)
+{
+ if (src + size > end) {
+ size_t first_size = end - src;
+ memcpy(dest, src, first_size);
+ dest += first_size;
+ src = start;
+ size -= first_size;
+ }
+ memcpy(dest, src, size);
+}
+
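+/* Return the length (excluding the terminating NUL) of the string at *c,
+ * which may wrap around the ring, and advance *c to just past the NUL. */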
+size_t hyp_ring_next_word(char **c, void *start, void *end)
+{
+ size_t n = 0;
+
+ while (**c) {
+ n++;
+ if (++(*c) == end)
+ *c = start;
+ }
+ (*c)++;
+
+ return n;
+}
diff --git a/xen/ring.h b/xen/ring.h
new file mode 100644
index 00000000..6ed00acd
--- /dev/null
+++ b/xen/ring.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_RING_H
+#define XEN_RING_H
+
+typedef unsigned32_t hyp_ring_pos_t;
+
+#define hyp_ring_idx(ring, pos) (((unsigned)(pos)) & (sizeof(ring)-1))
+#define hyp_ring_cell(ring, pos) (ring)[hyp_ring_idx((ring), (pos))]
+#define hyp_ring_smash(ring, prod, cons) (hyp_ring_idx((ring), (prod) + 1) == \
+ hyp_ring_idx((ring), (cons)))
+#define hyp_ring_available(ring, prod, cons) hyp_ring_idx((ring), (cons)-(prod)-1)
+
+void hyp_ring_store(void *dest, const void *src, size_t size, void *start, void *end);
+void hyp_ring_fetch(void *dest, const void *src, size_t size, void *start, void *end);
+size_t hyp_ring_next_word(char **c, void *start, void *end);
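+
+/*
+ * Illustrative usage (mirroring the console and store drivers): to produce
+ * 'len' bytes into a power-of-two ring 'buf' with indices 'prod'/'cons':
+ *
+ *	while (hyp_ring_available(buf, prod, cons) < len)
+ *		;				(wait for the consumer)
+ *	hyp_ring_store(&hyp_ring_cell(buf, prod), data, len,
+ *		       buf, buf + sizeof(buf));
+ *	wmb();				(data visible before index update)
+ *	prod += len;
+ */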
+
+#endif /* XEN_RING_H */
diff --git a/xen/store.c b/xen/store.c
new file mode 100644
index 00000000..3c6baebf
--- /dev/null
+++ b/xen/store.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <mach/mig_support.h>
+#include <machine/pmap.h>
+#include <machine/ipl.h>
+#include <stdarg.h>
+#include <string.h>
+#include <alloca.h>
+#include <xen/public/xen.h>
+#include <xen/public/io/xs_wire.h>
+#include <util/atoi.h>
+#include "store.h"
+#include "ring.h"
+#include "evt.h"
+#include "xen.h"
+
+/* TODO use events instead of just yielding */
+
+/* Hypervisor part */
+
+decl_simple_lock_data(static, lock);
+
+static struct xenstore_domain_interface *store;
+
+struct store_req {
+ const char *data;
+ unsigned len;
+};
+
+/* Send a request: an xsd_sockmsg header followed by the concatenated
+ * payload, copied into the request ring. */
+static void store_put(hyp_store_transaction_t t, unsigned32_t type, struct store_req *req, unsigned nr_reqs) {
+ struct xsd_sockmsg head = {
+ .type = type,
+ .req_id = 0,
+ .tx_id = t,
+ };
+ unsigned totlen, len;
+ unsigned i;
+
+ totlen = 0;
+ for (i = 0; i < nr_reqs; i++)
+ totlen += req[i].len;
+ head.len = totlen;
+ totlen += sizeof(head);
+
+ if (totlen > sizeof(store->req) - 1)
+ panic("too big store message %d, max %d", totlen, sizeof(store->req));
+
+ while (hyp_ring_available(store->req, store->req_prod, store->req_cons) < totlen)
+ hyp_yield();
+
+ mb();
+ hyp_ring_store(&hyp_ring_cell(store->req, store->req_prod), &head, sizeof(head), store->req, store->req + sizeof(store->req));
+ len = sizeof(head);
+ for (i=0; i<nr_reqs; i++) {
+ hyp_ring_store(&hyp_ring_cell(store->req, store->req_prod + len), req[i].data, req[i].len, store->req, store->req + sizeof(store->req));
+ len += req[i].len;
+ }
+
+ wmb();
+ store->req_prod += totlen;
+ hyp_event_channel_send(boot_info.store_evtchn);
+}
+
+static const char *errors[] = {
+ "EINVAL",
+ "EACCES",
+ "EEXIST",
+ "EISDIR",
+ "ENOENT",
+ "ENOMEM",
+ "ENOSPC",
+ "EIO",
+ "ENOTEMPTY",
+ "ENOSYS",
+ "EROFS",
+ "EBUSY",
+ "EAGAIN",
+ "EISCONN",
+ NULL,
+};
+
+/* Send a request and wait for the reply.  The reply header is stored in
+ * 'head' and a pointer to the reply data (still inside the ring!) is
+ * returned.  On error, NULL is returned and the lock is released.
+ * Otherwise the lock remains held and store_put_wait_end() must be called
+ * once the data has been read. */
+static struct xsd_sockmsg head;
+const char *hyp_store_error;
+
+static void *store_put_wait(hyp_store_transaction_t t, unsigned32_t type, struct store_req *req, unsigned nr_reqs) {
+ unsigned len;
+ const char **error;
+ void *data;
+
+ simple_lock(&lock);
+ store_put(t, type, req, nr_reqs);
+again:
+ while (store->rsp_prod - store->rsp_cons < sizeof(head))
+ hyp_yield();
+ rmb();
+ hyp_ring_fetch(&head, &hyp_ring_cell(store->rsp, store->rsp_cons), sizeof(head), store->rsp, store->rsp + sizeof(store->rsp));
+ len = sizeof(head) + head.len;
+ while (store->rsp_prod - store->rsp_cons < len)
+ hyp_yield();
+ rmb();
+ if (head.type == XS_WATCH_EVENT) {
+ /* Spurious watch event, drop */
+ store->rsp_cons += sizeof(head) + head.len;
+ hyp_event_channel_send(boot_info.store_evtchn);
+ goto again;
+ }
+ data = &hyp_ring_cell(store->rsp, store->rsp_cons + sizeof(head));
+ if (head.len <= 10) {
+ char c[10];
+ hyp_ring_fetch(c, data, head.len, store->rsp, store->rsp + sizeof(store->rsp));
+ for (error = errors; *error; error++) {
+ if (head.len == strlen(*error) + 1 && !memcmp(*error, c, head.len)) {
+ hyp_store_error = *error;
+ store->rsp_cons += len;
+ hyp_event_channel_send(boot_info.store_evtchn);
+ simple_unlock(&lock);
+ return NULL;
+ }
+ }
+ }
+ return data;
+}
+
+/* Must be called after each successful store_put_wait(). Releases the lock. */
+static void store_put_wait_end(void) {
+ mb();
+ store->rsp_cons += sizeof(head) + head.len;
+ hyp_event_channel_send(boot_info.store_evtchn);
+ simple_unlock(&lock);
+}
+
+/* Start a transaction. */
+hyp_store_transaction_t hyp_store_transaction_start(void) {
+ struct store_req req = {
+ .data = "",
+ .len = 1,
+ };
+ char *rep;
+ char *s;
+ int i;
+
+ rep = store_put_wait(0, XS_TRANSACTION_START, &req, 1);
+ if (!rep)
+ panic("couldn't start transaction (%s)", hyp_store_error);
+ s = alloca(head.len);
+ hyp_ring_fetch(s, rep, head.len, store->rsp, store->rsp + sizeof(store->rsp));
+ mach_atoi((u_char*) s, &i);
+ if (i == MACH_ATOI_DEFAULT)
+ panic("bogus transaction id len %d '%s'", head.len, s);
+ store_put_wait_end();
+ return i;
+}
+
+/* Stop a transaction. */
+int hyp_store_transaction_stop(hyp_store_transaction_t t) {
+ struct store_req req = {
+ .data = "T",
+ .len = 2,
+ };
+ int ret = 1;
+ void *rep;
+ rep = store_put_wait(t, XS_TRANSACTION_END, &req, 1);
+ if (!rep)
+ return 0;
+ store_put_wait_end();
+ return ret;
+}
+
+/* List a directory: returns a NULL-terminated array of file names.  Free
+ * with kfree. */
+char **hyp_store_ls(hyp_store_transaction_t t, int n, ...) {
+ struct store_req req[n];
+ va_list listp;
+ int i;
+ char *rep;
+ char *c;
+ char **res, **rsp;
+
+ va_start (listp, n);
+ for (i = 0; i < n; i++) {
+ req[i].data = va_arg(listp, char *);
+ req[i].len = strlen(req[i].data);
+ }
+ req[n - 1].len++;
+ va_end (listp);
+
+ rep = store_put_wait(t, XS_DIRECTORY, req, n);
+ if (!rep)
+ return NULL;
+ i = 0;
+ for ( c = rep, n = 0;
+ n < head.len;
+ n += hyp_ring_next_word(&c, store->rsp, store->rsp + sizeof(store->rsp)) + 1)
+ i++;
+ res = (void*) kalloc((i + 1) * sizeof(char*) + head.len);
+ if (!res)
+ hyp_store_error = "ENOMEM";
+ else {
+ hyp_ring_fetch(res + (i + 1), rep, head.len, store->rsp, store->rsp + sizeof(store->rsp));
+ rsp = res;
+ for (c = (char*) (res + (i + 1)); i; i--, c += strlen(c) + 1)
+ *rsp++ = c;
+ *rsp = NULL;
+ }
+ store_put_wait_end();
+ return res;
+}
+
+/* Get the value of an entry, va version. */
+static void *hyp_store_read_va(hyp_store_transaction_t t, int n, va_list listp) {
+ struct store_req req[n];
+ int i;
+ void *rep;
+ char *res;
+
+ for (i = 0; i < n; i++) {
+ req[i].data = va_arg(listp, char *);
+ req[i].len = strlen(req[i].data);
+ }
+ req[n - 1].len++;
+
+ rep = store_put_wait(t, XS_READ, req, n);
+ if (!rep)
+ return NULL;
+ res = (void*) kalloc(head.len + 1);
+ if (!res)
+ hyp_store_error = "ENOMEM";
+ else {
+ hyp_ring_fetch(res, rep, head.len, store->rsp, store->rsp + sizeof(store->rsp));
+ res[head.len] = 0;
+ }
+ store_put_wait_end();
+ return res;
+}
+
+/* Get the value of an entry. Free with kfree. */
+void *hyp_store_read(hyp_store_transaction_t t, int n, ...) {
+ va_list listp;
+ char *res;
+
+ va_start(listp, n);
+ res = hyp_store_read_va(t, n, listp);
+ va_end(listp);
+ return res;
+}
+
+/* Get the integer value of an entry, -1 on error. */
+int hyp_store_read_int(hyp_store_transaction_t t, int n, ...) {
+ va_list listp;
+ char *res;
+ int i;
+
+ va_start(listp, n);
+ res = hyp_store_read_va(t, n, listp);
+ va_end(listp);
+ if (!res)
+ return -1;
+ mach_atoi((u_char *) res, &i);
+ if (i == MACH_ATOI_DEFAULT)
+ printf("bogus integer '%s'\n", res);
+ kfree((vm_offset_t) res, strlen(res)+1);
+ return i;
+}
+
+/* Set the value of an entry. */
+char *hyp_store_write(hyp_store_transaction_t t, const char *data, int n, ...) {
+ struct store_req req[n + 1];
+ va_list listp;
+ int i;
+ void *rep;
+ char *res;
+
+ va_start (listp, n);
+ for (i = 0; i < n; i++) {
+ req[i].data = va_arg(listp, char *);
+ req[i].len = strlen(req[i].data);
+ }
+ req[n - 1].len++;
+ req[n].data = data;
+ req[n].len = strlen (data);
+ va_end (listp);
+
+ rep = store_put_wait (t, XS_WRITE, req, n + 1);
+ if (!rep)
+ return NULL;
+ res = (void*) kalloc(head.len + 1);
+ if (!res)
+ hyp_store_error = "ENOMEM";
+ else {
+ hyp_ring_fetch(res, rep, head.len, store->rsp, store->rsp + sizeof(store->rsp));
+ res[head.len] = 0;
+ }
+ store_put_wait_end();
+ return res;
+}
+
+static void hyp_store_handler(int unit)
+{
+ thread_wakeup(&boot_info.store_evtchn);
+}
+
+/* Map store's shared page. */
+void hyp_store_init(void)
+{
+ if (store)
+ return;
+ simple_lock_init(&lock);
+ store = (void*) mfn_to_kv(boot_info.store_mfn);
+ pmap_set_page_readwrite(store);
+ /* SPL sched */
+ hyp_evt_handler(boot_info.store_evtchn, hyp_store_handler, 0, SPL7);
+}
diff --git a/xen/store.h b/xen/store.h
new file mode 100644
index 00000000..4b3ee187
--- /dev/null
+++ b/xen/store.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_STORE_H
+#define XEN_STORE_H
+#include <machine/xen.h>
+#include <xen/public/io/xenbus.h>
+
+typedef unsigned32_t hyp_store_transaction_t;
+
+#define hyp_store_state_unknown "0"
+#define hyp_store_state_initializing "1"
+#define hyp_store_state_init_wait "2"
+#define hyp_store_state_initialized "3"
+#define hyp_store_state_connected "4"
+#define hyp_store_state_closing "5"
+#define hyp_store_state_closed "6"
+
+void hyp_store_init(void);
+
+extern const char *hyp_store_error;
+
+/* Start a transaction. */
+hyp_store_transaction_t hyp_store_transaction_start(void);
+/* Stop a transaction. Returns 1 if the transaction succeeded, 0 otherwise. */
+int hyp_store_transaction_stop(hyp_store_transaction_t t);
+
+/* List a directory: returns a NULL-terminated array of file names.  Free
+ * with kfree. */
+char **hyp_store_ls(hyp_store_transaction_t t, int n, ...);
+
+/* Get the value of an entry. Free with kfree. */
+void *hyp_store_read(hyp_store_transaction_t t, int n, ...);
+/* Get the integer value of an entry, -1 on error. */
+int hyp_store_read_int(hyp_store_transaction_t t, int n, ...);
+/* Set the value of an entry. */
+char *hyp_store_write(hyp_store_transaction_t t, const char *data, int n, ...);
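+
+/* Illustrative usage (the path components are only an example):
+ *
+ *	hyp_store_transaction_t t = hyp_store_transaction_start();
+ *	char *state = hyp_store_read(t, 2, "device/vif/0/", "state");
+ *	if (state) {
+ *		...
+ *		kfree((vm_offset_t) state, strlen(state) + 1);
+ *	}
+ *	hyp_store_transaction_stop(t);
+ *
+ * Successive arguments are concatenated to form the node name.
+ */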
+
+#endif /* XEN_STORE_H */
diff --git a/xen/time.c b/xen/time.c
new file mode 100644
index 00000000..4c5cc351
--- /dev/null
+++ b/xen/time.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <mach/mach_types.h>
+#include <kern/mach_clock.h>
+#include <mach/xen.h>
+#include <machine/xen.h>
+#include <machine/spl.h>
+#include <machine/ipl.h>
+#include <mach/machine/eflags.h>
+#include <xen/evt.h>
+#include "time.h"
+#include "store.h"
+
+static unsigned64_t lastnsec;
+
+/* 2^64 nanoseconds ~= 500 years */
+static unsigned64_t hyp_get_stime(void) {
+ unsigned32_t version;
+ unsigned64_t cpu_clock, last_cpu_clock, delta, system_time;
+ unsigned32_t mul;
+ signed8_t shift;
+ volatile struct vcpu_time_info *time = &hyp_shared_info.vcpu_info[0].time;
+
+ do {
+ version = time->version;
+ rmb();
+ cpu_clock = hyp_cpu_clock();
+ last_cpu_clock = time->tsc_timestamp;
+ system_time = time->system_time;
+ mul = time->tsc_to_system_mul;
+ shift = time->tsc_shift;
+ rmb();
+ } while (version != time->version);
+
+ delta = cpu_clock - last_cpu_clock;
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+ return system_time + ((delta * (unsigned64_t) mul) >> 32);
+}
+
+unsigned64_t hyp_get_time(void) {
+ unsigned32_t version;
+ unsigned32_t sec, nsec;
+
+ do {
+ version = hyp_shared_info.wc_version;
+ rmb();
+ sec = hyp_shared_info.wc_sec;
+ nsec = hyp_shared_info.wc_nsec;
+ rmb();
+ } while (version != hyp_shared_info.wc_version);
+
+ return sec*1000000000ULL + nsec + hyp_get_stime();
+}
+
+static void hypclock_intr(int unit, int old_ipl, void *ret_addr, struct i386_interrupt_state *regs) {
+ unsigned64_t nsec, delta;
+
+ if (!lastnsec)
+ return;
+
+ nsec = hyp_get_stime();
+ if (nsec < lastnsec) {
+ printf("warning: nsec 0x%08lx%08lx < lastnsec 0x%08lx%08lx\n",(unsigned long)(nsec>>32), (unsigned long)nsec, (unsigned long)(lastnsec>>32), (unsigned long)lastnsec);
+ nsec = lastnsec;
+ }
+ delta = nsec-lastnsec;
+
+ lastnsec += (delta/1000)*1000;
+ hypclock_machine_intr(old_ipl, ret_addr, regs, delta);
+ /* re-arm the one-shot timer for the next 10ms tick */
+ hyp_do_set_timer_op(hyp_get_stime()+10*1000*1000);
+
+#if 0
+ char *c = hyp_store_read(0, 1, "control/shutdown");
+ if (c) {
+ static int go_down = 0;
+ if (!go_down) {
+ printf("uh oh, shutdown: %s\n", c);
+ go_down = 1;
+ /* TODO: somehow send startup_reboot notification to init */
+ if (!strcmp(c, "reboot")) {
+ /* this is just a reboot */
+ }
+ }
+ }
+#endif
+}
+
+extern struct timeval time;
+extern struct timezone tz;
+
+int
+readtodc(tp)
+ u_int *tp;
+{
+ unsigned64_t t = hyp_get_time();
+ u_int n = t / 1000000000;
+
+#ifndef MACH_KERNEL
+ n += tz.tz_minuteswest * 60;
+ if (tz.tz_dsttime)
+ n -= 3600;
+#endif /* MACH_KERNEL */
+ *tp = n;
+
+ return(0);
+}
+
+int
+writetodc()
+{
+ /* Not allowed in Xen */
+ return(-1);
+}
+
+void
+clkstart()
+{
+ evtchn_port_t port = hyp_event_channel_bind_virq(VIRQ_TIMER, 0);
+ hyp_evt_handler(port, hypclock_intr, 0, SPLHI);
+
+ /* first clock tick */
+ clock_interrupt(0, 0, 0);
+ lastnsec = hyp_get_stime();
+
+ /* re-arm the one-shot timer for the next 10ms tick */
+ hyp_do_set_timer_op(hyp_get_stime()+10*1000*1000);
+}
diff --git a/xen/time.h b/xen/time.h
new file mode 100644
index 00000000..f8755884
--- /dev/null
+++ b/xen/time.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_TIME_H
+#define XEN_TIME_H
+
+#include <mach/mach_types.h>
+unsigned64_t hyp_get_time(void);
+
+#endif /* XEN_TIME_H */
diff --git a/xen/xen.c b/xen/xen.c
new file mode 100644
index 00000000..062ee4d5
--- /dev/null
+++ b/xen/xen.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2007 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+#include <mach/xen.h>
+#include <machine/xen.h>
+#include <machine/ipl.h>
+#include <xen/block.h>
+#include <xen/console.h>
+#include <xen/grant.h>
+#include <xen/net.h>
+#include <xen/store.h>
+#include <xen/time.h>
+#include "xen.h"
+#include "evt.h"
+
+void hyp_invalidate_pte(pt_entry_t *pte)
+{
+ if (!hyp_mmu_update_pte(kv_to_ma(pte), (*pte) & ~INTEL_PTE_VALID))
+ panic("%s:%d could not set pte %p(%p) to %p(%p)\n",__FILE__,__LINE__,pte,(vm_offset_t) kv_to_ma(pte),*pte,ma_to_pa(*pte));
+ hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL);
+}
+
+void hyp_debug()
+{
+ panic("debug");
+}
+
+void hyp_init(void)
+{
+ hyp_grant_init();
+ hyp_store_init();
+ /* these depend on the above */
+ hyp_block_init();
+ hyp_net_init();
+ evtchn_port_t port = hyp_event_channel_bind_virq(VIRQ_DEBUG, 0);
+ hyp_evt_handler(port, hyp_debug, 0, SPL7);
+}
+
+void _hyp_halt(void)
+{
+ hyp_halt();
+}
+
+void _hyp_todo(unsigned long from)
+{
+ printf("TODO: at %lx\n",from);
+ hyp_halt();
+}
+
+extern int int_mask[];
+void hyp_idle(void)
+{
+ int cpu = 0;
+ hyp_shared_info.vcpu_info[cpu].evtchn_upcall_mask = 0xff;
+ barrier();
+ /* Avoid blocking if there are pending events */
+ if (!hyp_shared_info.vcpu_info[cpu].evtchn_upcall_pending &&
+ !hyp_shared_info.evtchn_pending[cpu])
+ hyp_block();
+ while (1) {
+ hyp_shared_info.vcpu_info[cpu].evtchn_upcall_mask = 0x00;
+ barrier();
+ if (!hyp_shared_info.vcpu_info[cpu].evtchn_upcall_pending &&
+ !hyp_shared_info.evtchn_pending[cpu])
+ /* Didn't miss any event, can return to threads. */
+ break;
+ hyp_shared_info.vcpu_info[cpu].evtchn_upcall_mask = 0xff;
+ hyp_c_callback(NULL,NULL);
+ }
+}
diff --git a/xen/xen.h b/xen/xen.h
new file mode 100644
index 00000000..87e1256c
--- /dev/null
+++ b/xen/xen.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2006 Samuel Thibault <samuel.thibault@ens-lyon.org>
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef XEN_XEN_H
+#define XEN_XEN_H
+
+void hyp_init(void);
+void hyp_invalidate_pte(pt_entry_t *pte);
+void hyp_idle(void);
+void hyp_p2m_init(void);
+
+#endif /* XEN_XEN_H */