summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Braun <rbraun@sceen.net>2012-09-30 19:31:58 +0200
committerRichard Braun <rbraun@sceen.net>2012-09-30 19:31:58 +0200
commit69504fc63720b4bf2677d6074285b82256bc9b83 (patch)
tree47fad139526df60554e3fd26a7b8b1577f29d2d0
Initial commit
-rw-r--r--.gitignore15
-rw-r--r--AUTHORS2
-rw-r--r--COPYING674
-rw-r--r--ChangeLog1
-rw-r--r--INSTALL248
-rw-r--r--Makefile.am40
-rw-r--r--Makefrag.am52
-rw-r--r--NEWS1
-rw-r--r--README5
-rw-r--r--README-alpha14
-rw-r--r--arch/i386/Makefrag.am34
-rw-r--r--arch/i386/configfrag.ac15
-rw-r--r--arch/i386/machine/acpimp.c494
-rw-r--r--arch/i386/machine/acpimp.h33
-rw-r--r--arch/i386/machine/asm.h42
-rw-r--r--arch/i386/machine/biosmem.c687
-rw-r--r--arch/i386/machine/biosmem.h72
-rw-r--r--arch/i386/machine/boot.S232
-rw-r--r--arch/i386/machine/boot.h85
-rw-r--r--arch/i386/machine/cpu.c453
-rw-r--r--arch/i386/machine/cpu.h406
-rw-r--r--arch/i386/machine/cpu_asm.S61
-rw-r--r--arch/i386/machine/init.c265
-rw-r--r--arch/i386/machine/init.h82
-rw-r--r--arch/i386/machine/io.h44
-rw-r--r--arch/i386/machine/lapic.c322
-rw-r--r--arch/i386/machine/lapic.h45
-rw-r--r--arch/i386/machine/mps.c446
-rw-r--r--arch/i386/machine/mps.h33
-rw-r--r--arch/i386/machine/multiboot.h90
-rw-r--r--arch/i386/machine/param.h108
-rw-r--r--arch/i386/machine/pit.c88
-rw-r--r--arch/i386/machine/pit.h31
-rw-r--r--arch/i386/machine/pmap.c272
-rw-r--r--arch/i386/machine/pmap.h197
-rw-r--r--arch/i386/machine/trap.h47
-rw-r--r--arch/i386/machine/types.h27
-rw-r--r--arch/i386/machine/vga.c184
-rw-r--r--arch/i386/machine/vga.h33
-rw-r--r--arch/i386/x15.lds.S88
-rw-r--r--configure.ac70
-rw-r--r--kern/error.h23
-rw-r--r--kern/init.h31
-rw-r--r--kern/kernel.c31
-rw-r--r--kern/kernel.h37
-rw-r--r--kern/kmem.c1307
-rw-r--r--kern/kmem.h306
-rw-r--r--kern/panic.c40
-rw-r--r--kern/panic.h28
-rw-r--r--kern/param.h26
-rw-r--r--kern/printk.c64
-rw-r--r--kern/printk.h37
-rw-r--r--kern/types.h23
-rw-r--r--lib/assert.h40
-rw-r--r--lib/limits.h23
-rw-r--r--lib/list.h364
-rw-r--r--lib/macros.h64
-rw-r--r--lib/rbtree.c489
-rw-r--r--lib/rbtree.h299
-rw-r--r--lib/rbtree_i.h187
-rw-r--r--lib/sprintf.c550
-rw-r--r--lib/sprintf.h43
-rw-r--r--lib/stddef.h35
-rw-r--r--lib/stdint.h38
-rw-r--r--lib/string.c138
-rw-r--r--lib/string.h31
-rw-r--r--vm/vm_inherit.h29
-rw-r--r--vm/vm_kmem.c220
-rw-r--r--vm/vm_kmem.h86
-rw-r--r--vm/vm_map.c644
-rw-r--r--vm/vm_map.h141
-rw-r--r--vm/vm_page.h53
-rw-r--r--vm/vm_phys.c625
-rw-r--r--vm/vm_phys.h87
-rw-r--r--vm/vm_prot.h31
-rw-r--r--vm/vm_setup.c33
-rw-r--r--vm/vm_setup.h28
77 files changed, 12369 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..66ddd837
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+*.o
+*.a
+.deps/
+.dirstamp
+aclocal.m4
+autom4te.cache/
+build-aux/
+config.*
+configure
+tags
+stamp-h1
+x15.lds
+x15
+Makefile
+Makefile.in
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 00000000..d5dfaa72
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,2 @@
+X15 is currently being written by Richard Braun. Comments and various
+tips and fixes have been provided by the Hurd community.
diff --git a/COPYING b/COPYING
new file mode 100644
index 00000000..94a9ed02
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 00000000..bc5a4fc7
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1 @@
+Use git log to examine the project history.
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 00000000..b6b65e5d
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,248 @@
+Installation Instructions
+*************************
+
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006 Free Software Foundation, Inc.
+
+This file is free documentation; the Free Software Foundation gives
+unlimited permission to copy, distribute and modify it.
+
+Basic Installation
+==================
+
+Briefly, the shell commands `./configure; make; make install' should
+configure, build, and install this package. The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+ It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring. Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+ The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'. You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system.
+
+ Running `configure' might take a while. While running, it prints
+ some messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+Compilers and Options
+=====================
+
+Some systems require unusual options for compilation or linking that the
+`configure' script does not know about. Run `./configure --help' for
+details on some of the pertinent environment variables.
+
+ You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+
+ *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you can use GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory. After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+
+Installation Names
+==================
+
+By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc. You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+There may be some features `configure' cannot figure out automatically,
+but needs to determine by the type of machine the package will run on.
+Usually, assuming the package is built to be run on the _same_
+architectures, `configure' can figure that out, but if it prints a
+message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+If you want to set default values for `configure' scripts to share, you
+can create a site shell script called `config.site' that gives default
+values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf bug. Until the bug is fixed you can use this workaround:
+
+ CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
+
+`configure' Invocation
+======================
+
+`configure' recognizes the following options to control how it operates.
+
+`--help'
+`-h'
+ Print a summary of the options to `configure', and exit.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details.
+
+
+X15 Options
+===========
+
+`--disable-debug'
+ Disable all debugging facilities. By default, debugging is enabled.
+
+
+i386 Options
+------------
+
+`--enable-pae'
+ Use the PAE (Physical Address Extension) processor feature to address
+ physical memory beyond 4 GiB.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 00000000..39a7f68a
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,40 @@
+noinst_LIBRARIES =
+MOSTLYCLEANFILES =
+
+AM_CPPFLAGS = \
+ -pipe \
+ -std=gnu99 \
+ -imacros config.h \
+ -I$(top_srcdir) \
+ -I$(top_srcdir)/arch/$(systype)
+
+AM_CFLAGS = \
+ -Wall \
+ -Wextra \
+ -Wmissing-prototypes \
+ -Wstrict-prototypes \
+ -fsigned-char \
+ -ffreestanding \
+ -fno-stack-protector
+
+# Silent build support.
+LDS_V = $(LDS_V_$(V))
+LDS_V_ = $(LDS_V_$(AM_DEFAULT_VERBOSITY))
+LDS_V_0 = @echo " LDS $@";
+
+SUFFIXES = .lds .lds.S
+
+.lds.S.lds:
+ $(LDS_V) $(CPP) -P $(AM_CPPFLAGS) -o $@ $<
+
+exec_bootdir = $(exec_prefix)/boot
+exec_boot_PROGRAMS = x15
+x15_DEPENDENCIES = arch/$(systype)/x15.lds
+MOSTLYCLEANFILES += arch/$(systype)/x15.lds
+x15_SOURCES =
+nodist_x15_SOURCES =
+x15_LDFLAGS = -nostartfiles -nostdlib -T arch/$(systype)/x15.lds
+x15_LDADD = -lgcc
+
+# Sources
+include Makefrag.am
diff --git a/Makefrag.am b/Makefrag.am
new file mode 100644
index 00000000..dbda2000
--- /dev/null
+++ b/Makefrag.am
@@ -0,0 +1,52 @@
+#
+# 'arch/xxx' sources
+#
+include arch/i386/Makefrag.am
+
+#
+# 'kern/' sources
+#
+x15_SOURCES += \
+ kern/init.h \
+ kern/kernel.c \
+ kern/kernel.h \
+ kern/kmem.c \
+ kern/kmem.h \
+ kern/panic.c \
+ kern/panic.h \
+ kern/printk.c \
+ kern/printk.h
+
+#
+# 'lib/' sources
+#
+x15_SOURCES += \
+ lib/assert.h \
+ lib/limits.h \
+ lib/list.h \
+ lib/macros.h \
+ lib/rbtree.c \
+ lib/rbtree.h \
+ lib/rbtree_i.h \
+ lib/sprintf.c \
+ lib/sprintf.h \
+ lib/stddef.h \
+ lib/stdint.h \
+ lib/string.c \
+ lib/string.h
+
+#
+# 'vm/' sources
+#
+x15_SOURCES += \
+ vm/vm_kmem.c \
+ vm/vm_kmem.h \
+ vm/vm_map.c \
+ vm/vm_map.h \
+ vm/vm_page.h \
+ vm/vm_param.h \
+ vm/vm_phys.c \
+ vm/vm_phys.h \
+ vm/vm_setup.c \
+ vm/vm_setup.h \
+ vm/vm_types.h
diff --git a/NEWS b/NEWS
new file mode 100644
index 00000000..8a8947c3
--- /dev/null
+++ b/NEWS
@@ -0,0 +1 @@
+No version released.
diff --git a/README b/README
new file mode 100644
index 00000000..a946b2ae
--- /dev/null
+++ b/README
@@ -0,0 +1,5 @@
+X15 is a free microkernel. Its purpose is to provide a foundation for a
+Hurd-like operating system.
+
+Bug reports relating to this distribution should be sent to
+rbraun@sceen.net.
diff --git a/README-alpha b/README-alpha
new file mode 100644
index 00000000..f32e2ff1
--- /dev/null
+++ b/README-alpha
@@ -0,0 +1,14 @@
+X15 is unstable. You should expect it to not work. You should expect it to have
+security problems. The API will likely change. This is the development release.
+A proper release will be made in due time.
+
+Some of the files that are included in the X15 tarball, such as files generated
+by autotools, are not stored in the repository. Therefore, to build X15 you
+need to run "autoreconf" before anything else:
+
+ $ autoreconf -i
+ $ ./configure
+ ...
+
+This will generate all directories and files needed by the build system. For
+instructions about the build system, check the INSTALL file.
diff --git a/arch/i386/Makefrag.am b/arch/i386/Makefrag.am
new file mode 100644
index 00000000..2d87b70d
--- /dev/null
+++ b/arch/i386/Makefrag.am
@@ -0,0 +1,34 @@
+if I386
+
+# Force 32-bits builds.
+AM_CPPFLAGS += -m32
+x15_LDFLAGS += -m32
+
+x15_SOURCES += \
+ arch/i386/machine/acpimp.c \
+ arch/i386/machine/acpimp.h \
+ arch/i386/machine/biosmem.c \
+ arch/i386/machine/biosmem.h \
+ arch/i386/machine/boot.h \
+ arch/i386/machine/boot.S \
+ arch/i386/machine/cpu.c \
+ arch/i386/machine/cpu_asm.S \
+ arch/i386/machine/cpu.h \
+ arch/i386/machine/endian.h \
+ arch/i386/machine/init.c \
+ arch/i386/machine/init.h \
+ arch/i386/machine/io.h \
+ arch/i386/machine/lapic.c \
+ arch/i386/machine/lapic.h \
+ arch/i386/machine/mps.c \
+ arch/i386/machine/mps.h \
+ arch/i386/machine/multiboot.h \
+ arch/i386/machine/param.h \
+ arch/i386/machine/pit.c \
+ arch/i386/machine/pit.h \
+ arch/i386/machine/pmap.c \
+ arch/i386/machine/pmap.h \
+ arch/i386/machine/vga.c \
+ arch/i386/machine/vga.h
+
+endif I386
diff --git a/arch/i386/configfrag.ac b/arch/i386/configfrag.ac
new file mode 100644
index 00000000..770c625e
--- /dev/null
+++ b/arch/i386/configfrag.ac
@@ -0,0 +1,15 @@
+# SYSTYPE
+systype=i386
+
+#
+# Arguments to configure
+#
+AC_ARG_ENABLE([pae],
+ [AS_HELP_STRING([--enable-pae],
+ [enable PAE to address physical memory beyond 4 GiB])])
+
+#
+# Options
+#
+AS_IF([test x"$enable_pae" = xyes],
+ [AC_DEFINE([PAE], [1], [use PAE page tables])])
diff --git a/arch/i386/machine/acpimp.c b/arch/i386/machine/acpimp.c
new file mode 100644
index 00000000..e2fc6eb3
--- /dev/null
+++ b/arch/i386/machine/acpimp.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/panic.h>
+#include <kern/printk.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <lib/string.h>
+#include <machine/acpimp.h>
+#include <machine/biosmem.h>
+#include <machine/cpu.h>
+#include <machine/io.h>
+#include <machine/lapic.h>
+#include <vm/vm_kmem.h>
+
+/*
+ * Alignment of the RSDP.
+ */
+#define ACPIMP_RSDP_ALIGN 16
+
+/*
+ * Signature of the root system description pointer.
+ */
+#define ACPIMP_RSDP_SIG "RSD PTR "
+
+struct acpimp_rsdp {
+ uint8_t signature[8];
+ uint8_t checksum;
+ uint8_t oem_id[6];
+ uint8_t reserved;
+ uint32_t rsdt_address;
+} __packed;
+
+/*
+ * Size of a buffer which can store a table signature as a string.
+ */
+#define ACPIMP_SIG_SIZE 5
+
+struct acpimp_sdth {
+ uint8_t signature[4];
+ uint32_t length;
+ uint8_t revision;
+ uint8_t checksum;
+ uint8_t oem_id[6];
+ uint8_t oem_table_id[8];
+ uint32_t oem_revision;
+ uint8_t creator_id[4];
+ uint32_t creator_revision;
+} __packed;
+
+struct acpimp_rsdt {
+ struct acpimp_sdth header;
+ uint32_t entries[0];
+} __packed;
+
+/*
+ * MADT entry type codes.
+ */
+#define ACPIMP_MADT_ENTRY_LAPIC 0
+
+struct acpimp_madt_entry_hdr {
+ uint8_t type;
+ uint8_t length;
+} __packed;
+
+#define ACPIMP_MADT_LAPIC_ENABLED 0x1
+
+struct acpimp_madt_entry_lapic {
+ struct acpimp_madt_entry_hdr header;
+ uint8_t processor_id;
+ uint8_t apic_id;
+ uint32_t flags;
+} __packed;
+
+union acpimp_madt_entry {
+ uint8_t type;
+ struct acpimp_madt_entry_hdr header;
+ struct acpimp_madt_entry_lapic lapic;
+} __packed;
+
+struct acpimp_madt {
+ struct acpimp_sdth header;
+ uint32_t lapic_addr;
+ uint32_t flags;
+ union acpimp_madt_entry entries[0];
+} __packed;
+
+struct acpimp_madt_iter {
+ const union acpimp_madt_entry *entry;
+ const union acpimp_madt_entry *end;
+};
+
+#define acpimp_madt_foreach(madt, iter) \
+for (acpimp_madt_iter_init(iter, madt); \
+ acpimp_madt_iter_valid(iter); \
+ acpimp_madt_iter_next(iter))
+
+struct acpimp_table_addr {
+ const char *sig;
+ struct acpimp_sdth *table;
+};
+
+static struct acpimp_table_addr acpimp_table_addrs[] __initdata = {
+ { "RSDT", NULL },
+ { "APIC", NULL }
+};
+
+static void __init
+acpimp_table_sig(const struct acpimp_sdth *table, char sig[ACPIMP_SIG_SIZE])
+{
+ memcpy(sig, table->signature, sizeof(table->signature));
+ sig[4] = '\0';
+}
+
+static int __init
+acpimp_table_required(const struct acpimp_sdth *table)
+{
+ char sig[ACPIMP_SIG_SIZE];
+ size_t i;
+
+ acpimp_table_sig(table, sig);
+
+ for (i = 0; i < ARRAY_SIZE(acpimp_table_addrs); i++)
+ if (strcmp(sig, acpimp_table_addrs[i].sig) == 0)
+ return 1;
+
+ return 0;
+}
+
+static int __init
+acpimp_register_table(struct acpimp_sdth *table)
+{
+ char sig[ACPIMP_SIG_SIZE];
+ size_t i;
+
+ acpimp_table_sig(table, sig);
+
+ for (i = 0; i < ARRAY_SIZE(acpimp_table_addrs); i++)
+ if (strcmp(sig, acpimp_table_addrs[i].sig) == 0) {
+ if (acpimp_table_addrs[i].table != NULL) {
+ printk("acpimp: table %s already registered, aborting\n", sig);
+ return -1;
+ }
+
+ acpimp_table_addrs[i].table = table;
+ return 0;
+ }
+
+ panic("acpimp: attempting to register unknown table '%s'", sig);
+}
+
+static struct acpimp_sdth * __init
+acpimp_lookup_table(const char *sig)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(acpimp_table_addrs); i++)
+ if (strcmp(sig, acpimp_table_addrs[i].sig) == 0)
+ return acpimp_table_addrs[i].table;
+
+ return NULL;
+}
+
+static int __init
+acpimp_check_tables(void)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(acpimp_table_addrs); i++)
+ if (acpimp_table_addrs[i].table == NULL) {
+ printk("acpimp: table %s missing, aborting\n",
+ acpimp_table_addrs[i].sig);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void __init
+acpimp_free_tables(void)
+{
+ struct acpimp_sdth *table;
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(acpimp_table_addrs); i++) {
+ table = acpimp_table_addrs[i].table;
+
+ if (table != NULL)
+ kmem_free(table, table->length);
+ }
+}
+
+static unsigned int __init
+acpimp_checksum(const void *ptr, size_t size)
+{
+ const uint8_t *bytes;
+ uint8_t checksum;
+ size_t i;
+
+ bytes = ptr;
+ checksum = 0;
+
+ for (i = 0; i < size; i++)
+ checksum += bytes[i];
+
+ return checksum;
+}
+
+static int __init
+acpimp_check_rsdp(const struct acpimp_rsdp *rsdp)
+{
+ unsigned int checksum;
+
+ if (memcmp(rsdp->signature, ACPIMP_RSDP_SIG, sizeof(rsdp->signature)) != 0)
+ return -1;
+
+ checksum = acpimp_checksum(rsdp, sizeof(*rsdp));
+
+ if (checksum != 0)
+ return -1;
+
+ return 0;
+}
+
+static int __init
+acpimp_get_rsdp(vm_phys_t start, size_t size, struct acpimp_rsdp *rsdp)
+{
+ const struct acpimp_rsdp *src;
+ unsigned long addr, end, map_addr;
+ size_t map_size;
+ int error;
+
+ assert(size > 0);
+ assert(P2ALIGNED(size, ACPIMP_RSDP_ALIGN));
+
+ if (!P2ALIGNED(start, ACPIMP_RSDP_ALIGN))
+ return -1;
+
+ addr = (unsigned long)vm_kmem_map_pa(start, size, &map_addr, &map_size);
+
+ if (addr == 0)
+ panic("acpimp: unable to map bios memory in kernel map");
+
+ for (end = addr + size; addr < end; addr += ACPIMP_RSDP_ALIGN) {
+ src = (const struct acpimp_rsdp *)addr;
+ error = acpimp_check_rsdp(src);
+
+ if (!error)
+ break;
+ }
+
+ if (!(addr < end)) {
+ error = -1;
+ goto out;
+ }
+
+ memcpy(rsdp, src, sizeof(*rsdp));
+ error = 0;
+
+out:
+ vm_kmem_unmap_pa(map_addr, map_size);
+ return error;
+}
+
+static int __init
+acpimp_find_rsdp(struct acpimp_rsdp *rsdp)
+{
+ const uint16_t *ptr;
+ unsigned long base, map_addr;
+ size_t map_size;
+ int error;
+
+ ptr = vm_kmem_map_pa(BIOSMEM_EBDA_PTR, sizeof(*ptr), &map_addr, &map_size);
+
+ if (ptr == NULL)
+ panic("acpimp: unable to map ebda pointer in kernel map");
+
+ base = *((const volatile uint16_t *)ptr);
+ vm_kmem_unmap_pa(map_addr, map_size);
+
+ if (base != 0) {
+ base <<= 4;
+ error = acpimp_get_rsdp(base, 1024, rsdp);
+
+ if (!error)
+ return 0;
+ }
+
+ error = acpimp_get_rsdp(BIOSMEM_EXT_ROM, BIOSMEM_END - BIOSMEM_EXT_ROM,
+ rsdp);
+
+ if (!error)
+ return 0;
+
+ printk("acpimp: unable to find root system description pointer\n");
+ return -1;
+}
+
+static void __init
+acpimp_info(void)
+{
+ const struct acpimp_sdth *rsdt;
+
+ rsdt = acpimp_lookup_table("RSDT");
+ assert(rsdt != NULL);
+ printk("acpimp: revision: %u, oem: %.*s\n", rsdt->revision,
+ (int)sizeof(rsdt->oem_id), rsdt->oem_id);
+}
+
+static struct acpimp_sdth * __init
+acpimp_copy_table(uint32_t addr)
+{
+ const struct acpimp_sdth *table;
+ struct acpimp_sdth *copy;
+ unsigned long map_addr;
+ size_t size, map_size;
+ unsigned int checksum;
+
+ table = vm_kmem_map_pa(addr, sizeof(*table), &map_addr, &map_size);
+
+ if (table == NULL)
+ panic("unable to map acpi data in kernel map");
+
+ if (!acpimp_table_required(table)) {
+ copy = NULL;
+ goto out;
+ }
+
+ size = ((const volatile struct acpimp_sdth *)table)->length;
+ vm_kmem_unmap_pa(map_addr, map_size);
+
+ table = vm_kmem_map_pa(addr, size, &map_addr, &map_size);
+
+ if (table == NULL)
+ panic("unable to map acpi data in kernel map");
+
+ checksum = acpimp_checksum(table, size);
+
+ if (checksum != 0) {
+ char sig[ACPIMP_SIG_SIZE];
+
+ acpimp_table_sig(table, sig);
+ printk("acpimp: table %s: invalid checksum\n", sig);
+ copy = NULL;
+ goto out;
+ }
+
+ copy = kmem_alloc(size);
+
+ if (copy == NULL)
+ panic("unable to allocate memory for acpi data copy");
+
+ memcpy(copy, table, size);
+
+out:
+ vm_kmem_unmap_pa(map_addr, map_size);
+ return copy;
+}
+
+static int __init
+acpimp_copy_tables(const struct acpimp_rsdp *rsdp)
+{
+ struct acpimp_rsdt *rsdt;
+ struct acpimp_sdth *table;
+ uint32_t *addr, *end;
+ int error;
+
+ table = acpimp_copy_table(rsdp->rsdt_address);
+
+ if (table == NULL)
+ return -1;
+
+ error = acpimp_register_table(table);
+ assert(!error);
+
+ rsdt = structof(table, struct acpimp_rsdt, header);
+ end = (void *)rsdt + rsdt->header.length;
+
+ for (addr = rsdt->entries; addr < end; addr++) {
+ table = acpimp_copy_table(*addr);
+
+ if (table == NULL)
+ continue;
+
+ error = acpimp_register_table(table);
+
+ if (error)
+ goto error;
+ }
+
+ error = acpimp_check_tables();
+
+ if (error)
+ goto error;
+
+ return 0;
+
+error:
+ acpimp_free_tables();
+ return -1;
+}
+
+static void __init
+acpimp_madt_iter_init(struct acpimp_madt_iter *iter,
+ const struct acpimp_madt *madt)
+{
+ iter->entry = madt->entries;
+ iter->end = (void *)madt + madt->header.length;
+}
+
+static int __init
+acpimp_madt_iter_valid(const struct acpimp_madt_iter *iter)
+{
+ return iter->entry < iter->end;
+}
+
+static void __init
+acpimp_madt_iter_next(struct acpimp_madt_iter *iter)
+{
+ iter->entry = (void *)iter->entry + iter->entry->header.length;
+}
+
+static void __init
+acpimp_load_lapic(const struct acpimp_madt_entry_lapic *lapic, int *is_bsp)
+{
+ if (!(lapic->flags & ACPIMP_MADT_LAPIC_ENABLED))
+ return;
+
+ cpu_mp_register_lapic(lapic->apic_id, *is_bsp);
+ *is_bsp = 0;
+}
+
+static void __init
+acpimp_load_madt(void)
+{
+ const struct acpimp_sdth *table;
+ const struct acpimp_madt *madt;
+ struct acpimp_madt_iter iter;
+ int is_bsp;
+
+ table = acpimp_lookup_table("APIC");
+ assert(table != NULL);
+ madt = structof(table, struct acpimp_madt, header);
+ lapic_setup(madt->lapic_addr);
+ is_bsp = 1;
+
+ acpimp_madt_foreach(madt, &iter)
+ switch (iter.entry->type) {
+ case ACPIMP_MADT_ENTRY_LAPIC:
+ acpimp_load_lapic(&iter.entry->lapic, &is_bsp);
+ break;
+ }
+}
+
+int __init
+acpimp_setup(void)
+{
+ struct acpimp_rsdp rsdp;
+ int error;
+
+ error = acpimp_find_rsdp(&rsdp);
+
+ if (error)
+ return error;
+
+ error = acpimp_copy_tables(&rsdp);
+
+ if (error)
+ return error;
+
+ acpimp_info();
+ acpimp_load_madt();
+ acpimp_free_tables();
+ return 0;
+}
diff --git a/arch/i386/machine/acpimp.h b/arch/i386/machine/acpimp.h
new file mode 100644
index 00000000..55c01c91
--- /dev/null
+++ b/arch/i386/machine/acpimp.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Multiprocessor information gathering module, supporting the ACPI
+ * specification v1.0.
+ */
+
+#ifndef _I386_ACPIMP_H
+#define _I386_ACPIMP_H
+
+/*
+ * Load multiprocessor information.
+ *
+ * Return 0 if successful (an error usually means hardware doesn't support
+ * ACPI).
+ */
+int acpimp_setup(void);
+
+#endif /* _I386_ACPIMP_H */
diff --git a/arch/i386/machine/asm.h b/arch/i386/machine/asm.h
new file mode 100644
index 00000000..32a859b9
--- /dev/null
+++ b/arch/i386/machine/asm.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_ASM_H
+#define _I386_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Power-of-two alignment exponents (arguments to .p2align) for code
+ * and data symbols: 16-byte and 4-byte boundaries respectively.
+ */
+#define TEXT_ALIGN 4
+#define DATA_ALIGN 2
+
+/*
+ * Define a global function symbol, padding up to the alignment
+ * boundary with 0x90 (nop) bytes.
+ */
+#define ENTRY(x) \
+.p2align TEXT_ALIGN, 0x90; \
+.global x; \
+.type x, STT_OBJECT; \
+x:
+
+/*
+ * Define a global data symbol.
+ */
+#define DATA(x) \
+.p2align DATA_ALIGN; \
+.global x; \
+.type x, STT_OBJECT; \
+x:
+
+/* Close an ENTRY()/DATA() definition, recording the symbol size. */
+#define END(x) .size x, . - x;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _I386_ASM_H */
diff --git a/arch/i386/machine/biosmem.c b/arch/i386/machine/biosmem.c
new file mode 100644
index 00000000..9d56e21c
--- /dev/null
+++ b/arch/i386/machine/biosmem.c
@@ -0,0 +1,687 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <lib/string.h>
+#include <machine/biosmem.h>
+#include <machine/boot.h>
+#include <machine/init.h>
+#include <machine/multiboot.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Maximum number of entries in the BIOS memory map.
+ *
+ * Because of adjustments of overlapping ranges, the memory map can grow
+ * to twice this size.
+ */
+#define BIOSMEM_MAX_MAP_SIZE 128
+
+/*
+ * Memory range types.
+ */
+#define BIOSMEM_TYPE_AVAILABLE 1
+#define BIOSMEM_TYPE_RESERVED 2
+#define BIOSMEM_TYPE_ACPI 3
+#define BIOSMEM_TYPE_NVS 4
+#define BIOSMEM_TYPE_UNUSABLE 5
+#define BIOSMEM_TYPE_DISABLED 6
+
+/*
+ * Memory map entry.
+ */
+struct biosmem_map_entry {
+ uint64_t base_addr;
+ uint64_t length;
+ unsigned int type;
+};
+
+/*
+ * Memory map built from the information passed by the boot loader.
+ *
+ * If the boot loader didn't pass a valid memory map, a simple map is built
+ * based on the mem_lower and mem_upper multiboot fields.
+ */
+static struct biosmem_map_entry biosmem_map[BIOSMEM_MAX_MAP_SIZE * 2]
+ __initdata;
+
+/*
+ * Number of valid entries in the BIOS memory map table.
+ */
+static unsigned int biosmem_map_size __initdata;
+
+/*
+ * Boundaries of the simple bootstrap heap.
+ */
+static unsigned long biosmem_heap_start __initdata;
+static unsigned long biosmem_heap_free __initdata;
+static unsigned long biosmem_heap_end __initdata;
+
+/*
+ * Import the BIOS memory map provided by the boot loader.
+ *
+ * Runs before paging is enabled (__boot), so globals are accessed
+ * through their physical addresses (BOOT_ADDR_VTOP/BOOT_VTOP). At most
+ * BIOSMEM_MAX_MAP_SIZE entries are copied; extra ones are dropped.
+ */
+static void __boot
+biosmem_map_build(const struct multiboot_info *mbi)
+{
+ struct multiboot_mmap_entry *mb_entry, *mb_end;
+ struct biosmem_map_entry *start, *entry, *end;
+
+ mb_entry = mbi->mmap_addr;
+ /*
+ * NOTE(review): mmap_length is a byte count - this assumes the
+ * addition on mmap_addr is byte arithmetic; verify the declared
+ * type in multiboot.h.
+ */
+ mb_end = mbi->mmap_addr + mbi->mmap_length;
+ start = (struct biosmem_map_entry *)BOOT_ADDR_VTOP(biosmem_map);
+ entry = start;
+ end = entry + BIOSMEM_MAX_MAP_SIZE;
+
+ while ((mb_entry < mb_end) && (entry < end)) {
+ entry->base_addr = mb_entry->base_addr;
+ entry->length = mb_entry->length;
+ entry->type = mb_entry->type;
+
+ /*
+ * Multiboot mmap entries are variable-sized: each is preceded
+ * by a size field which doesn't include itself.
+ */
+ mb_entry = (void *)mb_entry + sizeof(mb_entry->size) + mb_entry->size;
+ entry++;
+ }
+
+ BOOT_VTOP(biosmem_map_size) = entry - start;
+}
+
+/*
+ * Build a minimal two-entry map from the mem_lower/mem_upper multiboot
+ * fields (both in KiB, hence the << 10), used when the boot loader
+ * didn't pass a full memory map: low memory from 0, upper memory from
+ * 1 MiB (BIOSMEM_END).
+ */
+static void __boot
+biosmem_map_build_simple(const struct multiboot_info *mbi)
+{
+ struct biosmem_map_entry *entry;
+
+ entry = (struct biosmem_map_entry *)BOOT_ADDR_VTOP(biosmem_map);
+ entry->base_addr = 0;
+ entry->length = mbi->mem_lower << 10;
+ entry->type = BIOSMEM_TYPE_AVAILABLE;
+
+ entry++;
+ entry->base_addr = BIOSMEM_END;
+ entry->length = mbi->mem_upper << 10;
+ entry->type = BIOSMEM_TYPE_AVAILABLE;
+
+ BOOT_VTOP(biosmem_map_size) = 2;
+}
+
+/*
+ * Record [data_start, data_end) in *start/*end if it lies at or above
+ * min and begins before the earliest boot data found so far (*start).
+ */
+static void __boot
+biosmem_find_boot_data_update(unsigned long min, unsigned long *start,
+ unsigned long *end, const void *data_start,
+ const void *data_end)
+{
+ if ((min <= (unsigned long)data_start)
+ && ((unsigned long)data_start < *start)) {
+ *start = (unsigned long)data_start;
+ *end = (unsigned long)data_end;
+ }
+}
+
+/*
+ * Find the first boot data in the given range, and return their containing
+ * area (start address is returned directly, end address is returned in end).
+ * The following are considered boot data :
+ * - the kernel
+ * - the kernel command line
+ * - the module table
+ * - the modules
+ * - the modules command lines
+ *
+ * If no boot data was found, 0 is returned, and the end address isn't set.
+ */
+static unsigned long __boot
+biosmem_find_boot_data(const struct multiboot_info *mbi, unsigned long min,
+ unsigned long max, unsigned long *endp)
+{
+ struct multiboot_module *mod;
+ /*
+ * "end = end" is the GCC self-initialization idiom silencing the
+ * maybe-uninitialized warning: end is only read once start != max,
+ * i.e. after an update call has actually stored into it.
+ */
+ unsigned long start, end = end;
+ uint32_t i;
+
+ start = max;
+
+ biosmem_find_boot_data_update(min, &start, &end, &_boot,
+ (void *)BOOT_ADDR_VTOP(&_end));
+
+ /*
+ * unused0 and mod->reserved hold the command line sizes saved
+ * earlier by biosmem_save_cmdline_sizes().
+ */
+ if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != NULL))
+ biosmem_find_boot_data_update(min, &start, &end, mbi->cmdline,
+ mbi->cmdline + mbi->unused0);
+
+ if ((mbi->flags & MULTIBOOT_LOADER_MODULES) && (mbi->mods_count > 0)) {
+ biosmem_find_boot_data_update(min, &start, &end, mbi->mods_addr,
+ mbi->mods_addr + mbi->mods_count);
+
+ for (i = 0; i < mbi->mods_count; i++) {
+ mod = &mbi->mods_addr[i];
+ biosmem_find_boot_data_update(min, &start, &end, mod->mod_start,
+ mod->mod_end);
+
+ if (mod->string != NULL)
+ biosmem_find_boot_data_update(min, &start, &end, mod->string,
+ mod->string + mod->reserved);
+ }
+ }
+
+ if (start == max)
+ return 0;
+
+ *endp = end;
+ return start;
+}
+
+/*
+ * Set up the bootstrap page allocator on the largest hole between boot
+ * data areas in upper memory. Each candidate hole runs from the end of
+ * one boot data area (next) to the start of the following one (the
+ * return value of biosmem_find_boot_data()).
+ */
+static void __boot
+biosmem_setup_allocator(struct multiboot_info *mbi)
+{
+ unsigned long heap_start, heap_end, max_heap_start, max_heap_end;
+ unsigned long mem_end, next;
+
+ /*
+ * Find some memory for the heap. Look for the largest unused area in
+ * upper memory, carefully avoiding all boot data.
+ */
+ /*
+ * mem_upper counts KiB above 1 MiB; adding 1024 KiB accounts for
+ * the first MiB, and << 10 converts KiB to bytes.
+ */
+ mem_end = vm_page_trunc((mbi->mem_upper + 1024) << 10);
+ max_heap_start = 0;
+ max_heap_end = 0;
+ next = BIOSMEM_END;
+
+ do {
+ heap_start = next;
+ heap_end = biosmem_find_boot_data(mbi, heap_start, mem_end, &next);
+
+ if (heap_end == 0) {
+ /* No boot data above heap_start: the hole runs to mem_end. */
+ heap_end = mem_end;
+ next = 0;
+ }
+
+ if ((heap_end - heap_start) > (max_heap_end - max_heap_start)) {
+ max_heap_start = heap_start;
+ max_heap_end = heap_end;
+ }
+ } while (next != 0);
+
+ max_heap_start = vm_page_round(max_heap_start);
+ max_heap_end = vm_page_trunc(max_heap_end);
+
+ if (max_heap_start >= max_heap_end)
+ init_panic("unable to find memory for the boot allocator");
+
+ BOOT_VTOP(biosmem_heap_start) = max_heap_start;
+ BOOT_VTOP(biosmem_heap_free) = max_heap_start;
+ BOOT_VTOP(biosmem_heap_end) = max_heap_end;
+}
+
+/*
+ * Minimal strlen usable before paging is enabled; the regular kernel
+ * string functions are linked at virtual addresses and presumably
+ * can't be called from the .boot section - confirm.
+ */
+static size_t __boot
+biosmem_strlen(const char *s)
+{
+ size_t i;
+
+ i = 0;
+
+ while (*s++ != '\0')
+ i++;
+
+ return i;
+}
+
+/*
+ * Save the sizes (including the NUL terminator) of the kernel and
+ * module command lines into otherwise unused multiboot fields, for use
+ * after the original strings are no longer directly addressable.
+ */
+static void __boot
+biosmem_save_cmdline_sizes(struct multiboot_info *mbi)
+{
+ struct multiboot_module *mod;
+ uint32_t i;
+
+ if (mbi->flags & MULTIBOOT_LOADER_CMDLINE)
+ mbi->unused0 = biosmem_strlen(mbi->cmdline) + 1;
+
+ if (mbi->flags & MULTIBOOT_LOADER_MODULES)
+ for (i = 0; i < mbi->mods_count; i++) {
+ mod = &mbi->mods_addr[i];
+ /*
+ * NOTE(review): biosmem_find_boot_data() checks mod->string
+ * for NULL but this doesn't - strlen on a NULL string would
+ * fault. Verify boot loaders always provide a string.
+ */
+ mod->reserved = biosmem_strlen(mod->string) + 1;
+ }
+}
+
+/*
+ * Early initialization entry point: build the memory map from the
+ * multiboot data, save command line sizes, and set up the bootstrap
+ * allocator. Called before paging is enabled.
+ */
+void __boot
+biosmem_bootstrap(struct multiboot_info *mbi)
+{
+ if (mbi->flags & MULTIBOOT_LOADER_MMAP)
+ biosmem_map_build(mbi);
+ else
+ biosmem_map_build_simple(mbi);
+
+ /*
+ * The kernel and modules command lines will be memory mapped later
+ * during initialization. Their respective sizes must be saved.
+ */
+ biosmem_save_cmdline_sizes(mbi);
+ biosmem_setup_allocator(mbi);
+}
+
+/*
+ * Simple bump allocator on the bootstrap heap: returns the physical
+ * address of nr_pages contiguous, zero-filled pages, or panics.
+ * Allocations are never freed individually.
+ */
+void * __boot
+biosmem_bootalloc(unsigned int nr_pages)
+{
+ unsigned long free, page;
+ char *ptr;
+
+ if (nr_pages == 0)
+ init_panic("attempt to allocate 0 pages");
+
+ free = BOOT_VTOP(biosmem_heap_free);
+ page = free;
+ free += PAGE_SIZE * nr_pages;
+
+ /* The first test can only hold if the addition wrapped around. */
+ if ((free <= BOOT_VTOP(biosmem_heap_start))
+ || (free > BOOT_VTOP(biosmem_heap_end)))
+ init_panic("unable to allocate memory");
+
+ BOOT_VTOP(biosmem_heap_free) = free;
+
+ for (ptr = (char *)page; ptr < (char *)free; ptr++)
+ *ptr = '\0';
+
+ return (void *)page;
+}
+
+/*
+ * Human-readable name of a BIOS range type, for the boot log.
+ */
+static const char * __init
+biosmem_type_desc(unsigned int type)
+{
+ switch (type) {
+ case BIOSMEM_TYPE_AVAILABLE:
+ return "available";
+ case BIOSMEM_TYPE_RESERVED:
+ return "reserved";
+ case BIOSMEM_TYPE_ACPI:
+ return "ACPI";
+ case BIOSMEM_TYPE_NVS:
+ return "ACPI NVS";
+ case BIOSMEM_TYPE_UNUSABLE:
+ return "unusable";
+ default:
+ return "unknown (reserved)";
+ }
+}
+
+/*
+ * An entry is invalid if it is empty (length 0) or if base + length
+ * wraps around the 64-bit address space.
+ */
+static int __init
+biosmem_map_entry_is_invalid(const struct biosmem_map_entry *entry) {
+ return (entry->base_addr + entry->length) <= entry->base_addr;
+}
+
+/*
+ * Compact the memory map in place, dropping invalid entries.
+ */
+static void __init
+biosmem_map_filter(void)
+{
+ struct biosmem_map_entry *entry;
+ unsigned int i;
+
+ i = 0;
+
+ while (i < biosmem_map_size) {
+ entry = &biosmem_map[i];
+
+ if (biosmem_map_entry_is_invalid(entry)) {
+ biosmem_map_size--;
+ memmove(entry, entry + 1, (biosmem_map_size - i) * sizeof(*entry));
+ continue;
+ }
+
+ i++;
+ }
+}
+
+/*
+ * Sort the memory map by ascending base address.
+ */
+static void __init
+biosmem_map_sort(void)
+{
+ struct biosmem_map_entry tmp;
+ unsigned int i, j;
+
+ /*
+ * Simple insertion sort.
+ */
+ for (i = 1; i < biosmem_map_size; i++) {
+ tmp = biosmem_map[i];
+
+ /*
+ * j is unsigned: decrementing it past 0 wraps to a huge value,
+ * making (j < i) false - that is the loop exit at the front of
+ * the array.
+ */
+ for (j = i - 1; j < i; j--) {
+ if (biosmem_map[j].base_addr < tmp.base_addr)
+ break;
+
+ biosmem_map[j + 1] = biosmem_map[j];
+ }
+
+ biosmem_map[j + 1] = tmp;
+ }
+}
+
+/*
+ * Sanitize the memory map: drop invalid entries, split overlapping
+ * ranges so the most restrictive type wins, then sort the result by
+ * base address. May grow the map (hence the x2 sizing of biosmem_map).
+ */
+static void __init
+biosmem_map_adjust(void)
+{
+ struct biosmem_map_entry tmp, *a, *b, *first, *second;
+ uint64_t a_end, b_end, last_end;
+ unsigned int i, j, last_type;
+
+ biosmem_map_filter();
+
+ /*
+ * Resolve overlapping areas, giving priority to most restrictive
+ * (i.e. numerically higher) types.
+ */
+ for (i = 0; i < biosmem_map_size; i++) {
+ a = &biosmem_map[i];
+ a_end = a->base_addr + a->length;
+
+ j = i + 1;
+
+ /*
+ * NOTE(review): a_end isn't recomputed after a is trimmed in the
+ * body below - check whether later overlap tests in this inner
+ * loop can act on a stale bound.
+ */
+ while (j < biosmem_map_size) {
+ b = &biosmem_map[j];
+ b_end = b->base_addr + b->length;
+
+ /* Disjoint entries: nothing to resolve. */
+ if ((a->base_addr >= b_end) || (a_end <= b->base_addr)) {
+ j++;
+ continue;
+ }
+
+ if (a->base_addr < b->base_addr) {
+ first = a;
+ second = b;
+ } else {
+ first = b;
+ second = a;
+ }
+
+ if (a_end > b_end) {
+ last_end = a_end;
+ last_type = a->type;
+ } else {
+ last_end = b_end;
+ last_type = b->type;
+ }
+
+ /*
+ * tmp describes the overlapping middle region, typed with the
+ * more restrictive of the two types; first and second are
+ * trimmed down to their non-overlapping ends.
+ */
+ tmp.base_addr = second->base_addr;
+ tmp.length = MIN(a_end, b_end) - tmp.base_addr;
+ tmp.type = MAX(a->type, b->type);
+ first->length = tmp.base_addr - first->base_addr;
+ second->base_addr += tmp.length;
+ second->length = last_end - second->base_addr;
+ second->type = last_type;
+
+ /*
+ * Filter out invalid entries.
+ */
+ if (biosmem_map_entry_is_invalid(a)
+ && (biosmem_map_entry_is_invalid(b)) {
+ *a = tmp;
+ biosmem_map_size--;
+ memmove(b, b + 1, (biosmem_map_size - j) * sizeof(*b));
+ continue;
+ } else if (biosmem_map_entry_is_invalid(a)) {
+ *a = tmp;
+ j++;
+ continue;
+ } else if (biosmem_map_entry_is_invalid(b)) {
+ *b = tmp;
+ j++;
+ continue;
+ }
+
+ /* Merge the overlap into whichever neighbor shares its type. */
+ if (tmp.type == a->type)
+ first = a;
+ else if (tmp.type == b->type)
+ first = b;
+ else {
+
+ /*
+ * If the overlapping area can't be merged with one of its
+ * neighbors, it must be added as a new entry.
+ */
+
+ if (biosmem_map_size >= ARRAY_SIZE(biosmem_map))
+ panic("biosmem: too many memory map entries");
+
+ biosmem_map[biosmem_map_size] = tmp;
+ biosmem_map_size++;
+ j++;
+ continue;
+ }
+
+ if (first->base_addr > tmp.base_addr)
+ first->base_addr = tmp.base_addr;
+
+ first->length += tmp.length;
+ j++;
+ }
+ }
+
+ biosmem_map_sort();
+}
+
+/*
+ * Print the sanitized physical memory map to the kernel log.
+ */
+static void __init
+biosmem_map_show(void)
+{
+ const struct biosmem_map_entry *entry, *end;
+
+ printk("biosmem: physical memory map:\n");
+
+ for (entry = biosmem_map, end = entry + biosmem_map_size;
+ entry < end;
+ entry++)
+ printk("biosmem: %018llx:%018llx, %s\n", entry->base_addr,
+ entry->base_addr + entry->length,
+ biosmem_type_desc(entry->type));
+}
+
+/*
+ * Narrow [*phys_start, *phys_end) to the hull of the page-aligned
+ * available memory found inside it (without PAE, clamped below
+ * VM_PHYS_NORMAL_LIMIT). Return 0 on success, -1 if the range contains
+ * no available memory at all. Relies on the map being sorted.
+ */
+static int __init
+biosmem_map_find_avail(vm_phys_t *phys_start, vm_phys_t *phys_end)
+{
+ const struct biosmem_map_entry *entry, *map_end;
+ vm_phys_t start, end, seg_start, seg_end;
+ uint64_t entry_end;
+
+ seg_start = (vm_phys_t)-1;
+ seg_end = (vm_phys_t)-1;
+ map_end = biosmem_map + biosmem_map_size;
+
+ for (entry = biosmem_map; entry < map_end; entry++) {
+ if (entry->type != BIOSMEM_TYPE_AVAILABLE)
+ continue;
+
+#ifndef PAE
+ if (entry->base_addr >= VM_PHYS_NORMAL_LIMIT)
+ break;
+#endif /* PAE */
+
+ start = vm_page_round(entry->base_addr);
+
+ if (start >= *phys_end)
+ break;
+
+ entry_end = entry->base_addr + entry->length;
+
+#ifndef PAE
+ if (entry_end > VM_PHYS_NORMAL_LIMIT)
+ entry_end = VM_PHYS_NORMAL_LIMIT;
+#endif /* PAE */
+
+ end = vm_page_trunc(entry_end);
+
+ /* TODO: check against a minimum size */
+ if ((start < end) && (start < *phys_end) && (end > *phys_start)) {
+ if (seg_start == (vm_phys_t)-1)
+ seg_start = start;
+
+ seg_end = end;
+ }
+ }
+
+ if ((seg_start == (vm_phys_t)-1) || (seg_end == (vm_phys_t)-1))
+ return -1;
+
+ if (seg_start > *phys_start)
+ *phys_start = seg_start;
+
+ if (seg_end < *phys_end)
+ *phys_end = seg_end;
+
+ return 0;
+}
+
+/*
+ * Register a physical segment with the VM system, clamping the
+ * initially-available subrange into [phys_start, phys_end] first.
+ */
+static void __init
+biosmem_load_segment(const char *name, vm_phys_t phys_start,
+ vm_phys_t phys_end, vm_phys_t avail_start,
+ vm_phys_t avail_end, unsigned int seglist_prio)
+{
+ if ((avail_start < phys_start) || (avail_start > phys_end))
+ avail_start = phys_start;
+
+ if ((avail_end < phys_start) || (avail_end > phys_end))
+ avail_end = phys_end;
+
+ vm_phys_load(name, phys_start, phys_end, avail_start, avail_end,
+ seglist_prio);
+}
+
+/*
+ * Hand physical memory over to the VM system. The "normal" segment's
+ * available range is the unused part of the bootstrap heap
+ * (heap_free..heap_end), so pages already handed out by
+ * biosmem_bootalloc() stay reserved; everything else is freed later by
+ * biosmem_free_usable().
+ */
+void __init
+biosmem_setup(void)
+{
+ vm_phys_t phys_start, phys_end;
+ int error;
+
+ biosmem_map_adjust();
+ biosmem_map_show();
+
+ phys_start = BIOSMEM_BASE;
+ phys_end = VM_PHYS_NORMAL_LIMIT;
+ error = biosmem_map_find_avail(&phys_start, &phys_end);
+
+ if (!error)
+ biosmem_load_segment("normal", phys_start, phys_end,
+ biosmem_heap_free, biosmem_heap_end,
+ VM_PHYS_SEGLIST_NORMAL);
+
+#ifdef PAE
+ phys_start = VM_PHYS_NORMAL_LIMIT;
+ phys_end = VM_PHYS_HIGHMEM_LIMIT;
+ error = biosmem_map_find_avail(&phys_start, &phys_end);
+
+ if (!error)
+ biosmem_load_segment("highmem", phys_start, phys_end,
+ phys_start, phys_end, VM_PHYS_SEGLIST_HIGHMEM);
+#endif /* PAE */
+}
+
+/*
+ * Record [reserved_start, reserved_end) in *start/*end if it lies at
+ * or above min and begins before the earliest reserved area found so
+ * far (*start).
+ */
+static void __init
+biosmem_find_reserved_area_update(vm_phys_t min, vm_phys_t *start,
+ vm_phys_t *end, vm_phys_t reserved_start,
+ vm_phys_t reserved_end)
+{
+ if ((min <= reserved_start) && (reserved_start < *start)) {
+ *start = reserved_start;
+ *end = reserved_end;
+ }
+}
+
+/*
+ * Find the first reserved area (the kernel image or the bootstrap
+ * heap) in [min, max). Return its start address, with its end stored
+ * in *endp, or 0 if there is none (in which case *endp isn't set).
+ */
+static vm_phys_t __init
+biosmem_find_reserved_area(vm_phys_t min, vm_phys_t max,
+ vm_phys_t *endp)
+{
+ /* "end = end": GCC idiom silencing the maybe-uninitialized
+ warning; end is only read once start != max. */
+ vm_phys_t start, end = end;
+
+ start = max;
+ biosmem_find_reserved_area_update(min, &start, &end, (unsigned long)&_boot,
+ BOOT_ADDR_VTOP(&_end));
+ biosmem_find_reserved_area_update(min, &start, &end, biosmem_heap_start,
+ biosmem_heap_end);
+
+ if (start == max)
+ return 0;
+
+ *endp = end;
+ return start;
+}
+
+/*
+ * Hand every page in [start, end) over to the VM system.
+ */
+static void __init
+biosmem_free_usable_range(vm_phys_t start, vm_phys_t end)
+{
+ struct vm_page *page;
+
+ while (start < end) {
+ page = vm_phys_lookup_page(start);
+ assert(page != NULL);
+ vm_phys_manage(page);
+ start += PAGE_SIZE;
+ }
+}
+
+/*
+ * Free upper memory up to upper_end, walking the holes between
+ * reserved areas: each free range runs from the end of one reserved
+ * area (next) to the start of the following one (the return value of
+ * biosmem_find_reserved_area()).
+ */
+static void __init
+biosmem_free_usable_upper(vm_phys_t upper_end)
+{
+ vm_phys_t next, start, end;
+
+ next = BIOSMEM_END;
+
+ do {
+ start = next;
+ end = biosmem_find_reserved_area(start, upper_end, &next);
+
+ if (end == 0) {
+ /* No reserved area left: the free range runs to upper_end. */
+ end = upper_end;
+ next = 0;
+ }
+
+ biosmem_free_usable_range(start, end);
+ } while (next != 0);
+}
+
+/*
+ * Release all remaining usable memory to the VM system, including
+ * ranges that held boot data and therefore weren't part of the
+ * bootstrap heap registered by biosmem_setup().
+ */
+void __init
+biosmem_free_usable(void)
+{
+ struct biosmem_map_entry *entry;
+ vm_phys_t start, end;
+ uint64_t entry_end;
+ unsigned int i;
+
+ for (i = 0; i < biosmem_map_size; i++) {
+ entry = &biosmem_map[i];
+
+ if (entry->type != BIOSMEM_TYPE_AVAILABLE)
+ continue;
+
+ /* High memory is always loaded during setup */
+ if (entry->base_addr >= VM_PHYS_NORMAL_LIMIT)
+ break;
+
+ entry_end = entry->base_addr + entry->length;
+
+ if (entry_end > VM_PHYS_NORMAL_LIMIT)
+ entry_end = VM_PHYS_NORMAL_LIMIT;
+
+ start = vm_page_round(entry->base_addr);
+ end = vm_page_trunc(entry_end);
+
+ /* Skip the first 64 KiB (see the BIOSMEM_BASE comment). */
+ if (start < BIOSMEM_BASE) {
+ assert(end < BIOSMEM_END);
+ start = BIOSMEM_BASE;
+ }
+
+ /*
+ * Upper memory contains the kernel and the bootstrap heap, and
+ * requires special handling.
+ */
+ if (start == BIOSMEM_END)
+ biosmem_free_usable_upper(end);
+ else
+ biosmem_free_usable_range(start, end);
+ }
+}
diff --git a/arch/i386/machine/biosmem.h b/arch/i386/machine/biosmem.h
new file mode 100644
index 00000000..d5a01195
--- /dev/null
+++ b/arch/i386/machine/biosmem.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_BIOSMEM_H
+#define _I386_BIOSMEM_H
+
+#include <machine/multiboot.h>
+
+/*
+ * Address where the address of the Extended BIOS Data Area segment can be
+ * found.
+ */
+#define BIOSMEM_EBDA_PTR 0x40e
+
+/*
+ * Significant low memory addresses.
+ *
+ * The first 64 KiB are reserved for various reasons (e.g. to preserve BIOS
+ * data and to work around data corruption on some hardware).
+ */
+#define BIOSMEM_BASE 0x010000
+#define BIOSMEM_BASE_END 0x0a0000
+#define BIOSMEM_EXT_ROM 0x0e0000
+#define BIOSMEM_ROM 0x0f0000
+#define BIOSMEM_END 0x100000
+
+/*
+ * Early initialization of the biosmem module.
+ *
+ * This function processes the given multiboot data for BIOS-provided
+ * memory information, and sets up a bootstrap physical page allocator.
+ *
+ * It is called before paging is enabled.
+ */
+void biosmem_bootstrap(struct multiboot_info *mbi);
+
+/*
+ * Allocate contiguous physical pages during bootstrap.
+ *
+ * This function is called before paging is enabled.
+ */
+void * biosmem_bootalloc(unsigned int nr_pages);
+
+/*
+ * Set up physical memory based on the information obtained during bootstrap
+ * and load it in the VM system.
+ */
+void biosmem_setup(void);
+
+/*
+ * Free all usable memory.
+ *
+ * This includes ranges that weren't part of the bootstrap allocator initial
+ * heap, e.g. because they contained boot data.
+ */
+void biosmem_free_usable(void);
+
+#endif /* _I386_BIOSMEM_H */
diff --git a/arch/i386/machine/boot.S b/arch/i386/machine/boot.S
new file mode 100644
index 00000000..eec7c585
--- /dev/null
+++ b/arch/i386/machine/boot.S
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define __ASSEMBLY__
+
+#include <machine/asm.h>
+#include <machine/cpu.h>
+#include <machine/boot.h>
+#include <machine/multiboot.h>
+
+/*
+ * Convert a physical address in the .boot section to its real address in
+ * the MP trampoline code.
+ */
+#define BOOT_MP_ADDR_PTOT(addr) (BOOT_MP_TRAMPOLINE_ADDR + (addr) \
+ - boot_ap_start)
+
+.section .boothdr, "wax"
+
+ /*
+ * Multiboot header.
+ */
+.align 4
+DATA(boot_header)
+ .long MULTIBOOT_OS_MAGIC
+ .long MULTIBOOT_OS_FLAGS
+ /* Checksum: the three header words must sum to zero. */
+ .long -(MULTIBOOT_OS_FLAGS + MULTIBOOT_OS_MAGIC)
+END(boot_header)
+
+/*
+ * Entry point.
+ *
+ * On entry, %eax and %ebx hold values passed by the multiboot loader;
+ * both are pushed as arguments to init_paging()/init() below.
+ */
+ENTRY(_start)
+ /*
+ * Set up a simple GDT to conform to the multiboot specification.
+ */
+ lgdt boot_gdtr
+
+ /*
+ * Keep %eax and %ebx.
+ */
+ /* 0x10 selects the data descriptor (third GDT entry). */
+ movw $0x10, %cx
+ movw %cx, %ds
+ movw %cx, %es
+ movw %cx, %ss
+ xorw %cx, %cx
+ movw %cx, %fs
+ movw %cx, %gs
+ /* Far jump through the code descriptor (0x08) to reload %cs. */
+ ljmp $8, $1f
+
+1:
+ /*
+ * Set up the init stack.
+ */
+ movl $(init_stack + BOOT_STACK_SIZE - KERNEL_OFFSET), %esp
+ movl %esp, %ebp
+
+ /*
+ * Enable paging.
+ */
+ pushl %ebx
+ pushl %eax
+ /* init_paging() returns the root page table address in %eax. */
+ call init_paging
+ movl %eax, %cr3
+ movl %cr0, %eax
+ orl $CPU_CR0_PG, %eax
+ movl %eax, %cr0
+ ljmp $8, $1f
+
+1:
+ /*
+ * Reset the stack, use high addresses.
+ */
+ movl $(init_stack + BOOT_STACK_SIZE), %esp
+ movl %esp, %ebp
+
+ /*
+ * Prevent stack tracing from searching previous frames.
+ */
+ pushl $0
+ jmp init
+
+ /*
+ * Never reached.
+ */
+END(_start)
+
+.section .boot, "wax"
+
+/*
+ * Pseudo-descriptor for lgdt: 16-bit limit followed by 32-bit base.
+ * NOTE(review): the GDT limit is conventionally size - 1 (here
+ * 8 * 3 - 1); the one-past value is harmless but worth confirming.
+ */
+DATA(boot_gdtr)
+ .word (8 * 3)
+ .long boot_gdt
+END(boot_gdtr)
+
+/*
+ * This is where an AP runs after leaving the trampoline code.
+ */
+ENTRY(boot_ap_start32)
+ /*
+ * Set up the GDT again, because the current one is from the trampoline code
+ * which isn't part of the identity mapping and won't be available once paging
+ * is enabled.
+ */
+ lgdt boot_gdtr
+ movw $0x10, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ xorw %ax, %ax
+ movw %ax, %fs
+ movw %ax, %gs
+ ljmp $8, $1f
+
+1:
+ /*
+ * Set up the init stack.
+ */
+ movl $(init_ap_stack + BOOT_STACK_SIZE - KERNEL_OFFSET), %esp
+ movl %esp, %ebp
+
+ /*
+ * Enable paging.
+ */
+ call init_ap_paging
+ movl %eax, %cr3
+ movl %cr0, %eax
+ orl $CPU_CR0_PG, %eax
+ movl %eax, %cr0
+ ljmp $8, $1f
+
+1:
+ /*
+ * Switch to the boot stack preallocated for this AP by the BSP.
+ */
+ movl (init_ap_boot_stack - KERNEL_OFFSET), %esp
+ addl $BOOT_STACK_SIZE, %esp
+ movl %esp, %ebp
+
+ /*
+ * Prevent stack tracing from searching previous frames.
+ */
+ pushl $0
+ jmp init_ap
+
+ /*
+ * Never reached.
+ */
+END(boot_ap_start32)
+
+/*
+ * This section, including the GDT, is the MP trampoline code run by APs
+ * on startup. It is copied at a fixed location in the first segment and
+ * must enable protected mode to jump back into the kernel.
+ */
+/*
+ * APs start in real mode (.code16); all symbol references must go
+ * through BOOT_MP_ADDR_PTOT() because the code runs at
+ * BOOT_MP_TRAMPOLINE_ADDR, not at its link-time address.
+ */
+.align 8
+ENTRY(boot_ap_start)
+ .code16
+ cli
+ xorw %ax, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %fs
+ movw %ax, %gs
+ movw %ax, %ss
+ lgdt BOOT_MP_ADDR_PTOT(boot_ap_gdtr)
+ movl %cr0, %eax
+ orl $CPU_CR0_PE, %eax
+ movl %eax, %cr0
+ /* Enter protected mode through the trampoline-relative label. */
+ ljmp $8, $BOOT_MP_ADDR_PTOT(1f)
+
+.align 4
+1:
+ .code32
+ movw $0x10, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+ xorw %ax, %ax
+ movw %ax, %fs
+ movw %ax, %gs
+ ljmp $8, $boot_ap_start32
+END(boot_ap_start)
+
+DATA(boot_ap_gdtr)
+ .word (8 * 3)
+ .long BOOT_MP_ADDR_PTOT(boot_gdt)
+END(boot_ap_gdtr)
+
+/*
+ * Flat 4 GiB descriptors: access bytes 0x9a (code) and 0x92 (data),
+ * granularity/limit byte 0xcf (4 KiB granularity, 32-bit).
+ */
+.align 8
+DATA(boot_gdt)
+ /*
+ * Null selector.
+ */
+ .word 0x0000
+ .word 0x0000
+ .word 0x0000
+ .word 0x0000
+
+ /*
+ * Code segment selector.
+ */
+ .word 0xffff
+ .word 0x0000
+ .word 0x9a00
+ .word 0x00cf
+
+ /*
+ * Data segment selector.
+ */
+ .word 0xffff
+ .word 0x0000
+ .word 0x9200
+ .word 0x00cf
+END(boot_gdt)
+
+/* Byte size of the trampoline, used when copying it into low memory. */
+DATA(boot_ap_size)
+ .long . - boot_ap_start
+END(boot_ap_size)
diff --git a/arch/i386/machine/boot.h b/arch/i386/machine/boot.h
new file mode 100644
index 00000000..11255509
--- /dev/null
+++ b/arch/i386/machine/boot.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_BOOT_H
+#define _I386_BOOT_H
+
+/*
+ * The kernel is physically loaded at BOOT_OFFSET by the boot loader. It
+ * will quickly establish the necessary mappings to run at KERNEL_OFFSET.
+ *
+ * See the linker script for more information.
+ */
+#define BOOT_OFFSET 0x00100000
+#define KERNEL_OFFSET 0xc0000000
+
+/*
+ * Size of the stack used to bootstrap the kernel.
+ */
+#define BOOT_STACK_SIZE 4096
+
+/*
+ * Address where the MP trampoline code is copied and run at.
+ *
+ * It must reside at a free location in the first segment and be page
+ * aligned.
+ */
+#define BOOT_MP_TRAMPOLINE_ADDR 0x7000
+
+#ifndef __ASSEMBLY__
+
+#include <lib/macros.h>
+
+/*
+ * Access a variable during bootstrap, while still running at physical
+ * addresses.
+ */
+#define BOOT_VTOP(var) \
+ (*((typeof(var) *)((unsigned long)(&var) - KERNEL_OFFSET)))
+
+/*
+ * Address translation macros.
+ */
+#define BOOT_ADDR_VTOP(addr) ((unsigned long)(addr) - KERNEL_OFFSET)
+#define BOOT_ADDR_PTOV(addr) ((unsigned long)(addr) + KERNEL_OFFSET)
+
+/*
+ * Functions used before paging is enabled must be part of the .boot section
+ * so that they run at physical addresses. There is no .bootdata section; the
+ * BOOT_VTOP() macro should be used instead.
+ */
+#define __boot __section(".boot")
+
+/*
+ * Boundaries of the .boot section.
+ */
+extern char _boot;
+extern char _eboot;
+
+/*
+ * Size of the trampoline code used for APs.
+ */
+extern unsigned long boot_ap_size;
+
+/*
+ * Address of the MP trampoline code.
+ */
+void boot_ap_start(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _I386_BOOT_H */
diff --git a/arch/i386/machine/cpu.c b/arch/i386/machine/cpu.c
new file mode 100644
index 00000000..2c7a2cd1
--- /dev/null
+++ b/arch/i386/machine/cpu.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <lib/string.h>
+#include <machine/acpimp.h>
+#include <machine/biosmem.h>
+#include <machine/boot.h>
+#include <machine/cpu.h>
+#include <machine/init.h>
+#include <machine/io.h>
+#include <machine/lapic.h>
+#include <machine/mps.h>
+#include <machine/trap.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+
/*
 * CPUID leaf 1 fields: processor signature in EAX, miscellaneous
 * information (brand index, CLFLUSH line size, initial APIC ID) in EBX.
 */
#define CPU_TYPE_MASK 0x00003000
#define CPU_TYPE_SHIFT 12
#define CPU_FAMILY_MASK 0x00000f00
#define CPU_FAMILY_SHIFT 8
#define CPU_EXTFAMILY_MASK 0x0ff00000
#define CPU_EXTFAMILY_SHIFT 20
#define CPU_MODEL_MASK 0x000000f0
#define CPU_MODEL_SHIFT 4
#define CPU_EXTMODEL_MASK 0x000f0000
#define CPU_EXTMODEL_SHIFT 16
#define CPU_STEPPING_MASK 0x0000000f
#define CPU_STEPPING_SHIFT 0
#define CPU_BRAND_MASK 0x000000ff
#define CPU_BRAND_SHIFT 0
#define CPU_CLFLUSH_MASK 0x0000ff00
#define CPU_CLFLUSH_SHIFT 8
#define CPU_APIC_ID_MASK 0xff000000
#define CPU_APIC_ID_SHIFT 24

/* Number of entries in the interrupt descriptor table */
#define CPU_IDT_SIZE 256

/* Marks a cpu_array entry whose local APIC ID isn't known yet */
#define CPU_INVALID_APIC_ID ((unsigned int)-1)

/*
 * MP related CMOS ports, registers and values.
 */
#define CPU_MP_CMOS_PORT_REG 0x70
#define CPU_MP_CMOS_PORT_DATA 0x71
#define CPU_MP_CMOS_REG_RESET 0x0f
#define CPU_MP_CMOS_DATA_RESET_WARM 0x0a
#define CPU_MP_CMOS_RESET_VECTOR 0x467

/*
 * Per-processor data, indexed by CPU ID; entry 0 is always the BSP.
 */
struct cpu cpu_array[MAX_CPUS];

/*
 * Number of configured processors.
 */
static unsigned int cpu_array_size;

/*
 * Interrupt descriptor table.
 */
static struct cpu_gate_desc cpu_idt[CPU_IDT_SIZE] __aligned(8);
+
+static void
+cpu_seg_set(struct cpu_seg_desc *desc, unsigned long base, unsigned long limit,
+ unsigned long granularity, unsigned long dpl, unsigned long s_flag,
+ unsigned long type)
+{
+ if (granularity & CPU_DESC_GRAN_4KB)
+ limit >>= 12;
+
+ desc->high = (base & CPU_DESC_SEG_BASE_HIGH_MASK)
+ | (granularity & CPU_DESC_GRAN_MASK)
+ | CPU_DESC_DB32
+ | (limit & CPU_DESC_SEG_LIMIT_HIGH_MASK)
+ | CPU_DESC_PRESENT
+ | (dpl & CPU_DESC_PL_MASK)
+ | (s_flag & CPU_DESC_S_MASK)
+ | (type & CPU_DESC_TYPE_MASK)
+ | ((base & CPU_DESC_SEG_BASE_MID_MASK) >> 16);
+ desc->low = ((base & CPU_DESC_SEG_BASE_LOW_MASK) << 16)
+ | (limit & CPU_DESC_SEG_LIMIT_LOW_MASK);
+}
+
+static void
+cpu_seg_set_null(struct cpu_seg_desc *desc)
+{
+ desc->high = 0;
+ desc->low = 0;
+}
+
/*
 * Initialize desc as a flat (4 GiB, 4KB-granular) readable code segment
 * with the given privilege level.
 */
static void
cpu_seg_set_code(struct cpu_seg_desc *desc, unsigned long dpl)
{
    cpu_seg_set(desc, 0, 0xffffffff, CPU_DESC_GRAN_4KB, dpl,
                CPU_DESC_S_CODE_DATA, CPU_DESC_TYPE_CODE_READABLE);
}

/*
 * Initialize desc as a flat writeable data segment with the given base
 * address and privilege level.
 */
static void
cpu_seg_set_data(struct cpu_seg_desc *desc, unsigned long base,
                 unsigned long dpl)
{
    cpu_seg_set(desc, base, 0xffffffff, CPU_DESC_GRAN_4KB, dpl,
                CPU_DESC_S_CODE_DATA, CPU_DESC_TYPE_DATA_WRITEABLE);
}
+
/*
 * Build and load the per-processor GDT.
 *
 * The per-cpu segment is based at the cpu structure itself, which is how
 * cpu_current() retrieves it through %fs.
 */
static void __init
cpu_init_gdt(struct cpu *cpu)
{
    struct cpu_pseudo_desc gdtr;

    cpu_seg_set_null(&cpu->gdt[CPU_GDT_NULL_IDX]);
    cpu_seg_set_data(&cpu->gdt[CPU_GDT_CPU_IDX], (unsigned long)cpu,
                     CPU_DESC_PL_SYSTEM);
    cpu_seg_set_code(&cpu->gdt[CPU_GDT_CODE_IDX], CPU_DESC_PL_SYSTEM);
    cpu_seg_set_data(&cpu->gdt[CPU_GDT_DATA_IDX], 0, CPU_DESC_PL_SYSTEM);
    gdtr.address = (unsigned long)cpu->gdt;
    gdtr.limit = sizeof(cpu->gdt) - 1;
    cpu_load_gdt(&gdtr);
}

/*
 * Install an interrupt/trap gate for the given vector, routing it to isr
 * through the kernel code segment.
 */
static void
cpu_idt_set_gate(unsigned int vector, unsigned long dpl, unsigned long type,
                 void (*isr)(void))
{
    struct cpu_gate_desc *desc;

    assert(vector < ARRAY_SIZE(cpu_idt));

    desc = &cpu_idt[vector];
    /* Offset bits 16-31, present bit, privilege level and gate type */
    desc->high = ((unsigned long)isr & CPU_DESC_GATE_OFFSET_HIGH_MASK)
                 | CPU_DESC_PRESENT
                 | (dpl & CPU_DESC_PL_MASK)
                 | (type & CPU_DESC_TYPE_MASK);
    /* Code segment selector and offset bits 0-15 */
    desc->low = ((CPU_GDT_SELECTOR(CPU_GDT_CODE_IDX)) << 16)
                | ((unsigned long)isr & CPU_DESC_GATE_OFFSET_LOW_MASK);
}

/*
 * Populate the IDT: every vector gets the default handler, then the
 * vectors actually in use are overridden.
 */
static void __init
cpu_idt_init(void)
{
    size_t i;

    for (i = 0; i < ARRAY_SIZE(cpu_idt); i++)
        cpu_idt_set_gate(i, CPU_DESC_PL_SYSTEM, CPU_DESC_TYPE_GATE_TRAP,
                         cpu_trap_default);

    /* TODO Complete */
    cpu_idt_set_gate(T_APIC_TIMER_INTR, CPU_DESC_PL_SYSTEM,
                     CPU_DESC_TYPE_GATE_INTR, cpu_trap_lapic_timer_intr);
    cpu_idt_set_gate(T_APIC_SPURIOUS_INTR, CPU_DESC_PL_SYSTEM,
                     CPU_DESC_TYPE_GATE_INTR, cpu_trap_lapic_spurious_intr);
}

/*
 * Load the shared IDT on the current processor.
 *
 * The pseudo descriptor is static so its storage outlives the call; every
 * processor writes identical values into it, and APs are started one at a
 * time (see cpu_mp_start_aps()), so concurrent calls are harmless.
 */
static void
cpu_load_idt(void)
{
    static volatile struct cpu_pseudo_desc idtr;

    idtr.address = (unsigned long)cpu_idt;
    idtr.limit = sizeof(cpu_idt) - 1;
    asm volatile("lidt %0" : : "m" (idtr));
}
+
/*
 * Execute the cpuid instruction.
 *
 * On entry, *eax selects the leaf; on return all four registers hold the
 * results. NOTE(review): ECX is not set up as an input, so sub-leaf
 * (ECX-indexed) queries can't be made through this wrapper — the leaves
 * used in this file (0, 1, 0x8000000x) don't need one.
 */
static __always_inline void
cpu_cpuid(unsigned long *eax, unsigned long *ebx, unsigned long *ecx,
          unsigned long *edx)
{
    asm volatile("cpuid" : "+a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx));
}
+
+/*
+ * Initialize the given cpu structure for the current processor.
+ *
+ * On the BSP, this function is called before it can determine the cpu
+ * structure. It is part of its task to make it possible.
+ */
+static void __init
+cpu_init(struct cpu *cpu)
+{
+ unsigned long eax, ebx, ecx, edx, max_eax;
+
+ /*
+ * Assume at least an i686 processor.
+ */
+
+ cpu_intr_restore(CPU_EFL_ONE);
+ cpu_set_cr0(CPU_CR0_PG | CPU_CR0_AM | CPU_CR0_WP | CPU_CR0_NE | CPU_CR0_ET
+ | CPU_CR0_TS | CPU_CR0_MP | CPU_CR0_PE);
+ cpu_init_gdt(cpu);
+ cpu_load_idt();
+
+ eax = 0;
+ cpu_cpuid(&eax, &ebx, &ecx, &edx);
+ max_eax = eax;
+ memcpy(cpu->vendor_id, &ebx, sizeof(ebx));
+ memcpy(cpu->vendor_id + 4, &edx, sizeof(edx));
+ memcpy(cpu->vendor_id + 8, &ecx, sizeof(ecx));
+ cpu->vendor_id[sizeof(cpu->vendor_id) - 1] = '\0';
+
+ /* Initialized if the processor supports brand strings */
+ cpu->model_name[0] = '\0';
+
+ assert(max_eax >= 1);
+
+ eax = 1;
+ cpu_cpuid(&eax, &ebx, &ecx, &edx);
+ cpu->type = (eax & CPU_TYPE_MASK) >> CPU_TYPE_SHIFT;
+ cpu->family = (eax & CPU_FAMILY_MASK) >> CPU_FAMILY_SHIFT;
+
+ if (cpu->family == 0xf)
+ cpu->family += (eax & CPU_EXTFAMILY_MASK) >> CPU_EXTFAMILY_SHIFT;
+
+ cpu->model = (eax & CPU_MODEL_MASK) >> CPU_MODEL_SHIFT;
+
+ if ((cpu->model == 6) || (cpu->model == 0xf))
+ cpu->model += (eax & CPU_EXTMODEL_MASK) >> CPU_EXTMODEL_SHIFT;
+
+ cpu->stepping = (eax & CPU_STEPPING_MASK) >> CPU_STEPPING_SHIFT;
+ cpu->clflush_size = ((ebx & CPU_CLFLUSH_MASK) >> CPU_CLFLUSH_SHIFT) * 8;
+ cpu->initial_apic_id = (ebx & CPU_APIC_ID_MASK) >> CPU_APIC_ID_SHIFT;
+ cpu->features1 = ecx;
+ cpu->features2 = edx;
+
+ eax = 0x80000000;
+ cpu_cpuid(&eax, &ebx, &ecx, &edx);
+
+ if ((eax & 0x80000000) && (eax >= 0x80000004)) {
+ eax = 0x80000002;
+ cpu_cpuid(&eax, &ebx, &ecx, &edx);
+ memcpy(cpu->model_name, &eax, sizeof(eax));
+ memcpy(cpu->model_name + 4, &ebx, sizeof(ebx));
+ memcpy(cpu->model_name + 8, &ecx, sizeof(ecx));
+ memcpy(cpu->model_name + 12, &edx, sizeof(edx));
+
+ eax = 0x80000003;
+ cpu_cpuid(&eax, &ebx, &ecx, &edx);
+ memcpy(cpu->model_name + 16, &eax, sizeof(eax));
+ memcpy(cpu->model_name + 20, &ebx, sizeof(ebx));
+ memcpy(cpu->model_name + 24, &ecx, sizeof(ecx));
+ memcpy(cpu->model_name + 28, &edx, sizeof(edx));
+
+ eax = 0x80000004;
+ cpu_cpuid(&eax, &ebx, &ecx, &edx);
+ memcpy(cpu->model_name + 32, &eax, sizeof(eax));
+ memcpy(cpu->model_name + 36, &ebx, sizeof(ebx));
+ memcpy(cpu->model_name + 40, &ecx, sizeof(ecx));
+ memcpy(cpu->model_name + 44, &edx, sizeof(edx));
+
+ cpu->model_name[sizeof(cpu->model_name) - 1] = '\0';
+ }
+
+ cpu->state = CPU_STATE_ON;
+}
+
/*
 * Set up the cpu module: install the IDT, reset every cpu_array entry to
 * a known state, then initialize the BSP (always entry 0).
 */
void __init
cpu_setup(void)
{
    size_t i;

    cpu_idt_init();

    for (i = 0; i < ARRAY_SIZE(cpu_array); i++) {
        cpu_array[i].self = &cpu_array[i];
        cpu_array[i].id = i;
        cpu_array[i].apic_id = CPU_INVALID_APIC_ID;
        cpu_array[i].state = CPU_STATE_OFF;
    }

    /* Only the BSP is known at this point */
    cpu_array_size = 1;
    cpu_init(&cpu_array[0]);
}
+
/*
 * Panic with a message naming the missing CPU feature.
 */
static void __init
cpu_panic_on_missing_feature(const char *feature)
{
    panic("cpu: %s feature missing", feature);
}

/*
 * Make sure the processor provides the features the kernel relies on,
 * and panic otherwise.
 */
void __init
cpu_check(const struct cpu *cpu)
{
    if (!(cpu->features2 & CPU_FEATURE2_FPU))
        cpu_panic_on_missing_feature("fpu");

    /* TODO: support UP with legacy PIC machines */
    if (!(cpu->features2 & CPU_FEATURE2_APIC))
        cpu_panic_on_missing_feature("apic");
}
+
+void
+cpu_info(const struct cpu *cpu)
+{
+ printk("cpu%u: %s, type %u, family %u, model %u, stepping %u\n",
+ cpu->id, cpu->vendor_id, cpu->type, cpu->family, cpu->model,
+ cpu->stepping);
+
+ if (strlen(cpu->model_name) > 0)
+ printk("cpu%u: %s\n", cpu->id, cpu->model_name);
+}
+
/*
 * Register a local APIC reported by the ACPI/MP tables.
 *
 * The BSP always occupies cpu_array entry 0; APs are assigned IDs in
 * registration order. Processors beyond MAX_CPUS are ignored.
 */
void __init
cpu_mp_register_lapic(unsigned int apic_id, int is_bsp)
{
    if (is_bsp) {
        if (cpu_array[0].apic_id != CPU_INVALID_APIC_ID)
            panic("cpu: another processor pretends to be the BSP");

        cpu_array[0].apic_id = apic_id;
        return;
    }

    if (cpu_array_size == ARRAY_SIZE(cpu_array)) {
        printk("cpu: ignoring processor beyond id %u\n", MAX_CPUS - 1);
        return;
    }

    cpu_array[cpu_array_size].apic_id = apic_id;
    cpu_array_size++;
}
+
/*
 * Boot the application processors.
 *
 * The trampoline code is copied to low memory, the BIOS warm reset vector
 * is pointed at it, then each AP is started with an INIT-SIPI-SIPI
 * sequence (the "Universal Start-up Algorithm"). APs are started strictly
 * one at a time, since they share the early boot variables init_ap_id and
 * init_ap_boot_stack.
 */
static void __init
cpu_mp_start_aps(void)
{
    uint16_t reset_vector[2];
    struct cpu *cpu;
    void *ptr;
    unsigned long map_addr;
    size_t map_size;
    unsigned int i;

    if (cpu_array_size == 1)
        return;

    assert(BOOT_MP_TRAMPOLINE_ADDR < BIOSMEM_BASE);
    assert(vm_page_aligned(BOOT_MP_TRAMPOLINE_ADDR));
    assert(boot_ap_size <= PAGE_SIZE);

    /* Set up the AP trampoline code */
    ptr = vm_kmem_map_pa(BOOT_MP_TRAMPOLINE_ADDR, boot_ap_size,
                         &map_addr, &map_size);

    if (ptr == NULL)
        panic("cpu: unable to map trampoline area in kernel map");

    memcpy(ptr, boot_ap_start, boot_ap_size);
    vm_kmem_unmap_pa(map_addr, map_size);

    /* Set up the warm reset vector (segment:offset, offset stored first) */
    reset_vector[0] = 0;
    reset_vector[1] = BOOT_MP_TRAMPOLINE_ADDR >> 4;
    ptr = vm_kmem_map_pa(CPU_MP_CMOS_RESET_VECTOR, sizeof(reset_vector),
                         &map_addr, &map_size);

    if (ptr == NULL)
        panic("cpu: unable to map warm reset vector in kernel map");

    memcpy(ptr, reset_vector, sizeof(reset_vector));
    vm_kmem_unmap_pa(map_addr, map_size);

    /* Select a warm reset so an AP jumps through the reset vector */
    io_write_byte(CPU_MP_CMOS_PORT_REG, CPU_MP_CMOS_REG_RESET);
    io_write_byte(CPU_MP_CMOS_PORT_DATA, CPU_MP_CMOS_DATA_RESET_WARM);

    /* Perform the "Universal Start-up Algorithm" */
    for (i = 1; i < cpu_array_size; i++) {
        cpu = &cpu_array[i];

        cpu->boot_stack = vm_kmem_alloc(BOOT_STACK_SIZE);

        if (cpu->boot_stack == 0)
            panic("unable to allocate boot stack for cpu%u", i);

        /* Early boot parameters picked up by the AP trampoline */
        init_ap_id = i;
        init_ap_boot_stack = cpu->boot_stack;

        lapic_ipi_init_assert(cpu->apic_id);
        cpu_delay(200);
        lapic_ipi_init_deassert(cpu->apic_id);
        cpu_delay(10000);
        /* The start-up vector is the trampoline page number */
        lapic_ipi_startup(cpu->apic_id, BOOT_MP_TRAMPOLINE_ADDR >> 12);
        cpu_delay(200);
        lapic_ipi_startup(cpu->apic_id, BOOT_MP_TRAMPOLINE_ADDR >> 12);
        cpu_delay(200);

        /* Wait until the AP has fully initialized itself */
        while (cpu->state == CPU_STATE_OFF)
            cpu_pause();
    }
}
+
/*
 * Report the number of configured processors.
 */
static void __init
cpu_mp_info(void)
{
    printk("cpu: %u processors configured\n", cpu_array_size);
}
+
+static int __init
+cpu_mp_probe(void)
+{
+ int error;
+
+ error = acpimp_setup();
+
+ if (!error)
+ return 0;
+
+ error = mps_setup();
+
+ if (!error)
+ return 0;
+
+ return -1;
+}
+
+void __init
+cpu_mp_setup(void)
+{
+ int error;
+
+ error = cpu_mp_probe();
+
+ if (error)
+ return;
+
+ cpu_mp_start_aps();
+ cpu_mp_info();
+}
+
/*
 * Initialize the current AP: set up its cpu structure (identified by
 * init_ap_id), check required features, and configure its local APIC.
 */
void __init
cpu_ap_setup(void)
{
    cpu_init(&cpu_array[init_ap_id]);
    cpu_check(cpu_current());
    lapic_ap_setup();
}
diff --git a/arch/i386/machine/cpu.h b/arch/i386/machine/cpu.h
new file mode 100644
index 00000000..13d72f41
--- /dev/null
+++ b/arch/i386/machine/cpu.h
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2010, 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_CPU_H
+#define _I386_CPU_H
+
+/*
+ * GDT entry indexes and size.
+ */
+#define CPU_GDT_NULL_IDX 0
+#define CPU_GDT_CPU_IDX 1
+#define CPU_GDT_CODE_IDX 2
+#define CPU_GDT_DATA_IDX 3
+#define CPU_GDT_SIZE 4
+
+/*
+ * Convert a GDT index into a selector.
+ */
+#define CPU_GDT_SELECTOR(index) ((index) * 8)
+
+/*
+ * Control register 0 flags.
+ */
+#define CPU_CR0_PE 0x00000001
+#define CPU_CR0_MP 0x00000002
+#define CPU_CR0_TS 0x00000008
+#define CPU_CR0_ET 0x00000010
+#define CPU_CR0_NE 0x00000020
+#define CPU_CR0_WP 0x00010000
+#define CPU_CR0_AM 0x00040000
+#define CPU_CR0_PG 0x80000000
+
+/*
+ * Control register 4 flags.
+ */
+#define CPU_CR4_PAE 0x00000020
+#define CPU_CR4_PGE 0x00000080
+
+/*
+ * EFLAGS register flags.
+ */
+#define CPU_EFL_ONE 0x00000002
+#define CPU_EFL_IF 0x00000200
+
+/*
+ * Flags in the feature2 member.
+ */
+#define CPU_FEATURE2_FPU 0x00000001
+#define CPU_FEATURE2_MSR 0x00000020
+#define CPU_FEATURE2_APIC 0x00000200
+#define CPU_FEATURE2_PGE 0x00002000
+
+#ifndef __ASSEMBLY__
+
+#include <kern/param.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <machine/pit.h>
+
/* Vendor string: 12 characters plus the NUL terminator */
#define CPU_VENDOR_ID_SIZE 13
/* Brand string: 48 characters plus the NUL terminator */
#define CPU_MODEL_NAME_SIZE 49

/*
 * Memory operand of the lgdt/lidt instructions, hence packed.
 */
struct cpu_pseudo_desc {
    uint16_t limit;
    uint32_t address;
} __packed;

/*
 * Gate/segment descriptor bits and masks.
 */
#define CPU_DESC_TYPE_DATA_WRITEABLE 0x00000200
#define CPU_DESC_TYPE_CODE_READABLE 0x00000a00
#define CPU_DESC_TYPE_GATE_INTR 0x00000e00
#define CPU_DESC_TYPE_GATE_TRAP 0x00000f00
#define CPU_DESC_TYPE_MASK 0x00000f00
#define CPU_DESC_S_CODE_DATA 0x00001000
#define CPU_DESC_S_MASK 0x00001000
#define CPU_DESC_PL_SYSTEM 0x00000000
#define CPU_DESC_PL_MASK 0x00006000
#define CPU_DESC_PRESENT 0x00008000
#define CPU_DESC_DB32 0x00400000
#define CPU_DESC_GRAN_4KB 0x00800000
#define CPU_DESC_GRAN_MASK 0x00800000

#define CPU_DESC_GATE_OFFSET_LOW_MASK 0x0000ffff
#define CPU_DESC_GATE_OFFSET_HIGH_MASK 0xffff0000
#define CPU_DESC_SEG_BASE_LOW_MASK 0x0000ffff
#define CPU_DESC_SEG_BASE_MID_MASK 0x00ff0000
#define CPU_DESC_SEG_BASE_HIGH_MASK 0xff000000
#define CPU_DESC_SEG_LIMIT_LOW_MASK 0x0000ffff
#define CPU_DESC_SEG_LIMIT_HIGH_MASK 0x000f0000

/*
 * Gate descriptor (low word holds selector/offset 0-15).
 */
struct cpu_gate_desc {
    uint32_t low;
    uint32_t high;
} __packed;

/*
 * Segment descriptor (low word holds base 0-15/limit 0-15).
 */
struct cpu_seg_desc {
    uint32_t low;
    uint32_t high;
} __packed;
+
/*
 * CPU states.
 */
#define CPU_STATE_OFF 0
#define CPU_STATE_ON 1

/*
 * The fs segment register is used to store the address of the per-cpu data.
 * As a result, they must be at least 16-bytes aligned.
 */
#define CPU_ALIGN (MAX(16, CPU_L1_SIZE))

/*
 * Per-processor data.
 */
struct cpu {
    struct cpu *self;                   /* Read through %fs by cpu_current() */
    unsigned int id;                    /* Index into cpu_array */
    unsigned int apic_id;               /* Local APIC ID from the ACPI/MP tables */
    char vendor_id[CPU_VENDOR_ID_SIZE];
    char model_name[CPU_MODEL_NAME_SIZE];
    unsigned int type;
    unsigned int family;
    unsigned int model;
    unsigned int stepping;
    unsigned int clflush_size;          /* CLFLUSH line size, in bytes */
    unsigned int initial_apic_id;
    unsigned int features1;             /* CPUID leaf 1, ECX */
    unsigned int features2;             /* CPUID leaf 1, EDX */
    struct cpu_seg_desc gdt[CPU_GDT_SIZE] __aligned(8);
    volatile int state;                 /* CPU_STATE_*, set by the CPU itself */
    unsigned long boot_stack;           /* Early boot stack, allocated by the BSP */
} __aligned(CPU_ALIGN);
+
extern struct cpu cpu_array[MAX_CPUS];

/*
 * Macro to create functions that read/write registers.
 *
 * No comments can appear inside the expansion, so the generated functions
 * are documented here: cpu_get_<reg>() returns the register's value,
 * cpu_set_<reg>(value) writes it.
 */
#define CPU_DECL_GETSET_REGISTER(name) \
static __always_inline unsigned long \
cpu_get_ ## name(void) \
{ \
    unsigned long name; \
 \
    asm volatile("mov %%" XQUOTE(name) ", %0" : "=r" (name)); \
    return name; \
} \
 \
static __always_inline void \
cpu_set_ ## name(unsigned long value) \
{ \
    asm volatile("mov %0, %%" XQUOTE(name) : : "r" (value)); \
}

/*
 * Access to the processor control registers. CR1 is reserved.
 */
CPU_DECL_GETSET_REGISTER(cr0)
CPU_DECL_GETSET_REGISTER(cr2)
CPU_DECL_GETSET_REGISTER(cr3)
CPU_DECL_GETSET_REGISTER(cr4)
+
/*
 * Flush the whole TLB by reloading CR3.
 *
 * NOTE(review): reloading CR3 doesn't flush global entries when PGE is
 * enabled — confirm callers don't rely on that.
 */
static __always_inline void
cpu_tlb_flush(void)
{
    cpu_set_cr3(cpu_get_cr3());
}

/*
 * Flush a single page table entry in the TLB. In some cases, the entire TLB
 * can be flushed by this instruction. The va parameter is a virtual
 * address in the page described by the PTE to flush.
 */
static __always_inline void
cpu_tlb_flush_va(unsigned long va)
{
    asm volatile("invlpg (%0)" : : "r" (va) : "memory");
}

/*
 * Return the content of the EFLAGS register.
 */
static __always_inline unsigned long
cpu_get_flags(void)
{
    unsigned long eflags;

    asm volatile("pushf\n"
                 "popl %0\n"
                 : "=r" (eflags));

    return eflags;
}

/*
 * Enable local interrupts.
 */
static __always_inline void
cpu_intr_enable(void)
{
    asm volatile("sti");
}

/*
 * Disable local interrupts.
 */
static __always_inline void
cpu_intr_disable(void)
{
    asm volatile("cli");
}

/*
 * Restore the content of the EFLAGS register, possibly enabling interrupts.
 *
 * NOTE(review): the asm carries no "memory" clobber, so the compiler may
 * reorder memory accesses across it — confirm this is intended.
 */
static __always_inline void
cpu_intr_restore(unsigned long eflags)
{
    asm volatile("pushl %0\n"
                 "popf\n"
                 : : "r" (eflags));
}
+
/*
 * Disable local interrupts, returning the previous content of the EFLAGS
 * register. Pair with cpu_intr_restore().
 */
static __always_inline unsigned long
cpu_intr_save(void)
{
    unsigned long eflags;

    eflags = cpu_get_flags();
    cpu_intr_disable();

    return eflags;
}
+
+/*
+ * Return true if interrupts are enabled.
+ */
+static __always_inline int
+cpu_intr_enabled(void)
+{
+ unsigned long eflags;
+
+ eflags = cpu_get_flags();
+ return (eflags & CPU_EFL_IF) ? 1 : 0;
+}
+
/*
 * Spin-wait loop hint.
 */
static __always_inline void
cpu_pause(void)
{
    asm volatile("pause");
}

/*
 * Make the CPU idle until the next interrupt.
 */
static __always_inline void
cpu_idle(void)
{
    asm volatile("hlt");
}

/*
 * Halt the CPU.
 *
 * Interrupts are disabled first, so the hlt loop is effectively final.
 */
static __noreturn __always_inline void
cpu_halt(void)
{
    cpu_intr_disable();

    for (;;)
        cpu_idle();
}

/*
 * Return the cpu structure of the current processor.
 *
 * The self member is at offset 0 and %fs is based at the cpu structure
 * (see cpu_init_gdt()), so this simply reads %fs:0.
 */
static __always_inline struct cpu *
cpu_current(void)
{
    struct cpu *cpu;

    asm volatile("movl %%fs:%1, %0"
                 : "=r" (cpu)
                 : "m" (*(struct cpu *)offsetof(struct cpu, self)));
    return cpu;
}
+
/*
 * Return the ID of the current processor.
 */
static __always_inline unsigned int
cpu_id(void)
{
    return cpu_current()->id;
}

/*
 * Enable physical address extension (CR4.PAE).
 */
static __always_inline void
cpu_enable_pae(void)
{
    cpu_set_cr4(cpu_get_cr4() | CPU_CR4_PAE);
}

/*
 * Return non-zero if the processor supports global pages.
 *
 * Note that this returns the raw feature bit, not necessarily 1.
 */
static __always_inline int
cpu_has_global_pages(void)
{
    return cpu_current()->features2 & CPU_FEATURE2_PGE;
}

/*
 * Enable global pages support (CR4.PGE).
 */
static __always_inline void
cpu_enable_global_pages(void)
{
    cpu_set_cr4(cpu_get_cr4() | CPU_CR4_PGE);
}

/*
 * Read the given model specific register into low:high.
 */
static __always_inline void
cpu_get_msr(uint32_t msr, uint32_t *low, uint32_t *high)
{
    asm volatile("rdmsr" : "=a" (*low), "=d" (*high) : "c" (msr));
}

/*
 * Write low:high to the given model specific register.
 */
static __always_inline void
cpu_set_msr(uint32_t msr, uint32_t low, uint32_t high)
{
    asm volatile("wrmsr" : : "c" (msr), "a" (low), "d" (high));
}

/*
 * Busy-wait for the given number of microseconds.
 *
 * XXX For now, directly use the PIT.
 */
static __always_inline void
cpu_delay(unsigned long usecs)
{
    pit_delay(usecs);
}
+
+/*
+ * Set the given GDT for the current processor, and reload its segment
+ * registers.
+ */
+void cpu_load_gdt(struct cpu_pseudo_desc *gdtr);
+
+/*
+ * Set up the cpu module.
+ */
+void cpu_setup(void);
+
+/*
+ * Make sure the CPU has some required features.
+ */
+void cpu_check(const struct cpu *cpu);
+
+/*
+ * Display processor information.
+ */
+void cpu_info(const struct cpu *cpu);
+
+/*
+ * Register the presence of a local APIC.
+ */
+void cpu_mp_register_lapic(unsigned int apic_id, int is_bsp);
+
+/*
+ * Probe application processors and start them.
+ */
+void cpu_mp_setup(void);
+
+/*
+ * AP-specific functions.
+ */
+void cpu_ap_setup(void);
+
+/*
+ * Trap functions.
+ */
+void cpu_trap_default(void);
+void cpu_trap_lapic_timer_intr(void);
+void cpu_trap_lapic_spurious_intr(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _I386_CPU_H */
diff --git a/arch/i386/machine/cpu_asm.S b/arch/i386/machine/cpu_asm.S
new file mode 100644
index 00000000..373ca8e2
--- /dev/null
+++ b/arch/i386/machine/cpu_asm.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define __ASSEMBLY__
+
+#include <machine/asm.h>
+#include <machine/cpu.h>
+
+.text
+
ENTRY(cpu_load_gdt)
    movl 4(%esp), %eax
    lgdt (%eax)

    /* Reload the data segment registers with the kernel data selector */
    movl $(CPU_GDT_SELECTOR(CPU_GDT_DATA_IDX)), %eax
    movl %eax, %ds
    movl %eax, %es
    movl %eax, %ss

    /* %gs is unused, load the null selector */
    xorl %eax, %eax
    movl %eax, %gs

    /* %fs is based at the per-cpu data (see cpu_current()) */
    movl $(CPU_GDT_SELECTOR(CPU_GDT_CPU_IDX)), %eax
    movl %eax, %fs

    /* Alter the stack to reload the code segment using a far return */
    popl %eax
    pushl $(CPU_GDT_SELECTOR(CPU_GDT_CODE_IDX))
    pushl %eax
    lret
END(cpu_load_gdt)
+
/*
 * TODO: traps handling.
 *
 * NOTE(review): exceptions that push an error code would leave it on the
 * stack, making this bare iret return to the wrong address — presumably
 * acceptable until real trap handling is written; confirm.
 */
ENTRY(cpu_trap_default)
    iret
END(cpu_trap_default)
+
ENTRY(cpu_trap_lapic_timer_intr)
    /*
     * Save the general purpose registers before calling C code: the
     * i386 calling convention lets lapic_timer_intr clobber %eax, %ecx
     * and %edx, while the interrupted context expects every register
     * to be preserved across the interrupt.
     */
    pushal
    call lapic_timer_intr
    popal
    iret
END(cpu_trap_lapic_timer_intr)
+
ENTRY(cpu_trap_lapic_spurious_intr)
    /* No EOI is sent for spurious interrupts, a bare return is enough */
    iret
END(cpu_trap_lapic_spurious_intr)
diff --git a/arch/i386/machine/init.c b/arch/i386/machine/init.c
new file mode 100644
index 00000000..2ee81035
--- /dev/null
+++ b/arch/i386/machine/init.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/kernel.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <lib/string.h>
+#include <machine/biosmem.h>
+#include <machine/boot.h>
+#include <machine/cpu.h>
+#include <machine/init.h>
+#include <machine/multiboot.h>
+#include <machine/pit.h>
+#include <machine/pmap.h>
+#include <machine/vga.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_setup.h>
+
/*
 * Macros used by the very early panic function.
 */
#define INIT_VGAMEM ((uint16_t *)0xb8000)   /* Physical VGA text buffer */
#define INIT_VGACHARS (80 * 25)             /* 80x25 text mode cells */
#define INIT_VGACOLOR 0x7                   /* Attribute: light grey on black */

/* Stack used by the BSP to bootstrap the kernel */
char init_stack[BOOT_STACK_SIZE] __initdata;
/* Stack shared by each AP during its early boot (APs boot one at a time) */
char init_ap_stack[BOOT_STACK_SIZE] __initdata;
/* CPU ID of the AP currently booting, set by cpu_mp_start_aps() */
unsigned long init_ap_id __initdata;
/* Virtual address of the booting AP's dedicated stack */
unsigned long init_ap_boot_stack __initdata;

/*
 * Copy of the multiboot data passed by the boot loader.
 */
static struct multiboot_info init_mbi __initdata;
+
/*
 * Very early panic: write the message directly to VGA text memory, then
 * halt.
 *
 * This runs from the .boot section at physical addresses before paging is
 * enabled, so string literals (linked at KERNEL_OFFSET) must be accessed
 * through BOOT_ADDR_VTOP().
 */
void __boot
init_panic(const char *msg)
{
    uint16_t *ptr, *end;
    const char *s;

    ptr = INIT_VGAMEM;
    end = ptr + INIT_VGACHARS;

    s = (const char *)BOOT_ADDR_VTOP("boot panic: ");

    while ((ptr < end) && (*s != '\0'))
        *ptr++ = (INIT_VGACOLOR << 8) | *s++;

    s = (const char *)BOOT_ADDR_VTOP(msg);

    while ((ptr < end) && (*s != '\0'))
        *ptr++ = (INIT_VGACOLOR << 8) | *s++;

    /* Clear the rest of the screen */
    while (ptr < end)
        *ptr++ = (INIT_VGACOLOR << 8) | ' ';

    cpu_halt();

    /* Never reached */
}
+
/*
 * Build the initial kernel page tables while still running at physical
 * addresses, and return the physical address of the paging structure the
 * caller must load into CR3 (page directory, or PDPT with PAE).
 */
pmap_pte_t * __boot
init_paging(uint32_t eax, const struct multiboot_info *mbi)
{
    pmap_pte_t *pdir, *ptps, *pte, *id_pte;
    unsigned long i, nr_pages, nr_ptps, kern_start, kern_end;

    if (eax != MULTIBOOT_LOADER_MAGIC)
        init_panic("not started by a multiboot compliant boot loader");

    if (!(mbi->flags & MULTIBOOT_LOADER_MEMORY))
        init_panic("missing basic memory information");

    /*
     * Save the multiboot data passed by the boot loader and initialize the
     * bootstrap allocator. Globals must be accessed through BOOT_VTOP()
     * since paging isn't enabled yet.
     */
    BOOT_VTOP(init_mbi) = *mbi;
    biosmem_bootstrap(&BOOT_VTOP(init_mbi));

    /*
     * Create the kernel virtual mapping. Two mappings are actually established,
     * using the same PTPs: a direct physical mapping, where virtual and
     * physical addresses are identical (the identity mapping), and the true
     * kernel mapping at KERNEL_OFFSET. The identity mapping is required to
     * avoid a fault directly after paging is enabled. In addition, a few pages
     * are reserved immediately after the kernel for the pmap module.
     *
     * While only the kernel is mapped, the PTPs are initialized so that all
     * memory from KERNEL_OFFSET up to the pmap reserved pages can be mapped,
     * which is required by pmap_growkernel().
     */

    /* Allocate the PTPs */
    kern_end = BOOT_ADDR_VTOP(&_end);
    nr_pages = (kern_end / PAGE_SIZE) + PMAP_RESERVED_PAGES;
    nr_ptps = P2ROUND(nr_pages, PMAP_PTE_PER_PT) / PMAP_PTE_PER_PT;
    ptps = biosmem_bootalloc(nr_ptps);

    /* Insert the PTPs in the page directory */
    pdir = (pmap_pte_t *)BOOT_ADDR_VTOP(pmap_kpdir);
    pte = pdir + (KERNEL_OFFSET >> PMAP_PDE_SHIFT);
    id_pte = pdir;

    for (i = 0; i < nr_ptps; i++) {
        *pte = ((unsigned long)ptps + (i * PAGE_SIZE))
               | PMAP_PTE_WRITE | PMAP_PTE_PRESENT;
        /* The identity and kernel mappings share the same PTPs */
        *id_pte++ = *pte++;
    }

    /* Map the kernel */
    kern_start = (unsigned long)&_boot;

    for (i = kern_start; i < kern_end; i += PAGE_SIZE)
        ptps[vm_page_atop(i)] = i | PMAP_PTE_WRITE | PMAP_PTE_PRESENT;

#ifdef PAE
    /* With PAE, the root of the paging tree is the PDPT */
    pte = (pmap_pte_t *)BOOT_ADDR_VTOP(pmap_kpdpt);

    for (i = 0; i < PMAP_NR_PDT; i++)
        pte[i] = ((unsigned long)pdir + (i * PAGE_SIZE)) | PMAP_PTE_PRESENT;

    cpu_enable_pae();

    return pte;
#else /* PAE */
    return pdir;
#endif /* PAE */
}
+
/*
 * Return the physical address of the already initialized kernel paging
 * root for a booting AP, enabling PAE first when configured.
 */
pmap_pte_t * __boot
init_ap_paging(void)
{
#ifdef PAE
    cpu_enable_pae();
    return (pmap_pte_t *)BOOT_ADDR_VTOP(pmap_kpdpt);
#else /* PAE */
    return (pmap_pte_t *)BOOT_ADDR_VTOP(pmap_kpdir);
#endif /* PAE */
}
+
/*
 * Copy physical memory into a kernel allocated buffer.
 *
 * The returned buffer is allocated with kmem_alloc() and owned by the
 * caller. Panics on mapping or allocation failure.
 */
static void * __init
init_save_boot_data_copy(const void *ptr, size_t size)
{
    unsigned long map_addr;
    size_t map_size;
    const void *src;
    void *copy;

    src = vm_kmem_map_pa((unsigned long)ptr, size, &map_addr, &map_size);

    if (src == NULL)
        panic("unable to map boot data in kernel map");

    copy = kmem_alloc(size);

    if (copy == NULL)
        panic("unable to allocate memory for boot data copy");

    memcpy(copy, src, size);
    vm_kmem_unmap_pa(map_addr, map_size);
    return copy;
}
+
/*
 * Copy boot data in kernel allocated memory.
 *
 * At this point, the only required boot data are the modules and the command
 * line strings. Once the boot data are managed as kernel buffers, their
 * backing pages can be freed.
 *
 * NOTE(review): the multiboot unused0 and reserved fields are used here as
 * the sizes of the command line and module strings — presumably filled in
 * by earlier boot code; confirm against the boot sequence.
 *
 * TODO Handle more boot data such as debugging symbols.
 */
static void __init
init_save_boot_data(void)
{
    uint32_t i;

    if (init_mbi.flags & MULTIBOOT_LOADER_CMDLINE)
        init_mbi.cmdline = init_save_boot_data_copy(init_mbi.cmdline,
                                                    init_mbi.unused0);
    else
        init_mbi.cmdline = NULL;

    if (init_mbi.flags & MULTIBOOT_LOADER_MODULES) {
        struct multiboot_module *mod;
        size_t size;

        size = init_mbi.mods_count * sizeof(struct multiboot_module);
        init_mbi.mods_addr = init_save_boot_data_copy(init_mbi.mods_addr, size);

        for (i = 0; i < init_mbi.mods_count; i++) {
            mod = &init_mbi.mods_addr[i];
            size = mod->mod_end - mod->mod_start;
            mod->mod_start = init_save_boot_data_copy(mod->mod_start, size);
            mod->mod_end = mod->mod_start + size;

            if (mod->string != NULL)
                mod->string = init_save_boot_data_copy(mod->string,
                                                       mod->reserved);
        }
    } else {
        init_mbi.mods_count = 0;
        init_mbi.mods_addr = NULL;
    }
}
+
/*
 * Main entry point, called once basic paging is enabled (see init.h).
 * Brings up the kernel subsystems in dependency order, then enters
 * kernel_main().
 */
void __init
init(void)
{
    cpu_setup();
    pmap_bootstrap();
    vga_setup();
    kernel_show_banner();
    cpu_check(cpu_current());
    cpu_info(cpu_current());
    biosmem_setup();
    vm_setup();
    init_save_boot_data();
    biosmem_free_usable();
    vm_phys_info();
    pit_setup();
    cpu_mp_setup();
    kernel_main();

    /* Never reached */
}

/*
 * Entry point for APs: initialize the processor, then idle with
 * interrupts enabled.
 */
void __init
init_ap(void)
{
    cpu_ap_setup();
    cpu_info(cpu_current());

    cpu_intr_enable();

    for (;;)
        cpu_idle();

    /* Never reached */
}
diff --git a/arch/i386/machine/init.h b/arch/i386/machine/init.h
new file mode 100644
index 00000000..8b5d6938
--- /dev/null
+++ b/arch/i386/machine/init.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_INIT_H
+#define _I386_INIT_H
+
+#include <lib/macros.h>
+#include <lib/stdint.h>
+#include <machine/boot.h>
+#include <machine/multiboot.h>
+#include <machine/pmap.h>
+
+/*
+ * Stack used to bootstrap the kernel.
+ */
+extern char init_stack[BOOT_STACK_SIZE];
+
+/*
+ * Common stack used by APs to bootstrap.
+ */
+extern char init_ap_stack[BOOT_STACK_SIZE];
+
+/*
+ * This variable contains the CPU ID of an AP during its early boot.
+ */
+extern unsigned long init_ap_id;
+
+/*
+ * After its early boot, an AP enables paging and jumps to virtual
+ * addresses. At this point, it switches to a per-AP preallocated
+ * stack. This variable contains the (virtual) address of that stack.
+ */
+extern unsigned long init_ap_boot_stack;
+
+/*
+ * Print the given message and halt the system immediately.
+ *
+ * This function allows early initialization code to print something helpful
+ * before printk is available.
+ */
+void __noreturn init_panic(const char *s);
+
+/*
+ * This function is called by the bootstrap code before paging is enabled.
+ * It establishes a direct mapping of the kernel at virtual addresses and
+ * returns the physical address of the page directory. It is up to the
+ * caller to actually enable paging.
+ */
+pmap_pte_t * init_paging(uint32_t eax, const struct multiboot_info *mbi);
+
+/*
+ * This function is called by the AP bootstrap code before paging is enabled.
+ * It merely returns the physical address of the already existing kernel page
+ * directory.
+ */
+pmap_pte_t * init_ap_paging(void);
+
+/*
+ * Main entry point, called directly after basic paging is initialized.
+ */
+void init(void);
+
+/*
+ * Entry point for APs.
+ */
+void init_ap(void);
+
+#endif /* _I386_INIT_H */
diff --git a/arch/i386/machine/io.h b/arch/i386/machine/io.h
new file mode 100644
index 00000000..66a53561
--- /dev/null
+++ b/arch/i386/machine/io.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_IO_H
+#define _I386_IO_H
+
+#include <lib/stdint.h>
+
/*
 * Read a byte from an I/O port.
 */
static inline uint8_t
io_read_byte(uint16_t port)
{
    uint8_t value;

    /* "d" places the port in DX, "=a" returns the byte read in AL */
    asm volatile("inb %%dx, %%al" : "=a" (value) : "d" (port));
    return value;
}
+
/*
 * Write a byte to an I/O port.
 */
static inline void
io_write_byte(uint16_t port, uint8_t value)
{
    /* "d" places the port in DX, "a" supplies the byte to write in AL */
    asm volatile("outb %%al, %%dx" : : "d" (port), "a" (value));
}
+
+#endif /* _I386_IO_H */
diff --git a/arch/i386/machine/lapic.c b/arch/i386/machine/lapic.c
new file mode 100644
index 00000000..1a1236af
--- /dev/null
+++ b/arch/i386/machine/lapic.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <lib/macros.h>
+#include <lib/stdint.h>
+#include <machine/cpu.h>
+#include <machine/lapic.h>
+#include <machine/pmap.h>
+#include <machine/trap.h>
+#include <vm/vm_kmem.h>
+
/*
 * Mask used to check that local APICs are internal.
 */
+#define LAPIC_VERSION_MASK 0x10
+
+/*
+ * Common bits for registers in the local vector table.
+ */
+#define LAPIC_LVT_DELIVERY_FIXED 0x00000000
+#define LAPIC_LVT_DELIVERY_NMI 0x00000400
+#define LAPIC_LVT_DELIVERY_EXTINT 0x00000700
+#define LAPIC_LVT_MASK_INTR 0x00010000
+
+/*
+ * LVT timer register bits.
+ */
+#define LAPIC_LVT_TIMER_PERIODIC 0x00020000
+
+/*
+ * Various values related to the local APIC timer.
+ */
+#define LAPIC_TIMER_DCR_DIV1 0x0000000b
+#define LAPIC_TIMER_COUNT_MAX 0xffffffff
+
+/*
+ * The value of this duration (in microseconds) must be carefully set.
+ * It must divide a second (1000000) without loss of precision. It is
+ * recommended to use either 1s or 100ms. The former gives the best
+ * results, as it renders the time used for accounting operations
+ * negligible, but is slightly longer.
+ */
+#define LAPIC_TIMER_CAL_DELAY 1000000
+
+/*
+ * Spurious-interrupt vector register bits.
+ */
+#define LAPIC_SVR_SOFT_EN 0x00000100
+
+/*
+ * Interrupt command register (lower word) bits.
+ */
+#define LAPIC_ICR_VECTOR_MASK 0x000000ff
+#define LAPIC_ICR_DELIVERY_INIT 0x00000500
+#define LAPIC_ICR_DELIVERY_STARTUP 0x00000600
+#define LAPIC_ICR_STATUS_PENDING 0x00001000
+#define LAPIC_ICR_LEVEL_ASSERT 0x00004000
+#define LAPIC_ICR_TRIGGER_LEVEL 0x00008000
+#define LAPIC_ICR_DEST_SELF 0x00040000
+#define LAPIC_ICR_DEST_ALL_WITH_SELF 0x00080000
+#define LAPIC_ICR_DEST_ALL_EXCEPT_SELF 0x000c0000
+#define LAPIC_ICR_DEST_MASK 0x000c0000
+#define LAPIC_ICR_RESERVED 0xfff32000
+
+/*
+ * ICR destination shift and mask.
+ */
+#define LAPIC_DEST_SHIFT 24
+#define LAPIC_DEST_MASK 0xff000000
+
/*
 * Local APIC registers are accessed with 32-bits loads/stores aligned on
 * 128 bits.
 */
struct lapic_register {
    uint32_t reg;           /* The register value itself */
    uint32_t reserved0;     /* Padding up to the 128-bit stride */
    uint32_t reserved1;
    uint32_t reserved2;
} __packed;
+
+/*
+ * Local APIC register map.
+ */
+struct lapic_map {
+ const struct lapic_register reserved0;
+ const struct lapic_register reserved1;
+
    /*
     * Some processors don't allow writing to this register, and the
     * specification explicitly discourages modifications. Consider it
     * read only.
     */
+ const struct lapic_register id;
+ const struct lapic_register version;
+ const struct lapic_register reserved2;
+ const struct lapic_register reserved3;
+ const struct lapic_register reserved4;
+ const struct lapic_register reserved5;
+ struct lapic_register tpr;
+ const struct lapic_register reserved6; /* APR */
+ const struct lapic_register ppr;
+ struct lapic_register eoi;
+ const struct lapic_register reserved7; /* RRD */
+ struct lapic_register ldr;
+ struct lapic_register dfr;
+ struct lapic_register svr;
+ const struct lapic_register isr0;
+ const struct lapic_register isr1;
+ const struct lapic_register isr2;
+ const struct lapic_register isr3;
+ const struct lapic_register isr4;
+ const struct lapic_register isr5;
+ const struct lapic_register isr6;
+ const struct lapic_register isr7;
+ const struct lapic_register tmr0;
+ const struct lapic_register tmr1;
+ const struct lapic_register tmr2;
+ const struct lapic_register tmr3;
+ const struct lapic_register tmr4;
+ const struct lapic_register tmr5;
+ const struct lapic_register tmr6;
+ const struct lapic_register tmr7;
+ const struct lapic_register irr0;
+ const struct lapic_register irr1;
+ const struct lapic_register irr2;
+ const struct lapic_register irr3;
+ const struct lapic_register irr4;
+ const struct lapic_register irr5;
+ const struct lapic_register irr6;
+ const struct lapic_register irr7;
+ struct lapic_register esr;
+ const struct lapic_register reserved8;
+ const struct lapic_register reserved9;
+ const struct lapic_register reserved10;
+ const struct lapic_register reserved11;
+ const struct lapic_register reserved12;
+ const struct lapic_register reserved13;
+ struct lapic_register lvt_cmci;
+ struct lapic_register icr_low;
+ struct lapic_register icr_high;
+ struct lapic_register lvt_timer;
+ const struct lapic_register reserved14; /* Thermal sensor register */
+ const struct lapic_register reserved15; /* Performance counters register */
+ struct lapic_register lvt_lint0;
+ struct lapic_register lvt_lint1;
+ struct lapic_register lvt_error;
+ struct lapic_register timer_icr;
+ const struct lapic_register timer_ccr;
+ const struct lapic_register reserved16;
+ const struct lapic_register reserved17;
+ const struct lapic_register reserved18;
+ const struct lapic_register reserved19;
+ struct lapic_register timer_dcr;
+ const struct lapic_register reserved20;
+} __packed;
+
+/*
+ * Address where local APIC registers are mapped.
+ */
+static volatile struct lapic_map *lapic_map;
+
+/*
+ * Base frequency of the local APIC timer.
+ */
+static uint32_t lapic_bus_freq;
+
/*
 * Read a local APIC register.
 *
 * The mapping is volatile-qualified, so this performs an actual MMIO load.
 */
static uint32_t
lapic_read(const volatile struct lapic_register *r)
{
    return r->reg;
}
+
/*
 * Write a local APIC register.
 *
 * The mapping is volatile-qualified, so this performs an actual MMIO store.
 */
static void
lapic_write(volatile struct lapic_register *r, uint32_t value)
{
    r->reg = value;
}
+
+static void __init
+lapic_setup_timer(void)
+{
+ uint32_t c1, c2;
+
+ lapic_write(&lapic_map->timer_dcr, LAPIC_TIMER_DCR_DIV1);
+
+ /* The APIC timer counter should never wrap around here */
+ lapic_write(&lapic_map->timer_icr, LAPIC_TIMER_COUNT_MAX);
+ c1 = lapic_read(&lapic_map->timer_ccr);
+ cpu_delay(LAPIC_TIMER_CAL_DELAY);
+ c2 = lapic_read(&lapic_map->timer_ccr);
+ lapic_bus_freq = (c1 - c2) * (1000000 / LAPIC_TIMER_CAL_DELAY);
+ printk("lapic: bus frequency: %u.%02u MHz\n", lapic_bus_freq / 1000000,
+ lapic_bus_freq % 1000000);
+ lapic_write(&lapic_map->timer_icr, lapic_bus_freq / HZ);
+}
+
/*
 * Signal the end of the current interrupt to the local APIC.
 */
static void
lapic_eoi(void)
{
    lapic_write(&lapic_map->eoi, 0);
}
+
/*
 * Program the common set of local APIC registers.
 *
 * Runs on both the BSP and APs. The timer is programmed from
 * lapic_bus_freq, which must have been computed beforehand (see
 * lapic_setup_timer()).
 */
static void __init
lapic_setup_registers(void)
{
    /*
     * LVT mask bits can only be cleared when the local APIC is enabled,
     * so software-enable it first through the SVR.
     */
    lapic_write(&lapic_map->svr, LAPIC_SVR_SOFT_EN | T_APIC_SPURIOUS_INTR);
    lapic_write(&lapic_map->tpr, 0);    /* Accept all interrupt priorities */
    lapic_write(&lapic_map->eoi, 0);
    lapic_write(&lapic_map->esr, 0);
    lapic_write(&lapic_map->lvt_timer, LAPIC_LVT_TIMER_PERIODIC
                                       | T_APIC_TIMER_INTR);
    /* LINT0/LINT1 are kept masked */
    lapic_write(&lapic_map->lvt_lint0, LAPIC_LVT_MASK_INTR);
    lapic_write(&lapic_map->lvt_lint1, LAPIC_LVT_MASK_INTR);
    lapic_write(&lapic_map->lvt_error, T_APIC_ERROR_INTR);
    lapic_write(&lapic_map->timer_dcr, LAPIC_TIMER_DCR_DIV1);
    lapic_write(&lapic_map->timer_icr, lapic_bus_freq / HZ);
}
+
/*
 * Set up the lapic module.
 *
 * Map the local APIC registers at the given physical address, verify
 * through the version register that the local APIC is internal (external
 * local APICs are rejected), then program the registers and calibrate
 * the timer.
 */
void __init
lapic_setup(uint32_t map_addr)
{
    uint32_t value;

    lapic_map = vm_kmem_map_pa(map_addr, sizeof(*lapic_map), NULL, NULL);

    if (lapic_map == NULL)
        panic("lapic: unable to map registers in kernel map");

    value = lapic_read(&lapic_map->version);

    if ((value & LAPIC_VERSION_MASK) != LAPIC_VERSION_MASK)
        panic("lapic: external local APIC not supported");

    lapic_setup_registers();
    lapic_setup_timer();
}
+
/*
 * Set up the local APIC for an AP.
 *
 * Only the registers are programmed; the timer calibration done by the
 * BSP (lapic_bus_freq) is reused.
 */
void __init
lapic_ap_setup(void)
{
    lapic_setup_registers();
}
+
/*
 * Send an inter-processor interrupt.
 *
 * The dest local APIC ID is only used when icr doesn't select a
 * destination shorthand (self/all/all-but-self). Reserved ICR bits are
 * preserved across the read-modify-write.
 */
static void
lapic_ipi(uint32_t dest, uint32_t icr)
{
    uint32_t value;

    /* No shorthand selected: program the target APIC ID */
    if ((icr & LAPIC_ICR_DEST_MASK) == 0) {
        value = lapic_read(&lapic_map->icr_high);
        value &= ~LAPIC_DEST_MASK;
        value |= dest << LAPIC_DEST_SHIFT;
        lapic_write(&lapic_map->icr_high, value);
    }

    /* Writing the low word sends the IPI */
    value = lapic_read(&lapic_map->icr_low);
    value &= LAPIC_ICR_RESERVED;
    value |= icr;
    lapic_write(&lapic_map->icr_low, value);
}
+
+static void
+lapic_ipi_wait(void)
+{
+ uint32_t value;
+
+ do {
+ value = lapic_read(&lapic_map->icr_low);
+ cpu_pause();
+ } while (value & LAPIC_ICR_STATUS_PENDING);
+}
+
/*
 * Assert a level-triggered INIT IPI towards the given local APIC and
 * wait for its delivery.
 */
void
lapic_ipi_init_assert(uint32_t dest)
{
    lapic_ipi(dest, LAPIC_ICR_TRIGGER_LEVEL | LAPIC_ICR_LEVEL_ASSERT
                    | LAPIC_ICR_DELIVERY_INIT);
    lapic_ipi_wait();
}
+
/*
 * Deassert a previously asserted INIT IPI (level bit cleared) and wait
 * for its delivery.
 */
void
lapic_ipi_init_deassert(uint32_t dest)
{
    lapic_ipi(dest, LAPIC_ICR_TRIGGER_LEVEL | LAPIC_ICR_DELIVERY_INIT);
    lapic_ipi_wait();
}
+
/*
 * Send a STARTUP IPI carrying the given vector (truncated to 8 bits) to
 * the given local APIC and wait for its delivery.
 */
void
lapic_ipi_startup(uint32_t dest, uint32_t vector)
{
    lapic_ipi(dest, LAPIC_ICR_DELIVERY_STARTUP
                    | (vector & LAPIC_ICR_VECTOR_MASK));
    lapic_ipi_wait();
}
+
/*
 * Timer interrupt handler: simply acknowledge the interrupt.
 */
void
lapic_timer_intr(void)
{
    lapic_eoi();
}
diff --git a/arch/i386/machine/lapic.h b/arch/i386/machine/lapic.h
new file mode 100644
index 00000000..5d5c82aa
--- /dev/null
+++ b/arch/i386/machine/lapic.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_LAPIC_H
+#define _I386_LAPIC_H
+
+#include <lib/stdint.h>
+
+/*
+ * Set up the lapic module.
+ */
+void lapic_setup(uint32_t map_addr);
+
+/*
+ * Set up the local APIC for an AP.
+ */
+void lapic_ap_setup(void);
+
+/*
+ * Functions used when initializing an AP.
+ */
+void lapic_ipi_init_assert(uint32_t dest);
+void lapic_ipi_init_deassert(uint32_t dest);
+void lapic_ipi_startup(uint32_t dest, uint32_t vector);
+
+/*
+ * Interrupt handlers.
+ */
+void lapic_timer_intr(void);
+
+#endif /* _I386_LAPIC_H */
diff --git a/arch/i386/machine/mps.c b/arch/i386/machine/mps.c
new file mode 100644
index 00000000..8a4a899d
--- /dev/null
+++ b/arch/i386/machine/mps.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/panic.h>
+#include <kern/printk.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <lib/string.h>
+#include <machine/biosmem.h>
+#include <machine/cpu.h>
+#include <machine/io.h>
+#include <machine/lapic.h>
+#include <machine/mps.h>
+#include <vm/vm_kmem.h>
+
+/*
+ * Alignment of the FPS.
+ */
+#define MPS_FPS_ALIGN 16
+
+/*
+ * Signature of the floating pointer structure.
+ */
+#define MPS_FPS_SIG "_MP_"
+
+/*
+ * Flag of the feature2 byte indicating the presence of the interrupt mode
+ * configuration register (IMCR).
+ */
+#define MPS_FPS_IMCR_PRESENT 0x80
+
+/*
+ * IMCR ports and values.
+ */
+#define MPS_IMCR_PORT_ADDR 0x22
+#define MPS_IMCR_PORT_DATA 0x23
+#define MPS_IMCR_SELECT 0x70
+#define MPS_IMCR_APIC_MODE 0x01
+
/*
 * MP floating pointer structure, located by scanning BIOS memory areas.
 */
struct mps_fps {
    uint8_t signature[4];   /* "_MP_" */
    uint32_t phys_addr;     /* Physical address of the configuration table */
    uint8_t length;
    uint8_t spec_rev;
    uint8_t checksum;       /* Makes all bytes of the structure sum to 0 */
    uint8_t conf_type;      /* Non-zero selects a default configuration */
    uint8_t feature2;       /* Holds the MPS_FPS_IMCR_PRESENT flag */
    uint8_t feature3;
    uint8_t feature4;
    uint8_t feature5;
} __packed;
+
/*
 * Processor entry flags.
 */
#define MPS_PROC_EN 0x1     /* Processor is enabled */
#define MPS_PROC_BP 0x2     /* Processor is the bootstrap processor */

/*
 * Processor entry of the configuration table.
 */
struct mps_proc {
    uint8_t type;
    uint8_t lapic_id;
    uint8_t lapic_version;
    uint8_t cpu_flags;      /* See MPS_PROC_* above */
    uint32_t cpu_signature;
    uint32_t feature_flags;
    uint32_t reserved1;
    uint32_t reserved2;
} __packed;

/*
 * Bus entry of the configuration table.
 */
struct mps_bus {
    uint8_t type;
    uint8_t bus_id;
    uint8_t bus_type[6];
} __packed;

/*
 * I/O APIC entry of the configuration table.
 */
struct mps_ioapic {
    uint8_t type;
    uint8_t id;
    uint8_t version;
    uint8_t flags;
    uint32_t addr;
} __packed;

/*
 * Interrupt assignment entry, used for both I/O and local interrupts.
 */
struct mps_intr {
    uint8_t type;
    uint8_t intr_type;
    uint16_t intr_flags;
    uint8_t src_bus_id;
    uint8_t src_bus_irq;
    uint8_t dst_apic_id;
    uint8_t dst_apic_intr;
} __packed;

/*
 * Any configuration table entry; the type member discriminates between
 * the variants (see the MPS_ENTRY_* codes).
 */
union mps_entry {
    uint8_t type;
    struct mps_proc proc;
    struct mps_bus bus;
    struct mps_ioapic ioapic;
    struct mps_intr intr;
} __packed;
+
/*
 * Signature of the MPS table.
 */
#define MPS_TABLE_SIG "PCMP"

/*
 * Configuration table header, immediately followed by entry_count
 * variable-size entries.
 */
struct mps_cth {
    uint8_t signature[4];       /* "PCMP" */
    uint16_t base_table_length; /* Size of the base table, header included */
    uint8_t spec_rev;
    uint8_t checksum;           /* Makes all base table bytes sum to 0 */
    uint8_t oem_id[8];          /* Not null-terminated */
    uint8_t prod_id[12];        /* Not null-terminated */
    uint32_t oem_table_ptr;
    uint16_t oem_table_size;
    uint16_t entry_count;       /* Number of entries following the header */
    uint32_t lapic_addr;        /* Physical address of the local APIC */
    uint16_t ext_table_length;
    uint8_t ext_table_checksum;
    uint8_t reserved;
    union mps_entry entries[0]; /* Variable-size entries (GNU extension) */
} __packed;

/*
 * Iterator over the variable-size entries of a configuration table.
 */
struct mps_iter {
    const union mps_entry *entry;   /* Current entry */
    uint16_t index;                 /* Index of the current entry */
    uint16_t size;                  /* Total number of entries */
};

/*
 * Iterate over all entries of a table, using the given iterator.
 */
#define mps_foreach(table, iter) \
for (mps_iter_init(iter, table); mps_iter_valid(iter); mps_iter_next(iter))
+
+/*
+ * MPS table entry type codes.
+ */
+#define MPS_ENTRY_PROC 0
+#define MPS_ENTRY_BUS 1
+#define MPS_ENTRY_IOAPIC 2
+#define MPS_ENTRY_IOINTR 3
+#define MPS_ENTRY_LOCAL_INTR 4
+
+/*
+ * Array of entry type sizes.
+ *
+ * The entry type codes must match the indexes of their associated size.
+ */
+static const size_t mps_entry_sizes[] __initdata = {
+ sizeof(struct mps_proc),
+ sizeof(struct mps_bus),
+ sizeof(struct mps_ioapic),
+ sizeof(struct mps_intr),
+ sizeof(struct mps_intr)
+};
+
/*
 * Sum the given bytes, truncated to 8 bits.
 *
 * A valid MPS structure has a checksum byte making this sum 0.
 */
static unsigned int __init
mps_checksum(const void *ptr, size_t size)
{
    const uint8_t *byte, *end;
    uint8_t sum;

    sum = 0;
    end = (const uint8_t *)ptr + size;

    for (byte = ptr; byte < end; byte++)
        sum += *byte;

    return sum;
}
+
+static int __init
+mps_check_fps(const struct mps_fps *fps)
+{
+ unsigned int checksum;
+
+ if (memcmp(fps->signature, MPS_FPS_SIG, sizeof(fps->signature)) != 0)
+ return -1;
+
+ checksum = mps_checksum(fps, sizeof(*fps));
+
+ if (checksum != 0)
+ return -1;
+
+ return 0;
+}
+
/*
 * Scan the physical range [start, start + size) for a valid floating
 * pointer structure, copying it into *fps if found.
 *
 * Both start and size must be aligned on MPS_FPS_ALIGN. Return 0 on
 * success, -1 if no valid structure was found.
 */
static int __init
mps_get_fps(vm_phys_t start, size_t size, struct mps_fps *fps)
{
    const struct mps_fps *src;
    unsigned long addr, end, map_addr;
    size_t map_size;
    int error;

    assert(size > 0);
    assert(P2ALIGNED(size, MPS_FPS_ALIGN));

    if (!P2ALIGNED(start, MPS_FPS_ALIGN))
        return -1;

    /* Temporarily map the physical range into the kernel map */
    addr = (unsigned long)vm_kmem_map_pa(start, size, &map_addr, &map_size);

    if (addr == 0)
        panic("mps: unable to map bios memory in kernel map");

    /* The structure, if present, starts on a 16-byte boundary */
    for (end = addr + size; addr < end; addr += MPS_FPS_ALIGN) {
        src = (const struct mps_fps *)addr;
        error = mps_check_fps(src);

        if (!error)
            break;
    }

    if (!(addr < end)) {
        /* The whole range was scanned without a match */
        error = -1;
        goto out;
    }

    /* Copy out before unmapping, as src points into the mapping */
    memcpy(fps, src, sizeof(*fps));
    error = 0;

out:
    vm_kmem_unmap_pa(map_addr, map_size);
    return error;
}
+
/*
 * Locate the floating pointer structure: first in the last KiB of the
 * EBDA (or of base memory when there is no EBDA), then in the BIOS ROM
 * area. Copy it into *fps when found.
 *
 * Return 0 on success, -1 if it cannot be found (which usually means
 * the hardware doesn't implement the MPS).
 */
static int __init
mps_find_fps(struct mps_fps *fps)
{
    const uint16_t *ptr;
    unsigned long base, map_addr;
    size_t map_size;
    int error;

    /* Fetch the EBDA pointer from the BIOS data area */
    ptr = vm_kmem_map_pa(BIOSMEM_EBDA_PTR, sizeof(*ptr), &map_addr, &map_size);

    if (ptr == NULL)
        panic("mps: unable to map ebda pointer in kernel map");

    /* Volatile access: this memory was written outside the program */
    base = *((const volatile uint16_t *)ptr);
    vm_kmem_unmap_pa(map_addr, map_size);

    if (base != 0)
        base <<= 4;     /* Real-mode segment to physical address */
    else
        base = BIOSMEM_BASE_END - 1024;     /* Last KiB of base memory */

    error = mps_get_fps(base, 1024, fps);

    if (!error)
        return 0;

    error = mps_get_fps(BIOSMEM_ROM, BIOSMEM_END - BIOSMEM_ROM, fps);

    if (!error)
        return 0;

    printk("mps: unable to find floating pointer structure\n");
    return -1;
}
+
/*
 * Map the configuration table referred to by the FPS, validate its
 * signature and checksum, and return a copy allocated from the kernel
 * heap (to be released with mps_free_table()), or NULL on error.
 */
static struct mps_cth * __init
mps_copy_table(const struct mps_fps *fps)
{
    const struct mps_cth *table;
    struct mps_cth *copy;
    unsigned long map_addr;
    size_t size, map_size;
    unsigned int checksum;

    if (fps->phys_addr == 0) {
        printk("mps: table doesn't exist");
        return NULL;
    }

    /* Map the header alone first, to learn the full table size */
    table = vm_kmem_map_pa(fps->phys_addr, sizeof(*table),
                           &map_addr, &map_size);

    if (table == NULL)
        panic("mps: unable to map table header in kernel map");

    size = ((const volatile struct mps_cth *)table)->base_table_length;
    vm_kmem_unmap_pa(map_addr, map_size);

    /* Remap the complete base table */
    table = vm_kmem_map_pa(fps->phys_addr, size, &map_addr, &map_size);

    if (table == NULL)
        panic("mps: unable to map table in kernel map");

    if (memcmp(table->signature, MPS_TABLE_SIG, sizeof(table->signature))
        != 0) {
        printk("mps: invalid table signature\n");
        copy = NULL;
        goto error;
    }

    checksum = mps_checksum(table, size);

    if (checksum != 0) {
        printk("mps: table checksum failed\n");
        copy = NULL;
        goto error;
    }

    copy = kmem_alloc(size);

    if (copy == NULL)
        panic("mps: unable to allocate memory for table copy");

    memcpy(copy, table, size);

    /* Fall through: the mapping is released on success as well */
error:
    vm_kmem_unmap_pa(map_addr, map_size);
    return copy;
}
+
/*
 * Print the table revision and the OEM/product identifiers.
 */
static void __init
mps_info(const struct mps_cth *table)
{
    /* oem_id/prod_id aren't null-terminated, hence the %.*s conversions */
    printk("mps: spec revision: 1.%u, %.*s %.*s\n",
           (unsigned int)table->spec_rev,
           (int)sizeof(table->oem_id), table->oem_id,
           (int)sizeof(table->prod_id), table->prod_id);
}
+
/*
 * If an IMCR is present, switch the system to symmetric I/O mode so
 * that interrupts are delivered through the APICs. Without an IMCR,
 * nothing needs to be done.
 */
static void __init
mps_set_intr_mode(const struct mps_fps *fps)
{
    uint8_t byte;

    if (!(fps->feature2 & MPS_FPS_IMCR_PRESENT))
        return;

    /* Switch to symmetric I/O mode */
    io_write_byte(MPS_IMCR_PORT_ADDR, MPS_IMCR_SELECT);
    byte = io_read_byte(MPS_IMCR_PORT_DATA);
    byte |= MPS_IMCR_APIC_MODE;
    io_write_byte(MPS_IMCR_PORT_DATA, byte);
}
+
+static void __init
+mps_iter_init(struct mps_iter *iter, const struct mps_cth *table)
+{
+ iter->entry = table->entries;
+ iter->index = 0;
+ iter->size = table->entry_count;
+}
+
+static int __init
+mps_iter_valid(const struct mps_iter *iter)
+{
+ return iter->index < iter->size;
+}
+
/*
 * Advance the iterator past the current entry.
 *
 * Entries have different sizes depending on their type, hence the
 * mps_entry_sizes lookup. Note that arithmetic on a void pointer is a
 * GNU C extension (treated as byte-sized).
 */
static void __init
mps_iter_next(struct mps_iter *iter)
{
    assert(iter->entry->type < ARRAY_SIZE(mps_entry_sizes));
    iter->entry = (void *)iter->entry + mps_entry_sizes[iter->entry->type];
    iter->index++;
}
+
/*
 * Register a processor entry with the cpu module, skipping disabled
 * processors.
 */
static void __init
mps_load_proc(const struct mps_proc *proc)
{
    if (!(proc->cpu_flags & MPS_PROC_EN))
        return;

    cpu_mp_register_lapic(proc->lapic_id, proc->cpu_flags & MPS_PROC_BP);
}
+
/*
 * Process a configuration table: set up the local APIC at the address
 * it reports, then walk the entries, registering processors. Bus,
 * I/O APIC and interrupt entries are currently ignored.
 */
static void __init
mps_load_table(const struct mps_cth *table)
{
    struct mps_iter iter;

    lapic_setup(table->lapic_addr);

    mps_foreach(table, &iter) {
        switch (iter.entry->type) {
        case MPS_ENTRY_PROC:
            mps_load_proc(&iter.entry->proc);
            break;
        case MPS_ENTRY_BUS:
        case MPS_ENTRY_IOAPIC:
        case MPS_ENTRY_IOINTR:
        case MPS_ENTRY_LOCAL_INTR:
            /* Recognized but not used yet */
            break;
        default:
            panic("mps: invalid table entry type");
        }
    }
}
+
/*
 * Release a table copy obtained from mps_copy_table().
 */
static void __init
mps_free_table(struct mps_cth *table)
{
    kmem_free(table, table->base_table_length);
}
+
/*
 * Load multiprocessor information from the MPS structures.
 *
 * Return 0 if successful; an error usually means the hardware doesn't
 * implement the MPS.
 */
int __init
mps_setup(void)
{
    struct mps_cth *table;
    struct mps_fps fps;
    int error;

    error = mps_find_fps(&fps);

    if (error)
        return error;

    /*
     * A non-zero conf_type selects one of the spec-defined default
     * configurations instead of providing a table.
     */
    if (fps.conf_type != 0)
        panic("mps: default tables not implemented");
    else {
        table = mps_copy_table(&fps);

        if (table == NULL)
            return -1;
    }

    mps_info(table);
    mps_set_intr_mode(&fps);
    mps_load_table(table);
    mps_free_table(table);
    return 0;
}
diff --git a/arch/i386/machine/mps.h b/arch/i386/machine/mps.h
new file mode 100644
index 00000000..304cec7d
--- /dev/null
+++ b/arch/i386/machine/mps.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Information gathering module, supporting the Intel MultiProcessor
+ * Specification v1.4.
+ */
+
+#ifndef _I386_MPS_H
+#define _I386_MPS_H
+
+/*
+ * Load multiprocessor information.
+ *
+ * Return 0 if successful (an error usually means hardware doesn't support
+ * the MPS).
+ */
+int mps_setup(void);
+
+#endif /* _I386_MPS_H */
diff --git a/arch/i386/machine/multiboot.h b/arch/i386/machine/multiboot.h
new file mode 100644
index 00000000..70e47cd7
--- /dev/null
+++ b/arch/i386/machine/multiboot.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_MULTIBOOT_H
+#define _I386_MULTIBOOT_H
+
+/*
+ * Magic number provided by the OS to the boot loader.
+ */
+#define MULTIBOOT_OS_MAGIC 0x1badb002
+
+/*
+ * Multiboot flags requesting services from the boot loader.
+ */
+#define MULTIBOOT_OS_MEMORY_INFO 0x2
+
+#define MULTIBOOT_OS_FLAGS MULTIBOOT_OS_MEMORY_INFO
+
+/*
+ * Magic number to identify a multiboot compliant boot loader.
+ */
+#define MULTIBOOT_LOADER_MAGIC 0x2badb002
+
+/*
+ * Multiboot flags set by the boot loader.
+ */
+#define MULTIBOOT_LOADER_MEMORY 0x01
+#define MULTIBOOT_LOADER_CMDLINE 0x04
+#define MULTIBOOT_LOADER_MODULES 0x08
+#define MULTIBOOT_LOADER_MMAP 0x40
+
+#ifndef __ASSEMBLY__
+
+#include <lib/macros.h>
+#include <lib/stdint.h>
+
/*
 * A multiboot module.
 */
struct multiboot_module {
    void *mod_start;    /* Start address of the module data */
    void *mod_end;      /* End address of the module data */
    char *string;       /* Module string (presumably its command line) */
    uint32_t reserved;
} __packed;

/*
 * Memory map entry.
 */
struct multiboot_mmap_entry {
    uint32_t size;      /* Size of this entry */
    uint64_t base_addr;
    uint64_t length;
    uint32_t type;
} __packed;

/*
 * Multiboot information structure to get data passed by the boot loader.
 *
 * The flags member (MULTIBOOT_LOADER_*) tells which of the other
 * members are valid.
 */
struct multiboot_info {
    uint32_t flags;
    uint32_t mem_lower;
    uint32_t mem_upper;
    uint32_t unused0;
    char *cmdline;
    uint32_t mods_count;
    struct multiboot_module *mods_addr;
    uint32_t unused1[4];
    uint32_t mmap_length;
    void *mmap_addr;
    uint32_t unused2[9];
} __packed;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _I386_MULTIBOOT_H */
diff --git a/arch/i386/machine/param.h b/arch/i386/machine/param.h
new file mode 100644
index 00000000..caa16912
--- /dev/null
+++ b/arch/i386/machine/param.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_PARAM_H
+#define _I386_PARAM_H
+
+#include <machine/boot.h>
+
+#define __LITTLE_ENDIAN__
+
+/*
+ * L1 cache line size.
+ *
+ * XXX Use this value until processor selection is available.
+ */
+#define CPU_L1_SIZE 64
+
+/*
+ * System timer frequency.
+ */
+#define HZ 100
+
+/*
+ * 4 KiB virtual pages.
+ */
+#define PAGE_SHIFT 12
+
+/*
+ * User space boundaries.
+ */
+#define VM_MIN_ADDRESS 0UL
+#define VM_MAX_ADDRESS (unsigned long)KERNEL_OFFSET
+
+/*
+ * Kernel stack size for threads and interrupt handlers.
+ */
+#define STACK_SIZE 4096
+
+/*
+ * Size of a linear mapping of PTEs (see the pmap module).
+ */
+#ifdef PAE
+#define VM_PMAP_PTEMAP_SIZE 0x800000UL
+#else /* PAE */
+#define VM_PMAP_PTEMAP_SIZE 0x400000UL
+#endif /* PAE */
+
+/*
+ * Kernel space boundaries.
+ *
+ * Addresses beyond VM_MAX_KERNEL_ADDRESS are used for PTEs linear mappings.
+ * An area the size of such a mapping is reserved to avoid overflows.
+ *
+ * See the pmap module for more information.
+ */
+#define VM_MIN_KERNEL_ADDRESS VM_MAX_ADDRESS
+#define VM_MAX_KERNEL_ADDRESS (~(VM_PMAP_PTEMAP_SIZE * 2) + 1)
+
+/*
+ * Maximum number of physical segments.
+ */
+#ifdef PAE
+#define VM_MAX_PHYS_SEG 2
+#else /* PAE */
+#define VM_MAX_PHYS_SEG 1
+#endif /* PAE */
+
+/*
+ * Number of physical segment lists.
+ */
+#define VM_NR_PHYS_SEGLIST VM_MAX_PHYS_SEG
+
+/*
+ * Segment list priorities.
+ */
+#define VM_PHYS_SEGLIST_HIGHMEM 1
+#define VM_PHYS_SEGLIST_NORMAL 0
+
+/*
+ * Segment boundaries.
+ */
+#ifdef PAE
+#define VM_PHYS_NORMAL_LIMIT 0x100000000ULL
+#define VM_PHYS_HIGHMEM_LIMIT 0x1000000000ULL
+#else /* PAE */
+#define VM_PHYS_NORMAL_LIMIT 0xfffff000UL
+#endif /* PAE */
+
+/*
+ * Virtual space reserved for kernel map entries.
+ */
+#define VM_MAP_KENTRY_SIZE 0x800000UL
+
+#endif /* _I386_PARAM_H */
diff --git a/arch/i386/machine/pit.c b/arch/i386/machine/pit.c
new file mode 100644
index 00000000..e5113ceb
--- /dev/null
+++ b/arch/i386/machine/pit.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <lib/assert.h>
+#include <machine/io.h>
+#include <machine/pit.h>
+
+/*
+ * I/O ports.
+ */
+#define PIT_PORT_COUNTER0 0x40
+#define PIT_PORT_MODE 0x43
+
+/*
+ * Mode control register bits.
+ */
+#define PIT_MODE_LATCH 0x00
+#define PIT_MODE_RATE_GEN 0x04
+#define PIT_MODE_RW_LSB 0x10
+#define PIT_MODE_RW_MSB 0x20
+
+/*
+ * Native timer frequency.
+ */
+#define PIT_FREQ 1193182
+
+/*
+ * Maximum value of a counter.
+ */
+#define PIT_MAX_COUNT 0xffff
+
/*
 * Set up the i8253 Programmable Interval Timer.
 *
 * Counter 0 is programmed as a free-running rate generator with the
 * maximum count, which pit_delay() then polls.
 */
void __init
pit_setup(void)
{
    io_write_byte(PIT_PORT_MODE, PIT_MODE_RATE_GEN | PIT_MODE_RW_LSB
                                 | PIT_MODE_RW_MSB);
    /* The 16-bit count is loaded LSB first, then MSB */
    io_write_byte(PIT_PORT_COUNTER0, PIT_MAX_COUNT & 0xff);
    io_write_byte(PIT_PORT_COUNTER0, PIT_MAX_COUNT >> 8);
}
+
/*
 * Latch and read the current value of counter 0.
 */
static unsigned int
pit_read(void)
{
    unsigned int low, high;

    /* Latch the count so that both bytes refer to the same instant */
    io_write_byte(PIT_PORT_MODE, PIT_MODE_LATCH);
    low = io_read_byte(PIT_PORT_COUNTER0);
    high = io_read_byte(PIT_PORT_COUNTER0);
    return (high << 8) | low;
}
+
/*
 * Wait (without sleeping) until the specified amount of time has elapsed.
 *
 * The delay is converted to PIT ticks (rounding up), then the counter
 * is polled until that many ticks have been consumed. The counter
 * counts down, so the elapsed ticks between two reads are prev - count,
 * adjusted when the counter wraps around.
 */
void
pit_delay(unsigned long usecs)
{
    long total, prev, count, diff;

    assert(usecs != 0);

    /* TODO Avoid 64-bits conversion if result is known not to overflow */
    total = (long)(((long long)usecs * PIT_FREQ + 999999) / 1000000);
    prev = pit_read();

    do {
        count = pit_read();
        diff = prev - count;
        prev = count;

        /* Counter wrapped: add a full period (PIT_MAX_COUNT ticks) */
        if (diff < 0)
            diff += PIT_MAX_COUNT;

        total -= diff;
    } while (total > 0);
}
diff --git a/arch/i386/machine/pit.h b/arch/i386/machine/pit.h
new file mode 100644
index 00000000..26192e62
--- /dev/null
+++ b/arch/i386/machine/pit.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_PIT_H
+#define _I386_PIT_H
+
+/*
+ * Set up the i8253 Programmable Interval Timer.
+ */
+void pit_setup(void);
+
+/*
+ * Wait (without sleeping) until the specified amount of time has elapsed.
+ */
+void pit_delay(unsigned long usecs);
+
+#endif /* _I386_PIT_H */
diff --git a/arch/i386/machine/pmap.c b/arch/i386/machine/pmap.c
new file mode 100644
index 00000000..37c664ab
--- /dev/null
+++ b/arch/i386/machine/pmap.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+#include <lib/string.h>
+#include <machine/boot.h>
+#include <machine/cpu.h>
+#include <machine/pmap.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_prot.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Kernel page directory.
+ */
+pmap_pte_t pmap_kpdir[PMAP_NR_PDT * PMAP_PTE_PER_PT] __aligned(PAGE_SIZE);
+
+#ifdef PAE
+/*
+ * Kernel page directory pointer table.
+ */
+pmap_pte_t pmap_kpdpt[PMAP_NR_PDT] __aligned(sizeof(pmap_kpdpt));
+#endif /* PAE */
+
+/*
+ * Global symbols required by the pmap MI interface.
+ */
+static struct pmap kernel_pmap_store;
+struct pmap *kernel_pmap;
+unsigned long pmap_klimit;
+
+/*
+ * Reserved pages of virtual memory available for early allocation.
+ */
+static unsigned long pmap_boot_heap __initdata;
+
+/*
+ * Start of the available virtual kernel space, before the VM system is
+ * initialized.
+ */
+static unsigned long pmap_avail_start __initdata;
+
+/*
+ * Table used to convert machine-independent protection flags to
+ * machine-dependent PTE bits.
+ */
+static pmap_pte_t pmap_prot_conv_table[8];
+
+/*
+ * This variable is set to PMAP_PTE_GLOBAL if global pages are available.
+ */
+static pmap_pte_t pmap_pte_global;
+
+/*
+ * Address for temporary mappings of pages to zero.
+ */
+static unsigned long pmap_zero_va;
+
+/*
+ * Mark every present kernel mapping as global and enable global pages
+ * on the processor, so kernel TLB entries survive address space
+ * switches.
+ */
+static void __init
+pmap_setup_global_pages(void)
+{
+ pmap_pte_t *pde, *pde_end, *pte, *pte_end;
+
+ /* Walk all PDEs through the linear mapping of the page directory */
+ for (pde = PMAP_PDP_BASE, pde_end = pde + (PMAP_NR_PDT * PMAP_PTE_PER_PT);
+ pde < pde_end;
+ pde++) {
+ if (!(*pde & PMAP_PTE_PRESENT))
+ continue;
+
+ /* Tag each present PTE of this page table as global */
+ for (pte = PMAP_PTE_BASE + ((pde - PMAP_PDP_BASE) * PMAP_PTE_PER_PT),
+ pte_end = pte + PMAP_PTE_PER_PT;
+ pte < pte_end;
+ pte++) {
+ if (!(*pte & PMAP_PTE_PRESENT))
+ continue;
+
+ *pte |= PMAP_PTE_GLOBAL;
+ }
+ }
+
+ /* Record the flag so later mappings get it, then turn the feature on */
+ pmap_pte_global = PMAP_PTE_GLOBAL;
+ cpu_enable_global_pages();
+}
+
+/*
+ * Early initialization of the kernel pmap.
+ *
+ * Builds the protection conversion table, registers the kernel page
+ * directory, establishes the recursive (linear) mapping of PTEs,
+ * tightens section permissions, enables global pages when available,
+ * and reserves the boot heap and the temporary zero-page address.
+ */
+void __init
+pmap_bootstrap(void)
+{
+ unsigned int i;
+
+ /*
+ * First, fill the protection conversion table.
+ */
+ pmap_prot_conv_table[VM_PROT_NONE] = 0;
+ pmap_prot_conv_table[VM_PROT_READ] = 0;
+ pmap_prot_conv_table[VM_PROT_WRITE] = PMAP_PTE_WRITE;
+ pmap_prot_conv_table[VM_PROT_WRITE | VM_PROT_READ] = PMAP_PTE_WRITE;
+ pmap_prot_conv_table[VM_PROT_EXECUTE] = 0;
+ pmap_prot_conv_table[VM_PROT_EXECUTE | VM_PROT_READ] = 0;
+ pmap_prot_conv_table[VM_PROT_EXECUTE | VM_PROT_WRITE] = PMAP_PTE_WRITE;
+ pmap_prot_conv_table[VM_PROT_ALL] = PMAP_PTE_WRITE;
+
+ /*
+ * Next, take care of the kernel pmap.
+ */
+ kernel_pmap = &kernel_pmap_store;
+ kernel_pmap->pdir = pmap_kpdir;
+ kernel_pmap->pdir_pa = BOOT_ADDR_VTOP(pmap_kpdir);
+
+#ifdef PAE
+ /* Physical address, stored through a pointer-typed field */
+ kernel_pmap->pdpt = (pmap_pte_t *)BOOT_ADDR_VTOP(pmap_kpdpt);
+#endif /* PAE */
+
+ /*
+ * Establish the linear mapping of PTEs.
+ */
+ for (i = 0; i < PMAP_NR_PDT; i++)
+ kernel_pmap->pdir[PMAP_PDE_PTE + i] =
+ ((pmap_pte_t)kernel_pmap->pdir_pa + (i << PMAP_PTE_SHIFT))
+ | PMAP_PTE_WRITE | PMAP_PTE_PRESENT;
+
+ cpu_tlb_flush();
+
+ /*
+ * Tune section permissions.
+ */
+ pmap_kprotect((unsigned long)&_text, (unsigned long)&_rodata,
+ VM_PROT_READ | VM_PROT_EXECUTE);
+ pmap_kprotect((unsigned long)&_rodata, (unsigned long)&_data, VM_PROT_READ);
+ cpu_tlb_flush();
+
+ if (cpu_has_global_pages())
+ pmap_setup_global_pages();
+
+ /*
+ * Reserve pure virtual memory right after the kernel image and round
+ * the safely-mapped limit up to a PDE boundary.
+ */
+ pmap_boot_heap = (unsigned long)&_end;
+ pmap_avail_start = pmap_boot_heap + (PMAP_RESERVED_PAGES * PAGE_SIZE);
+ pmap_klimit = P2ROUND(pmap_avail_start, PMAP_PDE_MAPSIZE);
+ pmap_zero_va = pmap_bootalloc(1);
+}
+
+/*
+ * Allocate pure virtual memory from the small pool of pages reserved
+ * immediately after the kernel, before the VM system is available.
+ */
+unsigned long __init
+pmap_bootalloc(unsigned int nr_pages)
+{
+ size_t bytes;
+ unsigned long start;
+
+ assert(nr_pages > 0);
+
+ bytes = nr_pages * PAGE_SIZE;
+
+ /* Catch address wraparound and exhaustion of the reserved pool */
+ assert((pmap_boot_heap + bytes) > pmap_boot_heap);
+ assert((pmap_boot_heap + bytes) <= pmap_avail_start);
+
+ start = pmap_boot_heap;
+ pmap_boot_heap += bytes;
+ return start;
+}
+
+/*
+ * Return a pointer to the PDE covering the given virtual address.
+ */
+static inline pmap_pte_t *
+pmap_pde(pmap_pte_t *pdir, unsigned long va)
+{
+ return pdir + (va >> PMAP_PDE_SHIFT);
+}
+
+/*
+ * Report the kernel virtual range handed over to the VM system: from
+ * the end of the boot reservation up to the top of kernel space.
+ */
+void __init
+pmap_virtual_space(unsigned long *virt_start, unsigned long *virt_end)
+{
+ *virt_start = pmap_avail_start;
+ *virt_end = VM_MAX_KERNEL_ADDRESS;
+}
+
+/*
+ * Preallocate page tables so that addresses up to va can be safely
+ * mapped in the kernel pmap.
+ *
+ * Physical pages come from the boot allocator until the physical page
+ * allocator is ready, then from the latter; failure to allocate at
+ * that stage is fatal.
+ */
+void
+pmap_growkernel(unsigned long va)
+{
+ struct vm_page *page;
+ pmap_pte_t *pde;
+ vm_phys_t pa;
+
+ while (va > pmap_klimit) {
+ pde = pmap_pde(pmap_kpdir, pmap_klimit);
+ assert(*pde == 0);
+
+ if (!vm_phys_ready)
+ pa = vm_phys_bootalloc();
+ else {
+ page = vm_phys_alloc(0);
+
+ if (page == NULL)
+ panic("pmap: no page available to grow kernel space");
+
+ pa = vm_page_to_pa(page);
+ }
+
+ /* Install the new, zeroed page table and raise the limit */
+ pmap_zero_page(pa);
+ *pde = pa | pmap_pte_global | PMAP_PTE_WRITE | PMAP_PTE_PRESENT;
+ pmap_klimit = P2ROUND(pmap_klimit + PMAP_PDE_MAPSIZE, PMAP_PDE_MAPSIZE);
+ }
+}
+
+/*
+ * Map one physical page at va in the kernel pmap, writable and global
+ * (when supported), through the linear mapping of PTEs, then flush the
+ * stale TLB entry. Resources must have been preallocated.
+ */
+void
+pmap_kenter(unsigned long va, vm_phys_t pa)
+{
+ PMAP_PTE_BASE[vm_page_atop(va)] = (pa & PMAP_PTE_PMASK) | pmap_pte_global
+ | PMAP_PTE_WRITE | PMAP_PTE_PRESENT;
+ cpu_tlb_flush_va(va);
+}
+
+/*
+ * Remove the kernel mappings in [start, end), invalidating the TLB
+ * entry of each page as it is cleared.
+ */
+void
+pmap_kremove(unsigned long start, unsigned long end)
+{
+ while (start < end) {
+ PMAP_PTE_BASE[vm_page_atop(start)] = 0;
+ cpu_tlb_flush_va(start);
+ start += PAGE_SIZE;
+ }
+}
+
+/*
+ * Change the protection of the kernel mappings in [start, end) to
+ * prot, converting the MI protection flags through the conversion
+ * table and rewriting only the protection bits of each PTE.
+ */
+void
+pmap_kprotect(unsigned long start, unsigned long end, int prot)
+{
+ pmap_pte_t *pte, flags;
+
+ flags = pmap_prot_conv_table[prot & VM_PROT_ALL];
+
+ while (start < end) {
+ pte = PMAP_PTE_BASE + vm_page_atop(start);
+ *pte = (*pte & ~PMAP_PTE_PROT_MASK) | flags;
+ cpu_tlb_flush_va(start);
+ start += PAGE_SIZE;
+ }
+}
+
+/*
+ * Return the physical address backing va in the kernel pmap, or 0 if
+ * the PDE covering va is empty.
+ *
+ * NOTE(review): only the PDE is checked; the PTE present bit isn't
+ * tested, which assumes unmapped PTEs are zero — confirm.
+ */
+vm_phys_t
+pmap_kextract(unsigned long va)
+{
+ pmap_pte_t *pde;
+
+ pde = pmap_pde(pmap_kpdir, va);
+
+ if (*pde == 0)
+ return 0;
+
+ return PMAP_PTE_BASE[vm_page_atop(va)] & PMAP_PTE_PMASK;
+}
+
+/*
+ * Zero the physical page at pa through a temporary kernel mapping at
+ * pmap_zero_va, removed before returning.
+ */
+void
+pmap_zero_page(vm_phys_t pa)
+{
+ pmap_kenter(pmap_zero_va, pa);
+ memset((void *)pmap_zero_va, 0, PAGE_SIZE);
+ pmap_kremove(pmap_zero_va, pmap_zero_va + PAGE_SIZE);
+}
diff --git a/arch/i386/machine/pmap.h b/arch/i386/machine/pmap.h
new file mode 100644
index 00000000..7053ae7d
--- /dev/null
+++ b/arch/i386/machine/pmap.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_PMAP_H
+#define _I386_PMAP_H
+
+#include <kern/param.h>
+#include <kern/types.h>
+#include <lib/stdint.h>
+
+/*
+ * Page directory/table properties.
+ */
+#ifdef PAE
+#define PMAP_PDE_SHIFT 21
+#define PMAP_PT_SHIFT 9
+#define PMAP_NR_PDT 4
+#define PMAP_PTE_PMASK 0x0000000ffffff000ULL
+#else /* PAE */
+#define PMAP_PDE_SHIFT 22
+#define PMAP_PT_SHIFT 10
+#define PMAP_NR_PDT 1
+#define PMAP_PTE_PMASK (~PAGE_MASK)
+#endif /* PAE */
+
+#define PMAP_PTE_SHIFT 12
+
+/*
+ * The amount of virtual memory described by a page directory/table entry.
+ */
+#define PMAP_PDE_MAPSIZE (1 << PMAP_PDE_SHIFT)
+#define PMAP_PTE_MAPSIZE (1 << PMAP_PTE_SHIFT)
+
+/*
+ * Number of entries in a page directory/table.
+ */
+#define PMAP_PTE_PER_PT (1 << PMAP_PT_SHIFT)
+
+/*
+ * PDE/PTE flags.
+ */
+#define PMAP_PTE_PRESENT 0x001
+#define PMAP_PTE_WRITE 0x002
+#define PMAP_PTE_USER 0x004
+#define PMAP_PTE_WRITE_THROUGH 0x008
+#define PMAP_PTE_CACHE_DISABLE 0x010
+#define PMAP_PTE_ACCESSED 0x020
+#define PMAP_PTE_DIRTY 0x040
+#define PMAP_PTE_GLOBAL 0x100
+#define PMAP_PTE_AVAIL1 0x200
+#define PMAP_PTE_AVAIL2 0x400
+#define PMAP_PTE_AVAIL3 0x800
+
+/*
+ * Flags related to page protection.
+ */
+#define PMAP_PTE_PROT_MASK PMAP_PTE_WRITE
+
+/*
+ * Index of the first PDE for the kernel address space.
+ */
+#define PMAP_PDE_KERN (VM_MIN_KERNEL_ADDRESS >> PMAP_PDE_SHIFT)
+
+/*
+ * PDE index for the page directory.
+ */
+#define PMAP_PDE_PTE (VM_MAX_KERNEL_ADDRESS >> PMAP_PDE_SHIFT)
+
+/*
+ * Page table entry (also usable as a page directory entry or page directory
+ * pointer table entry).
+ */
+#ifdef PAE
+typedef uint64_t pmap_pte_t;
+#else /* PAE */
+typedef uint32_t pmap_pte_t;
+#endif /* PAE */
+
+/*
+ * Base virtual address of the linear mapping of PTEs.
+ */
+#define PMAP_PTE_BASE ((pmap_pte_t *)(PMAP_PDE_PTE << PMAP_PDE_SHIFT))
+
+/*
+ * Base virtual address of the page directory, in the linear mapping of PTEs.
+ */
+#define PMAP_PDP_BASE (PMAP_PTE_BASE + (PMAP_PDE_PTE * PMAP_PTE_PER_PT))
+
+/*
+ * Virtual address of the PDE that points to the PDP.
+ */
+#define PMAP_PDP_PDE (PMAP_PDP_BASE + PMAP_PDE_PTE)
+
+/*
+ * Number of pages to reserve for the pmap module after the kernel.
+ *
+ * This pool of pure virtual memory can be used to reserve virtual addresses
+ * before the VM system is initialized.
+ */
+#define PMAP_RESERVED_PAGES 2
+
+/*
+ * Physical address map.
+ */
+struct pmap {
+ pmap_pte_t *pdir; /* Page directory virtual address */
+ vm_phys_t pdir_pa; /* Page directory physical address */
+#ifdef PAE
+ pmap_pte_t *pdpt; /* Page directory pointer table physical address;
+ held in a pointer-typed field despite being a
+ physical value (see pmap_bootstrap()) */
+#endif /* PAE */
+};
+
+/*
+ * The kernel pmap.
+ */
+extern struct pmap *kernel_pmap;
+
+/*
+ * Kernel page directory.
+ */
+extern pmap_pte_t pmap_kpdir[PMAP_NR_PDT * PMAP_PTE_PER_PT];
+
+#ifdef PAE
+/*
+ * Kernel page directory pointer table.
+ */
+extern pmap_pte_t pmap_kpdpt[PMAP_NR_PDT];
+#endif /* PAE */
+
+/*
+ * Address below which using the low level kernel pmap functions is safe.
+ * Its value is adjusted by calling pmap_growkernel().
+ */
+extern unsigned long pmap_klimit;
+
+/*
+ * Early initialization of the pmap module.
+ *
+ * When this function is called, basic paging is enabled and the kernel
+ * already runs at virtual addresses.
+ */
+void pmap_bootstrap(void);
+
+/*
+ * Allocate pure virtual memory.
+ *
+ * This memory is obtained from a very small pool of reserved pages located
+ * immediately after the kernel. Its purpose is to allow early mappings to
+ * be created before the VM system is available.
+ */
+unsigned long pmap_bootalloc(unsigned int nr_pages);
+
+/*
+ * Return the available kernel virtual space in virt_start and virt_end.
+ *
+ * This function is called early, during initialization of the VM system, and
+ * can't be used after since the VM has taken control of the kernel address
+ * space.
+ */
+void pmap_virtual_space(unsigned long *virt_start, unsigned long *virt_end);
+
+/*
+ * Preallocate resources so that addresses up to va can be mapped safely in
+ * the kernel pmap.
+ */
+void pmap_growkernel(unsigned long va);
+
+/*
+ * Kernel specific mapping functions.
+ *
+ * Resources for the new mappings must be preallocated.
+ */
+void pmap_kenter(unsigned long va, vm_phys_t pa);
+void pmap_kremove(unsigned long start, unsigned long end);
+void pmap_kprotect(unsigned long start, unsigned long end, int prot);
+vm_phys_t pmap_kextract(unsigned long va);
+
+/*
+ * Zero a page at the given physical address.
+ */
+void pmap_zero_page(vm_phys_t pa);
+
+#endif /* _I386_PMAP_H */
diff --git a/arch/i386/machine/trap.h b/arch/i386/machine/trap.h
new file mode 100644
index 00000000..8c65b718
--- /dev/null
+++ b/arch/i386/machine/trap.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_TRAP_H
+#define _I386_TRAP_H
+
+/*
+ * Trap vectors.
+ */
+#define T_DIVIDE_ERROR 0
+#define T_DEBUG 1
+#define T_NMI 2
+#define T_INT3 3
+#define T_OVERFLOW 4
+#define T_OUT_OF_BOUNDS 5
+#define T_INVALID_OPCODE 6
+#define T_NO_FPU 7
+#define T_DOUBLE_FAULT 8
+#define T_FPU_FAULT 9
+#define T_INVALID_TSS 10
+#define T_SEGMENT_NOT_PRESENT 11
+#define T_STACK_FAULT 12
+#define T_GENERAL_PROTECTION 13
+#define T_PAGE_FAULT 14
+#define T_FLOATING_POINT_ERROR 16
+#define T_WATCHPOINT 17
+#define T_MACHINE_CHECK 18
+#define T_SSE_FAULT 19
+#define T_APIC_TIMER_INTR 253
+#define T_APIC_ERROR_INTR 254
+#define T_APIC_SPURIOUS_INTR 255
+
+#endif /* _I386_TRAP_H */
diff --git a/arch/i386/machine/types.h b/arch/i386/machine/types.h
new file mode 100644
index 00000000..10825624
--- /dev/null
+++ b/arch/i386/machine/types.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_TYPES_H
+#define _I386_TYPES_H
+
+#ifdef PAE
+typedef unsigned long long vm_phys_t;
+#else /* PAE */
+typedef unsigned long vm_phys_t;
+#endif /* PAE */
+
+#endif /* _I386_TYPES_H */
diff --git a/arch/i386/machine/vga.c b/arch/i386/machine/vga.c
new file mode 100644
index 00000000..60911943
--- /dev/null
+++ b/arch/i386/machine/vga.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <lib/macros.h>
+#include <lib/stdint.h>
+#include <lib/string.h>
+#include <machine/io.h>
+#include <machine/pmap.h>
+#include <machine/vga.h>
+
+/*
+ * Screen dimensions.
+ */
+#define VGA_COLUMNS 80
+#define VGA_LINES 25
+
+/*
+ * Text mode mapped memory and size.
+ */
+#define VGA_MEMORY 0xb8000
+#define VGA_MEMORY_SIZE (VGA_COLUMNS * VGA_LINES * 2)
+
+/*
+ * I/O ports.
+ */
+#define VGA_MISC_OUTPUT_REGISTER_READ 0x3cc
+#define VGA_MISC_OUTPUT_REGISTER_WRITE 0x3c2
+#define VGA_CRTC_ADDRESS_REGISTER 0x3d4
+#define VGA_CRTC_DATA_REGISTER 0x3d5
+
+/*
+ * CRTC registers.
+ */
+#define VGA_CRTC_CURSOR_LOCATION_HIGH_REGISTER 0xe
+#define VGA_CRTC_CURSOR_LOCATION_LOW_REGISTER 0xf
+
+/*
+ * Foreground screen color.
+ */
+#define VGA_FOREGROUND_COLOR 0x7
+
+/*
+ * Blank space 16 bits word.
+ */
+#define VGA_BLANK ((VGA_FOREGROUND_COLOR << 8) | ' ')
+
+/*
+ * Number of spaces to display for a tabulation.
+ */
+#define VGA_TABULATION_SPACES 8
+
+static uint8_t *vga_memory;
+static uint16_t vga_cursor;
+
+/*
+ * Read the hardware cursor location from the CRTC registers, selecting
+ * and reading the high byte, then the low byte.
+ */
+static uint16_t
+vga_get_cursor_position(void)
+{
+ uint16_t tmp;
+
+ io_write_byte(VGA_CRTC_ADDRESS_REGISTER,
+ VGA_CRTC_CURSOR_LOCATION_HIGH_REGISTER);
+ tmp = io_read_byte(VGA_CRTC_DATA_REGISTER) << 8;
+ io_write_byte(VGA_CRTC_ADDRESS_REGISTER,
+ VGA_CRTC_CURSOR_LOCATION_LOW_REGISTER);
+ tmp |= io_read_byte(VGA_CRTC_DATA_REGISTER);
+
+ return tmp;
+}
+
+/*
+ * Program the hardware cursor location into the CRTC registers, one
+ * byte at a time (high, then low).
+ */
+static void
+vga_set_cursor_position(uint16_t position)
+{
+ io_write_byte(VGA_CRTC_ADDRESS_REGISTER,
+ VGA_CRTC_CURSOR_LOCATION_HIGH_REGISTER);
+ io_write_byte(VGA_CRTC_DATA_REGISTER, position >> 8);
+ io_write_byte(VGA_CRTC_ADDRESS_REGISTER,
+ VGA_CRTC_CURSOR_LOCATION_LOW_REGISTER);
+ io_write_byte(VGA_CRTC_DATA_REGISTER, position & 0xff);
+}
+
+/*
+ * Return the (0-based) column of the current cursor position.
+ */
+static uint8_t
+vga_get_cursor_column(void)
+{
+ return (uint8_t)(vga_cursor % VGA_COLUMNS);
+}
+
+/*
+ * Initialize the vga module: map the text mode frame buffer into
+ * kernel space, make sure the CRTC registers respond at their
+ * color-mode I/O addresses, and fetch the current cursor position.
+ */
+void __init
+vga_setup(void)
+{
+ uint8_t misc_output_register;
+ unsigned long va;
+
+ /* Map the frame buffer at a reserved boot virtual address */
+ va = pmap_bootalloc(1);
+ pmap_kenter(va, VGA_MEMORY);
+ vga_memory = (uint8_t *)va;
+
+ /*
+ * Check if the Input/Output Address Select bit is set.
+ */
+ misc_output_register = io_read_byte(VGA_MISC_OUTPUT_REGISTER_READ);
+
+ if (!(misc_output_register & 0x1)) {
+ /*
+ * Set the I/O AS bit.
+ */
+ misc_output_register |= 0x1;
+
+ /*
+ * Update the misc output register.
+ */
+ io_write_byte(VGA_MISC_OUTPUT_REGISTER_WRITE, misc_output_register);
+ }
+
+ vga_cursor = vga_get_cursor_position();
+}
+
+/*
+ * Scroll the screen up by one line and blank the last line.
+ */
+static void
+vga_scroll_lines(void)
+{
+ uint16_t *last_line;
+ int i;
+
+ /* Shift every line up by one; memmove handles the overlap */
+ memmove(vga_memory, (uint16_t *)vga_memory + VGA_COLUMNS,
+ VGA_MEMORY_SIZE - (VGA_COLUMNS * 2));
+ last_line = (uint16_t *)vga_memory + (VGA_COLUMNS * (VGA_LINES - 1));
+
+ for(i = 0; i < VGA_COLUMNS; i++)
+ last_line[i] = VGA_BLANK;
+}
+
+/*
+ * Write a byte at the current cursor position.
+ *
+ * Carriage returns are ignored; newline, backspace and tabulation are
+ * handled specially; the screen scrolls when output would pass the
+ * last line.
+ */
+void
+vga_write_byte(uint8_t byte)
+{
+ if (byte == '\r')
+ return;
+ else if (byte == '\n') {
+ /* Advance to the start of the next line */
+ vga_cursor += VGA_COLUMNS - vga_get_cursor_column();
+
+ if (vga_cursor >= (VGA_LINES * VGA_COLUMNS)) {
+ vga_scroll_lines();
+ vga_cursor -= VGA_COLUMNS;
+ }
+
+ vga_set_cursor_position(vga_cursor);
+ } else if (byte == '\b') {
+ if (vga_cursor > 0) {
+ vga_cursor--;
+ ((uint16_t *)vga_memory)[vga_cursor] = VGA_BLANK;
+ vga_set_cursor_position(vga_cursor);
+ }
+ } else if (byte == '\t') {
+ int i;
+
+ for(i = 0; i < VGA_TABULATION_SPACES; i++)
+ vga_write_byte(' ');
+ } else {
+ /*
+ * NOTE(review): scrolling triggers when the cursor reaches the
+ * very last cell, so that cell is scrolled away before being
+ * written — confirm this off-by-one is intended.
+ */
+ if ((vga_cursor + 1) >= VGA_COLUMNS * VGA_LINES) {
+ vga_scroll_lines();
+ vga_cursor -= VGA_COLUMNS;
+ }
+
+ ((uint16_t *)vga_memory)[vga_cursor] = ((VGA_FOREGROUND_COLOR << 8)
+ | byte);
+ vga_cursor++;
+ vga_set_cursor_position(vga_cursor);
+ }
+}
+
+void console_write_byte(char c) __alias("vga_write_byte");
diff --git a/arch/i386/machine/vga.h b/arch/i386/machine/vga.h
new file mode 100644
index 00000000..ff8646c4
--- /dev/null
+++ b/arch/i386/machine/vga.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _I386_VGA_H
+#define _I386_VGA_H
+
+#include <lib/stdint.h>
+
+/*
+ * Initialize the vga module.
+ */
+void vga_setup(void);
+
+/*
+ * Write a byte on the screen at current cursor position.
+ */
+void vga_write_byte(uint8_t byte);
+
+#endif /* _I386_VGA_H */
diff --git a/arch/i386/x15.lds.S b/arch/i386/x15.lds.S
new file mode 100644
index 00000000..363bc919
--- /dev/null
+++ b/arch/i386/x15.lds.S
@@ -0,0 +1,88 @@
+/*
+ * Don't replace i386.
+ */
+#undef i386
+
+OUTPUT_FORMAT("elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+
+#define __ASSEMBLY__
+
+#include <kern/param.h>
+#include <machine/boot.h>
+
+PHDRS
+{
+ boot PT_LOAD FLAGS(7);
+ init PT_LOAD FLAGS(7);
+ text PT_LOAD FLAGS(5);
+ rodata PT_LOAD FLAGS(4);
+ data PT_LOAD FLAGS(6);
+}
+
+SECTIONS
+{
+ . = BOOT_OFFSET;
+
+ .boot ALIGN(PAGE_SIZE): {
+ _boot = .;
+ *(.boothdr)
+ *(.boot)
+ _eboot = .;
+ } : boot
+
+ . += KERNEL_OFFSET;
+
+ .init ALIGN(PAGE_SIZE): AT(ADDR(.init) - KERNEL_OFFSET) {
+ _init = .;
+ *(.init)
+ _einit = .;
+ } : init
+
+ .initrodata ALIGN(8): AT(ADDR(.initrodata) - KERNEL_OFFSET) {
+ _initrodata = .;
+ *(.initrodata)
+ _einitrodata = .;
+ } : init
+
+ .initdata ALIGN(8): AT(ADDR(.initdata) - KERNEL_OFFSET) {
+ _initdata = .;
+ *(.initdata)
+ _einitdata = .;
+ } : init
+
+ .text ALIGN(PAGE_SIZE): AT(ADDR(.text) - KERNEL_OFFSET) {
+ _text = .;
+ *(.text)
+ _etext = .;
+ } : text
+
+ .rodata ALIGN(PAGE_SIZE): AT(ADDR(.rodata) - KERNEL_OFFSET) {
+ _rodata = .;
+ *(.rodata)
+ _erodata = .;
+ } : rodata
+
+ .notes ALIGN(8): AT(ADDR(.notes) - KERNEL_OFFSET) {
+ _notes = .;
+ *(.note.*)
+ _enotes = .;
+ } : rodata
+
+ .data ALIGN(PAGE_SIZE): AT(ADDR(.data) - KERNEL_OFFSET) {
+ _data = .;
+ *(.data)
+ _edata = .;
+ } : data
+
+ .bss ALIGN(8): AT(ADDR(.bss) - KERNEL_OFFSET) {
+ _bss = .;
+ *(.bss)
+ *(COMMON)
+ . = ALIGN(PAGE_SIZE);
+ _ebss = .;
+ } : data
+
+ _end = .;
+}
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 00000000..f4af31ae
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,70 @@
+AC_INIT([X15], [0.1], [rbraun@sceen.net], [x15])
+
+AC_CONFIG_SRCDIR([kern/printk.c])
+AC_CONFIG_AUX_DIR([build-aux])
+
+AM_INIT_AUTOMAKE([gnu check-news subdir-objects 1.10])
+
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])],
+ [AC_SUBST([AM_DEFAULT_VERBOSITY], [1])])
+
+AC_PREFIX_DEFAULT([])
+
+
+#
+# Deduce the output variable `systype' from the configuration parameters.
+#
+AC_CANONICAL_HOST
+
+# Select the architecture fragment from the host CPU type.
+AS_CASE(["$host_cpu"],
+ [i?86|x86_64], [m4_include([arch/i386/configfrag.ac])],
+ [AC_MSG_ERROR([unsupported CPU type])])
+
+AC_SUBST([systype])
+
+
+#
+# Arguments to configure
+#
+AC_ARG_ENABLE([debug],
+ [AS_HELP_STRING([--disable-debug],
+ [disable the debugging facilities])])
+
+AC_ARG_WITH([max-cpus],
+ [AS_HELP_STRING([--with-max-cpus=MAX_CPUS],
+ [set the maximum number of supported processors])],
+ [opt_max_cpus=$withval],
+ [opt_max_cpus=32])
+
+
+#
+# Options
+#
+AS_IF([test x"$enable_debug" = xno],
+ [AC_DEFINE([NDEBUG], [1], [general debugging])])
+
+AC_DEFINE([__KERNEL__], [1], [kernel code])
+
+AC_DEFINE_UNQUOTED([ARCH], [$systype], [arch])
+AM_CONDITIONAL([I386], [test "$systype" = i386])
+
+AC_DEFINE_UNQUOTED([MAX_CPUS], [$opt_max_cpus], [maximum number of supported processors])
+
+
+#
+# Programs.
+#
+AM_PROG_AS
+AC_PROG_CC([gcc])
+AC_PROG_CPP
+AC_PROG_RANLIB
+
+AM_PROG_CC_C_O
+
+#
+# Output
+#
+AC_CONFIG_HEADER([config.h])
+AC_CONFIG_FILES([Makefile])
+
+AC_OUTPUT
diff --git a/kern/error.h b/kern/error.h
new file mode 100644
index 00000000..bb1514a5
--- /dev/null
+++ b/kern/error.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_ERROR_H
+#define _KERN_ERROR_H
+
+#define ERROR_NOMEM 1
+
+#endif /* _KERN_ERROR_H */
diff --git a/kern/init.h b/kern/init.h
new file mode 100644
index 00000000..a3128d75
--- /dev/null
+++ b/kern/init.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_INIT_H
+#define _KERN_INIT_H
+
+#include <lib/macros.h>
+
+/*
+ * These sections should contain code and data which can be discarded once
+ * kernel initialization is done.
+ */
+#define __init __section(".init")
+#define __initrodata __section(".initrodata")
+#define __initdata __section(".initdata")
+
+#endif /* _KERN_INIT_H */
diff --git a/kern/kernel.c b/kern/kernel.c
new file mode 100644
index 00000000..52217876
--- /dev/null
+++ b/kern/kernel.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/kernel.h>
+#include <machine/cpu.h>
+
+/*
+ * Kernel entry point, called once machine-dependent initialization is
+ * done: enable interrupts and idle forever.
+ */
+void __init
+kernel_main(void)
+{
+ cpu_intr_enable();
+
+ /* Idle loop; all further work happens from interrupt context */
+ for (;;)
+ cpu_idle();
+
+ /* Never reached */
+}
diff --git a/kern/kernel.h b/kern/kernel.h
new file mode 100644
index 00000000..f4f99d30
--- /dev/null
+++ b/kern/kernel.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_KERNEL_H
+#define _KERN_KERNEL_H
+
+#include <kern/printk.h>
+
+/*
+ * Kernel properties.
+ */
+#define KERNEL_NAME PACKAGE_NAME
+#define KERNEL_VERSION PACKAGE_VERSION
+
+/*
+ * Print the kernel name and version on the console.
+ */
+static inline void
+kernel_show_banner(void)
+{
+ printk(KERNEL_NAME " " KERNEL_VERSION "\n");
+}
+
+void kernel_main(void);
+
+#endif /* _KERN_KERNEL_H */
diff --git a/kern/kmem.c b/kern/kmem.c
new file mode 100644
index 00000000..663cafe2
--- /dev/null
+++ b/kern/kmem.c
@@ -0,0 +1,1307 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This allocator is based on the "The Slab Allocator: An Object-Caching
+ * Kernel Memory Allocator" by Jeff Bonwick.
+ *
+ * It allows the allocation of objects (i.e. fixed-size typed buffers) from
+ * caches and is efficient in both space and time. This implementation follows
+ * many of the indications from the paper mentioned. The most notable
+ * differences are outlined below.
+ *
+ * The per-cache self-scaling hash table for buffer-to-bufctl conversion,
+ * described in 3.2.3 "Slab Layout for Large Objects", has been replaced by
+ * a red-black tree storing slabs, sorted by address. The use of a
+ * self-balancing tree for buffer-to-slab conversions provides a few advantages
+ * over a hash table. Unlike a hash table, a BST provides a "lookup nearest"
+ * operation, so obtaining the slab data (whether it is embedded in the slab or
+ * off slab) from a buffer address simply consists of a "lookup nearest towards
+ * 0" tree search. Storing slabs instead of buffers also considerably reduces
+ * the number of elements to retain. Finally, a self-balancing tree is a true
+ * self-scaling data structure, whereas a hash table requires periodic
+ * maintenance and complete resizing, which is expensive. The only drawback is
+ * that releasing a buffer to the slab layer takes logarithmic time instead of
+ * constant time. But as the data set size is kept reasonable (because slabs
+ * are stored instead of buffers) and because the CPU pool layer services most
+ * requests, avoiding many accesses to the slab layer, it is considered an
+ * acceptable tradeoff.
+ *
+ * This implementation uses per-cpu pools of objects, which service most
+ * allocation requests. These pools act as caches (but are named differently
+ * to avoid confusion with CPU caches) that reduce contention on multiprocessor
+ * systems. When a pool is empty and cannot provide an object, it is filled by
+ * transferring multiple objects from the slab layer. The symmetric case is
+ * handled likewise.
+ */
+
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <lib/assert.h>
+#include <lib/limits.h>
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <lib/rbtree.h>
+#include <lib/sprintf.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <lib/string.h>
+#include <machine/cpu.h>
+#include <vm/vm_kmem.h>
+
+/*
+ * Minimum required alignment.
+ */
+#define KMEM_ALIGN_MIN 8
+
+/*
+ * Minimum number of buffers per slab.
+ *
+ * This value is ignored when the slab size exceeds a threshold.
+ */
+#define KMEM_MIN_BUFS_PER_SLAB 8
+
+/*
+ * Special slab size beyond which the minimum number of buffers per slab is
+ * ignored when computing the slab size of a cache.
+ */
+#define KMEM_SLAB_SIZE_THRESHOLD (8 * PAGE_SIZE)
+
+/*
+ * Special buffer size under which slab data is unconditionnally allocated
+ * from its associated slab.
+ */
+#define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8)
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define KMEM_CPU_POOL_TRANSFER_RATIO 2
+
+/*
+ * Shift for the first general cache size.
+ */
+#define KMEM_CACHES_FIRST_SHIFT 5
+
+/*
+ * Number of caches backing general purpose allocations.
+ */
+#define KMEM_NR_MEM_CACHES 13
+
+/*
+ * Options for kmem_cache_alloc_verify().
+ */
+#define KMEM_AV_NOCONSTRUCT 0
+#define KMEM_AV_CONSTRUCT 1
+
+/*
+ * Error codes for kmem_cache_error().
+ */
+#define KMEM_ERR_INVALID 0 /* Invalid address being freed */
+#define KMEM_ERR_DOUBLEFREE 1 /* Freeing already free address */
+#define KMEM_ERR_BUFTAG 2 /* Invalid buftag content */
+#define KMEM_ERR_MODIFIED 3 /* Buffer modified while free */
+#define KMEM_ERR_REDZONE 4 /* Redzone violation */
+
/*
 * Available CPU pool types.
 *
 * For each entry, the CPU pool size applies from the entry buf_size
 * (excluded) up to (and including) the buf_size of the preceding entry.
 *
 * See struct kmem_cpu_pool_type for a description of the values.
 */
static struct kmem_cpu_pool_type kmem_cpu_pool_types[] = {
    /* buf_size, array_size, array_align, array_cache (set in kmem_setup) */
    { 32768, 1, 0, NULL },
    { 4096, 8, CPU_L1_SIZE, NULL },
    { 256, 64, CPU_L1_SIZE, NULL },
    { 0, 128, CPU_L1_SIZE, NULL }
};
+
+/*
+ * Caches where CPU pool arrays are allocated from.
+ */
+static struct kmem_cache kmem_cpu_array_caches[ARRAY_SIZE(kmem_cpu_pool_types)];
+
+/*
+ * Cache for off slab data.
+ */
+static struct kmem_cache kmem_slab_cache;
+
+/*
+ * General caches array.
+ */
+static struct kmem_cache kmem_caches[KMEM_NR_MEM_CACHES];
+
+/*
+ * List of all caches managed by the allocator.
+ */
+static struct list kmem_cache_list;
+/* static struct mutex kmem_cache_list_mutex; */
+
+static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error,
+ void *arg);
+static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache);
+static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf);
+
/*
 * Compare size bytes of buf against pattern, byte by byte.
 *
 * Return a pointer to the first mismatching byte in buf, or NULL if both
 * ranges are identical.
 */
static void *
kmem_buf_verify_bytes(void *buf, void *pattern, size_t size)
{
    char *bytes, *expected;
    size_t i;

    bytes = buf;
    expected = pattern;

    for (i = 0; i < size; i++) {
        if (bytes[i] != expected[i])
            return &bytes[i];
    }

    return NULL;
}
+
/*
 * Fill size bytes of buf with the 64-bit pattern.
 *
 * Both the buffer address and the size must be 64-bit aligned.
 */
static void
kmem_buf_fill(void *buf, uint64_t pattern, size_t size)
{
    uint64_t *words;
    size_t i, nr_words;

    assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
    assert(P2ALIGNED(size, sizeof(uint64_t)));

    words = buf;
    nr_words = size / sizeof(uint64_t);

    for (i = 0; i < nr_words; i++)
        words[i] = pattern;
}
+
/*
 * Check that size bytes of buf contain the old 64-bit pattern, rewriting
 * them with the new pattern along the way.
 *
 * Return a pointer to the first mismatching byte, or NULL if the whole
 * buffer matched (in which case it is now filled with new).
 */
static void *
kmem_buf_verify_fill(void *buf, uint64_t old, uint64_t new, size_t size)
{
    uint64_t *words;
    size_t i, nr_words;

    assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
    assert(P2ALIGNED(size, sizeof(uint64_t)));

    words = buf;
    nr_words = size / sizeof(uint64_t);

    for (i = 0; i < nr_words; i++) {
        if (words[i] != old)
            return kmem_buf_verify_bytes(&words[i], &old, sizeof(old));

        words[i] = new;
    }

    return NULL;
}
+
/*
 * Return the bufctl of a buffer, located bufctl_dist bytes inside
 * (or, with KMEM_CF_VERIFY, right after) the buffer.
 */
static inline union kmem_bufctl *
kmem_buf_to_bufctl(void *buf, struct kmem_cache *cache)
{
    return (union kmem_bufctl *)(buf + cache->bufctl_dist);
}
+
/*
 * Return the buftag of a buffer, located buftag_dist bytes inside it.
 * Only meaningful for caches with KMEM_CF_VERIFY set.
 */
static inline struct kmem_buftag *
kmem_buf_to_buftag(void *buf, struct kmem_cache *cache)
{
    return (struct kmem_buftag *)(buf + cache->buftag_dist);
}
+
/*
 * Return the buffer a bufctl belongs to (inverse of kmem_buf_to_bufctl).
 */
static inline void *
kmem_bufctl_to_buf(union kmem_bufctl *bufctl, struct kmem_cache *cache)
{
    return (void *)bufctl - cache->bufctl_dist;
}
+
/*
 * Initialize the debugging state of all buffers in a newly created slab:
 * fill each buffer with the free pattern and mark its buftag free.
 */
static void
kmem_slab_create_verify(struct kmem_slab *slab, struct kmem_cache *cache)
{
    struct kmem_buftag *buftag;
    size_t buf_size;
    unsigned long buffers;
    void *buf;

    buf_size = cache->buf_size;
    buf = slab->addr;
    buftag = kmem_buf_to_buftag(buf, cache);

    for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
        /* Only the bytes preceding the bufctl carry the free pattern */
        kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
        buftag->state = KMEM_BUFTAG_FREE;
        buf += buf_size;
        buftag = kmem_buf_to_buftag(buf, cache);
    }
}
+
/*
 * Create an empty slab for a cache.
 *
 * The caller must drop all locks before calling this function.
 *
 * Slab storage comes from the custom allocation function when one is set,
 * from the kernel map otherwise. Return NULL if either the storage or,
 * for off slab data, the slab descriptor can't be allocated.
 */
static struct kmem_slab *
kmem_slab_create(struct kmem_cache *cache, size_t color)
{
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;
    size_t buf_size;
    unsigned long buffers;
    void *slab_buf;

    if (cache->slab_alloc_fn == NULL)
        slab_buf = (void *)vm_kmem_alloc(cache->slab_size);
    else
        slab_buf = (void *)cache->slab_alloc_fn(cache->slab_size);

    if (slab_buf == NULL)
        return NULL;

    if (cache->flags & KMEM_CF_SLAB_EXTERNAL) {
        /* Off slab data: the descriptor comes from the dedicated cache */
        assert(!(cache->flags & KMEM_CF_NO_RECLAIM));
        slab = kmem_cache_alloc(&kmem_slab_cache);

        if (slab == NULL) {
            /* Release the storage the same way it was obtained */
            if (cache->slab_free_fn == NULL)
                vm_kmem_free((unsigned long)slab_buf, cache->slab_size);
            else
                cache->slab_free_fn((unsigned long)slab_buf, cache->slab_size);

            return NULL;
        }
    } else {
        /* Embedded slab data, stored at the very end of the slab */
        slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1;
    }

    list_node_init(&slab->list_node);
    rbtree_node_init(&slab->tree_node);
    slab->nr_refs = 0;
    slab->first_free = NULL;
    /* The color offsets buffers to spread them across cache lines */
    slab->addr = slab_buf + color;

    /*
     * Build the free list of buffers; each bufctl is pushed at the head,
     * so the highest buffer ends up first.
     */
    buf_size = cache->buf_size;
    bufctl = kmem_buf_to_bufctl(slab->addr, cache);

    for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
        bufctl->next = slab->first_free;
        slab->first_free = bufctl;
        bufctl = (union kmem_bufctl *)((void *)bufctl + buf_size);
    }

    if (cache->flags & KMEM_CF_VERIFY)
        kmem_slab_create_verify(slab, cache);

    return slab;
}
+
+static inline int
+kmem_slab_use_tree(int flags)
+{
+ return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
+}
+
+static inline int
+kmem_slab_cmp_lookup(const void *addr, const struct rbtree_node *node)
+{
+ struct kmem_slab *slab;
+
+ slab = rbtree_entry(node, struct kmem_slab, tree_node);
+
+ if (addr == slab->addr)
+ return 0;
+ else if (addr < slab->addr)
+ return -1;
+ else
+ return 1;
+}
+
/*
 * Tree insertion comparator: order slabs by address, reusing the lookup
 * comparator on the address of the slab being inserted.
 */
static inline int
kmem_slab_cmp_insert(const struct rbtree_node *a, const struct rbtree_node *b)
{
    struct kmem_slab *slab;

    slab = rbtree_entry(a, struct kmem_slab, tree_node);
    return kmem_slab_cmp_lookup(slab->addr, b);
}
+
/*
 * Reset a CPU pool to its default, empty, arrayless state. The array is
 * built lazily, on the first free operation that needs it.
 */
static void
kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
    /* mutex_init(&cpu_pool->mutex); */
    cpu_pool->flags = cache->flags;
    cpu_pool->size = 0;
    cpu_pool->transfer_size = 0;
    cpu_pool->nr_objs = 0;
    cpu_pool->array = NULL;
}
+
/*
 * Return the CPU pool of the current processor for this cache.
 *
 * NOTE(review): presumably the caller must prevent migration while the
 * pool is in use - confirm against the scheduler once there is one.
 */
static inline struct kmem_cpu_pool *
kmem_cpu_pool_get(struct kmem_cache *cache)
{
    return &cache->cpu_pools[cpu_id()];
}
+
/*
 * Attach an object array to a CPU pool and size it from the cache's pool
 * type. The transfer size is the pool size divided by
 * KMEM_CPU_POOL_TRANSFER_RATIO, rounded up.
 */
static inline void
kmem_cpu_pool_build(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache,
                    void **array)
{
    cpu_pool->size = cache->cpu_pool_type->array_size;
    cpu_pool->transfer_size = (cpu_pool->size
                               + KMEM_CPU_POOL_TRANSFER_RATIO - 1)
                              / KMEM_CPU_POOL_TRANSFER_RATIO;
    cpu_pool->array = array;
}
+
/*
 * Remove and return the top object of a CPU pool.
 *
 * The pool must be locked and must not be empty.
 */
static inline void *
kmem_cpu_pool_pop(struct kmem_cpu_pool *cpu_pool)
{
    cpu_pool->nr_objs--;
    return cpu_pool->array[cpu_pool->nr_objs];
}
+
/*
 * Add an object on top of a CPU pool.
 *
 * The pool must be locked and must not be full.
 */
static inline void
kmem_cpu_pool_push(struct kmem_cpu_pool *cpu_pool, void *obj)
{
    cpu_pool->array[cpu_pool->nr_objs] = obj;
    cpu_pool->nr_objs++;
}
+
/*
 * Transfer up to transfer_size objects from the slab layer to a CPU pool.
 *
 * The pool must be locked. Return the number of objects actually
 * transferred, which is 0 when the slab layer has no free buffer left.
 */
static int
kmem_cpu_pool_fill(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
    void *obj;
    int i;

    /* mutex_lock(&cache->mutex); */

    for (i = 0; i < cpu_pool->transfer_size; i++) {
        obj = kmem_cache_alloc_from_slab(cache);

        if (obj == NULL)
            break;

        kmem_cpu_pool_push(cpu_pool, obj);
    }

    /* mutex_unlock(&cache->mutex); */

    return i;
}
+
/*
 * Return transfer_size objects from a CPU pool to the slab layer.
 *
 * The pool must be locked and must contain at least transfer_size objects.
 */
static void
kmem_cpu_pool_drain(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
    void *obj;
    int i;

    /* mutex_lock(&cache->mutex); */

    for (i = cpu_pool->transfer_size; i > 0; i--) {
        obj = kmem_cpu_pool_pop(cpu_pool);
        kmem_cache_free_to_slab(cache, obj);
    }

    /* mutex_unlock(&cache->mutex); */
}
+
/*
 * Report a fatal buffer error and halt the system. Never returns.
 *
 * The meaning of arg depends on the error: the offending buftag for
 * KMEM_ERR_BUFTAG, the faulting address for KMEM_ERR_MODIFIED and
 * KMEM_ERR_REDZONE, unused otherwise.
 */
static void
kmem_cache_error(struct kmem_cache *cache, void *buf, int error, void *arg)
{
    struct kmem_buftag *buftag;

    printk("kmem: error: cache: %s, buffer: %p\n", cache->name, buf);

    switch(error) {
    case KMEM_ERR_INVALID:
        panic("kmem: freeing invalid address");
        break;
    case KMEM_ERR_DOUBLEFREE:
        panic("kmem: attempting to free the same address twice");
        break;
    case KMEM_ERR_BUFTAG:
        buftag = arg;
        panic("kmem: invalid buftag content, buftag state: %p",
              (void *)buftag->state);
        break;
    case KMEM_ERR_MODIFIED:
        panic("kmem: free buffer modified, fault address: %p, "
              "offset in buffer: %td", arg, arg - buf);
        break;
    case KMEM_ERR_REDZONE:
        panic("kmem: write beyond end of buffer, fault address: %p, "
              "offset in buffer: %td", arg, arg - buf);
        break;
    default:
        panic("kmem: unknown error");
    }

    /*
     * Never reached.
     */
}
+
+/*
+ * Compute an appropriate slab size for the given cache.
+ *
+ * Once the slab size is known, this function sets the related properties
+ * (buffers per slab and maximum color). It can also set the KMEM_CF_DIRECT
+ * and/or KMEM_CF_SLAB_EXTERNAL flags depending on the resulting layout.
+ */
+static void
+kmem_cache_compute_sizes(struct kmem_cache *cache, int flags)
+{
+ size_t i, buffers, buf_size, slab_size, free_slab_size, optimal_size;
+ size_t waste, waste_min;
+ int embed, optimal_embed = optimal_embed;
+
+ buf_size = cache->buf_size;
+
+ if (buf_size < KMEM_BUF_SIZE_THRESHOLD)
+ flags |= KMEM_CACHE_NOOFFSLAB;
+
+ i = 0;
+ waste_min = (size_t)-1;
+
+ do {
+ i++;
+ slab_size = P2ROUND(i * buf_size, PAGE_SIZE);
+ free_slab_size = slab_size;
+
+ if (flags & KMEM_CACHE_NOOFFSLAB)
+ free_slab_size -= sizeof(struct kmem_slab);
+
+ buffers = free_slab_size / buf_size;
+ waste = free_slab_size % buf_size;
+
+ if (buffers > i)
+ i = buffers;
+
+ if (flags & KMEM_CACHE_NOOFFSLAB)
+ embed = 1;
+ else if (sizeof(struct kmem_slab) <= waste) {
+ embed = 1;
+ waste -= sizeof(struct kmem_slab);
+ } else {
+ embed = 0;
+ }
+
+ if (waste <= waste_min) {
+ waste_min = waste;
+ optimal_size = slab_size;
+ optimal_embed = embed;
+ }
+ } while ((buffers < KMEM_MIN_BUFS_PER_SLAB)
+ && (slab_size < KMEM_SLAB_SIZE_THRESHOLD));
+
+ assert(!(flags & KMEM_CACHE_NOOFFSLAB) || optimal_embed);
+
+ cache->slab_size = optimal_size;
+ slab_size = cache->slab_size - (optimal_embed
+ ? sizeof(struct kmem_slab)
+ : 0);
+ cache->bufs_per_slab = slab_size / buf_size;
+ cache->color_max = slab_size % buf_size;
+
+ if (cache->color_max >= PAGE_SIZE)
+ cache->color_max = PAGE_SIZE - 1;
+
+ if (optimal_embed) {
+ if (cache->slab_size == PAGE_SIZE)
+ cache->flags |= KMEM_CF_DIRECT;
+ } else {
+ cache->flags |= KMEM_CF_SLAB_EXTERNAL;
+ }
+}
+
+void
+kmem_cache_init(struct kmem_cache *cache, const char *name, size_t obj_size,
+ size_t align, kmem_cache_ctor_t ctor,
+ kmem_slab_alloc_fn_t slab_alloc_fn,
+ kmem_slab_free_fn_t slab_free_fn, int flags)
+{
+ struct kmem_cpu_pool_type *cpu_pool_type;
+ size_t i, buf_size;
+
+#ifdef KMEM_VERIFY
+ cache->flags = KMEM_CF_VERIFY;
+#else
+ cache->flags = 0;
+#endif
+
+ if (flags & KMEM_CACHE_NOCPUPOOL)
+ cache->flags |= KMEM_CF_NO_CPU_POOL;
+
+ if (flags & KMEM_CACHE_NORECLAIM) {
+ assert(slab_free_fn == NULL);
+ flags |= KMEM_CACHE_NOOFFSLAB;
+ cache->flags |= KMEM_CF_NO_RECLAIM;
+ }
+
+ if (flags & KMEM_CACHE_VERIFY)
+ cache->flags |= KMEM_CF_VERIFY;
+
+ if (align < KMEM_ALIGN_MIN)
+ align = KMEM_ALIGN_MIN;
+
+ assert(obj_size > 0);
+ assert(ISP2(align));
+ assert(align < PAGE_SIZE);
+
+ buf_size = P2ROUND(obj_size, align);
+
+ /* mutex_init(&cache->mutex); */
+ list_node_init(&cache->node);
+ list_init(&cache->partial_slabs);
+ list_init(&cache->free_slabs);
+ rbtree_init(&cache->active_slabs);
+ cache->obj_size = obj_size;
+ cache->align = align;
+ cache->buf_size = buf_size;
+ cache->bufctl_dist = buf_size - sizeof(union kmem_bufctl);
+ cache->color = 0;
+ cache->nr_objs = 0;
+ cache->nr_bufs = 0;
+ cache->nr_slabs = 0;
+ cache->nr_free_slabs = 0;
+ cache->ctor = ctor;
+ cache->slab_alloc_fn = slab_alloc_fn;
+ cache->slab_free_fn = slab_free_fn;
+ strcpy(cache->name, name); /* TODO: strlcpy */
+ cache->buftag_dist = 0;
+ cache->redzone_pad = 0;
+
+ if (cache->flags & KMEM_CF_VERIFY) {
+ cache->bufctl_dist = buf_size;
+ cache->buftag_dist = cache->bufctl_dist + sizeof(union kmem_bufctl);
+ cache->redzone_pad = cache->bufctl_dist - cache->obj_size;
+ buf_size += sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag);
+ buf_size = P2ROUND(buf_size, align);
+ cache->buf_size = buf_size;
+ }
+
+ kmem_cache_compute_sizes(cache, flags);
+
+ for (cpu_pool_type = kmem_cpu_pool_types;
+ buf_size <= cpu_pool_type->buf_size;
+ cpu_pool_type++);
+
+ cache->cpu_pool_type = cpu_pool_type;
+
+ for (i = 0; i < ARRAY_SIZE(cache->cpu_pools); i++)
+ kmem_cpu_pool_init(&cache->cpu_pools[i], cache);
+
+ /* mutex_lock(&kmem_cache_list_mutex); */
+ list_insert_tail(&kmem_cache_list, &cache->node);
+ /* mutex_unlock(&kmem_cache_list_mutex); */
+}
+
/*
 * Return true if the cache has no free buffer left, i.e. if every
 * existing buffer is currently allocated.
 *
 * The cache must be locked.
 */
static inline int
kmem_cache_empty(struct kmem_cache *cache)
{
    return cache->nr_objs == cache->nr_bufs;
}
+
/*
 * Add a slab to a cache if it has no free buffer left.
 *
 * The caller must not hold any lock. Return nonzero if, on return, the
 * cache has at least one free buffer - whether because this function
 * created a slab or because another thread grew the cache concurrently.
 */
static int
kmem_cache_grow(struct kmem_cache *cache)
{
    struct kmem_slab *slab;
    size_t color;
    int empty;

    /* mutex_lock(&cache->mutex); */

    if (!kmem_cache_empty(cache)) {
        /* mutex_unlock(&cache->mutex); */
        return 1;
    }

    /* Advance the color, wrapping past color_max */
    color = cache->color;
    cache->color += cache->align;

    if (cache->color > cache->color_max)
        cache->color = 0;

    /* mutex_unlock(&cache->mutex); */

    /* Slab creation may block, hence the lock dance around it */
    slab = kmem_slab_create(cache, color);

    /* mutex_lock(&cache->mutex); */

    if (slab != NULL) {
        list_insert_tail(&cache->free_slabs, &slab->list_node);
        cache->nr_bufs += cache->bufs_per_slab;
        cache->nr_slabs++;
        cache->nr_free_slabs++;
    }

    /*
     * Even if our slab creation failed, another thread might have succeeded
     * in growing the cache.
     */
    empty = kmem_cache_empty(cache);

    /* mutex_unlock(&cache->mutex); */

    return !empty;
}
+
/*
 * Allocate a raw (unconstructed) buffer from the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void *
kmem_cache_alloc_from_slab(struct kmem_cache *cache)
{
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;

    /* Prefer partial slabs, to keep the number of touched slabs low */
    if (!list_empty(&cache->partial_slabs))
        slab = list_first_entry(&cache->partial_slabs, struct kmem_slab,
                                list_node);
    else if (!list_empty(&cache->free_slabs))
        slab = list_first_entry(&cache->free_slabs, struct kmem_slab, list_node);
    else
        return NULL;

    /* Unlink the first free buffer of the slab */
    bufctl = slab->first_free;
    assert(bufctl != NULL);
    slab->first_free = bufctl->next;
    slab->nr_refs++;
    cache->nr_objs++;

    /*
     * The slab has become complete.
     */
    if (slab->nr_refs == cache->bufs_per_slab) {
        /* Complete slabs are kept on no list */
        list_remove(&slab->list_node);

        /* Single-buffer slabs go straight from free to complete */
        if (slab->nr_refs == 1)
            cache->nr_free_slabs--;
    } else if (slab->nr_refs == 1) {
        /*
         * The slab has become partial.
         */
        list_remove(&slab->list_node);
        list_insert_tail(&cache->partial_slabs, &slab->list_node);
        cache->nr_free_slabs--;
    } else if (!list_singular(&cache->partial_slabs)) {
        struct list *node;
        struct kmem_slab *tmp;

        /*
         * The slab remains partial. If there are more than one partial slabs,
         * maintain the list sorted.
         */

        assert(slab->nr_refs > 1);

        /* Walk towards the head until a slab at least as referenced */
        for (node = list_prev(&slab->list_node);
             !list_end(&cache->partial_slabs, node);
             node = list_prev(node)) {
            tmp = list_entry(node, struct kmem_slab, list_node);

            if (tmp->nr_refs >= slab->nr_refs)
                break;
        }

        /*
         * If the direct neighbor was found, the list is already sorted.
         * If no slab was found, the slab is inserted at the head of the list.
         */
        if (node != list_prev(&slab->list_node)) {
            list_remove(&slab->list_node);
            list_insert_after(node, &slab->list_node);
        }
    }

    /* Slabs with allocated buffers are tracked in the tree when needed */
    if ((slab->nr_refs == 1) && kmem_slab_use_tree(cache->flags))
        rbtree_insert(&cache->active_slabs, &slab->tree_node,
                      kmem_slab_cmp_insert);

    return kmem_bufctl_to_buf(bufctl, cache);
}
+
/*
 * Release a buffer to the slab layer of a cache.
 *
 * The cache must be locked before calling this function.
 */
static void
kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
{
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;

    if (cache->flags & KMEM_CF_DIRECT) {
        /* Embedded slab data at the end of the page containing buf */
        assert(cache->slab_size == PAGE_SIZE);
        slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size)
               - 1;
    } else {
        struct rbtree_node *node;

        /* Find the nearest slab at or below buf in the tree */
        node = rbtree_lookup_nearest(&cache->active_slabs, buf,
                                     kmem_slab_cmp_lookup, RBTREE_LEFT);
        assert(node != NULL);
        slab = rbtree_entry(node, struct kmem_slab, tree_node);
        assert((unsigned long)buf < (P2ALIGN((unsigned long)slab->addr
               + cache->slab_size, PAGE_SIZE)));
    }

    assert(slab->nr_refs >= 1);
    assert(slab->nr_refs <= cache->bufs_per_slab);
    /* Push the buffer back on the slab free list */
    bufctl = kmem_buf_to_bufctl(buf, cache);
    bufctl->next = slab->first_free;
    slab->first_free = bufctl;
    slab->nr_refs--;
    cache->nr_objs--;

    /*
     * The slab has become free.
     */
    if (slab->nr_refs == 0) {
        if (kmem_slab_use_tree(cache->flags))
            rbtree_remove(&cache->active_slabs, &slab->tree_node);

        /*
         * The slab was partial.
         * (With a single buffer per slab, a complete slab is on no list,
         * so there is nothing to remove.)
         */
        if (cache->bufs_per_slab > 1)
            list_remove(&slab->list_node);

        list_insert_tail(&cache->free_slabs, &slab->list_node);
        cache->nr_free_slabs++;
    } else if (slab->nr_refs == (cache->bufs_per_slab - 1)) {
        /*
         * The slab has become partial.
         */
        list_insert(&cache->partial_slabs, &slab->list_node);
    } else if (!list_singular(&cache->partial_slabs)) {
        struct list *node;
        struct kmem_slab *tmp;

        /*
         * The slab remains partial. If there are more than one partial slabs,
         * maintain the list sorted.
         */

        assert(slab->nr_refs > 0);

        /* Walk towards the tail until a slab at most as referenced */
        for (node = list_next(&slab->list_node);
             !list_end(&cache->partial_slabs, node);
             node = list_next(node)) {
            tmp = list_entry(node, struct kmem_slab, list_node);

            if (tmp->nr_refs <= slab->nr_refs)
                break;
        }

        /*
         * If the direct neighbor was found, the list is already sorted.
         * If no slab was found, the slab is inserted at the tail of the list.
         */
        if (node != list_next(&slab->list_node)) {
            list_remove(&slab->list_node);
            list_insert_before(node, &slab->list_node);
        }
    }
}
+
/*
 * Check and prepare a buffer being handed out when verification is on:
 * validate its buftag, check the free pattern (rewriting it with the
 * uninitialized pattern), arm the redzone, mark the buftag allocated and
 * optionally run the constructor (KMEM_AV_CONSTRUCT).
 */
static void
kmem_cache_alloc_verify(struct kmem_cache *cache, void *buf, int construct)
{
    struct kmem_buftag *buftag;
    union kmem_bufctl *bufctl;
    void *addr;

    buftag = kmem_buf_to_buftag(buf, cache);

    if (buftag->state != KMEM_BUFTAG_FREE)
        kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);

    /* Any write since the buffer was freed is a use-after-free */
    addr = kmem_buf_verify_fill(buf, KMEM_FREE_PATTERN, KMEM_UNINIT_PATTERN,
                                cache->bufctl_dist);

    if (addr != NULL)
        kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr);

    /* Redzone the padding between the object and the bufctl */
    addr = buf + cache->obj_size;
    memset(addr, KMEM_REDZONE_BYTE, cache->redzone_pad);

    bufctl = kmem_buf_to_bufctl(buf, cache);
    bufctl->redzone = KMEM_REDZONE_WORD;
    buftag->state = KMEM_BUFTAG_ALLOC;

    if (construct && (cache->ctor != NULL))
        cache->ctor(buf);
}
+
/*
 * Allocate a constructed object from a cache.
 *
 * The fast path pops an object from the current processor pool; when the
 * pool is empty it is refilled from the slab layer, growing the cache if
 * necessary. Caches without CPU pools go straight to the slab layer.
 * Return NULL if memory is exhausted.
 */
void *
kmem_cache_alloc(struct kmem_cache *cache)
{
    struct kmem_cpu_pool *cpu_pool;
    int filled;
    void *buf;

    cpu_pool = kmem_cpu_pool_get(cache);

    if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL)
        goto slab_alloc;

    /* mutex_lock(&cpu_pool->mutex); */

fast_alloc:
    if (likely(cpu_pool->nr_objs > 0)) {
        buf = kmem_cpu_pool_pop(cpu_pool);
        /* mutex_unlock(&cpu_pool->mutex); */

        if (cpu_pool->flags & KMEM_CF_VERIFY)
            kmem_cache_alloc_verify(cache, buf, KMEM_AV_CONSTRUCT);

        return buf;
    }

    if (cpu_pool->array != NULL) {
        /* Empty pool: refill it from the slab layer */
        filled = kmem_cpu_pool_fill(cpu_pool, cache);

        if (!filled) {
            /* mutex_unlock(&cpu_pool->mutex); */

            /* Slab layer exhausted too: try to grow the cache */
            filled = kmem_cache_grow(cache);

            if (!filled)
                return NULL;

            /* mutex_lock(&cpu_pool->mutex); */
        }

        goto fast_alloc;
    }

    /* mutex_unlock(&cpu_pool->mutex); */

slab_alloc:
    /* mutex_lock(&cache->mutex); */
    buf = kmem_cache_alloc_from_slab(cache);
    /* mutex_unlock(&cache->mutex); */

    if (buf == NULL) {
        filled = kmem_cache_grow(cache);

        if (!filled)
            return NULL;

        goto slab_alloc;
    }

    if (cache->flags & KMEM_CF_VERIFY)
        kmem_cache_alloc_verify(cache, buf, KMEM_AV_NOCONSTRUCT);

    /* Buffers from the slab layer are raw: construct them here */
    if (cache->ctor != NULL)
        cache->ctor(buf);

    return buf;
}
+
/*
 * Validate a buffer being freed when verification is on: check that the
 * address belongs to an active slab of this cache and is buffer aligned,
 * detect double frees through the buftag, check the redzone, then restore
 * the free pattern and mark the buftag free.
 */
static void
kmem_cache_free_verify(struct kmem_cache *cache, void *buf)
{
    struct rbtree_node *node;
    struct kmem_buftag *buftag;
    struct kmem_slab *slab;
    union kmem_bufctl *bufctl;
    unsigned char *redzone_byte;
    unsigned long slabend;

    /* mutex_lock(&cache->mutex); */
    node = rbtree_lookup_nearest(&cache->active_slabs, buf,
                                 kmem_slab_cmp_lookup, RBTREE_LEFT);
    /* mutex_unlock(&cache->mutex); */

    if (node == NULL)
        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);

    slab = rbtree_entry(node, struct kmem_slab, tree_node);
    slabend = P2ALIGN((unsigned long)slab->addr + cache->slab_size, PAGE_SIZE);

    if ((unsigned long)buf >= slabend)
        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);

    /* The address must fall exactly on a buffer boundary */
    if ((((unsigned long)buf - (unsigned long)slab->addr) % cache->buf_size)
        != 0)
        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);

    /*
     * As the buffer address is valid, accessing its buftag is safe.
     */
    buftag = kmem_buf_to_buftag(buf, cache);

    if (buftag->state != KMEM_BUFTAG_ALLOC) {
        if (buftag->state == KMEM_BUFTAG_FREE)
            kmem_cache_error(cache, buf, KMEM_ERR_DOUBLEFREE, NULL);
        else
            kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);
    }

    /* Check the redzone pad between the object and the bufctl */
    redzone_byte = buf + cache->obj_size;
    bufctl = kmem_buf_to_bufctl(buf, cache);

    while (redzone_byte < (unsigned char *)bufctl) {
        if (*redzone_byte != KMEM_REDZONE_BYTE)
            kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);

        redzone_byte++;
    }

    /* Check the redzone word stored in the bufctl itself */
    if (bufctl->redzone != KMEM_REDZONE_WORD) {
        unsigned long word;

        word = KMEM_REDZONE_WORD;
        redzone_byte = kmem_buf_verify_bytes(&bufctl->redzone, &word,
                                             sizeof(bufctl->redzone));
        kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);
    }

    kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
    buftag->state = KMEM_BUFTAG_FREE;
}
+
/*
 * Release an object to a cache.
 *
 * The fast path pushes the object on the current processor pool; a full
 * pool is partially drained back to the slab layer first. The pool array
 * itself is allocated lazily, on the first free that needs it.
 */
void
kmem_cache_free(struct kmem_cache *cache, void *obj)
{
    struct kmem_cpu_pool *cpu_pool;
    void **array;

    cpu_pool = kmem_cpu_pool_get(cache);

    if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL)
        goto slab_free;

    if (cpu_pool->flags & KMEM_CF_VERIFY)
        kmem_cache_free_verify(cache, obj);

    /* mutex_lock(&cpu_pool->mutex); */

fast_free:
    if (likely(cpu_pool->nr_objs < cpu_pool->size)) {
        kmem_cpu_pool_push(cpu_pool, obj);
        /* mutex_unlock(&cpu_pool->mutex); */
        return;
    }

    if (cpu_pool->array != NULL) {
        /* Full pool: hand a batch of objects back to the slab layer */
        kmem_cpu_pool_drain(cpu_pool, cache);
        goto fast_free;
    }

    /* mutex_unlock(&cpu_pool->mutex); */

    /* No array yet: build the CPU pool before retrying the fast path */
    array = kmem_cache_alloc(cache->cpu_pool_type->array_cache);

    if (array != NULL) {
        /* mutex_lock(&cpu_pool->mutex); */

        /*
         * Another thread may have built the CPU pool while the mutex was
         * dropped.
         */
        if (cpu_pool->array != NULL) {
            /* mutex_unlock(&cpu_pool->mutex); */
            kmem_cache_free(cache->cpu_pool_type->array_cache, array);
            goto fast_free;
        }

        kmem_cpu_pool_build(cpu_pool, cache, array);
        goto fast_free;
    }

slab_free:
    kmem_cache_free_to_slab(cache, obj);
}
+
/*
 * Display detailed information about a cache on the console.
 *
 * When cache is NULL, iterate over all registered caches instead. The
 * statistics are copied into a heap-allocated snapshot so printing
 * happens outside the (currently commented out) cache lock.
 */
void
kmem_cache_info(struct kmem_cache *cache)
{
    struct kmem_cache *cache_stats;
    char flags_str[64];

    if (cache == NULL) {
        /* mutex_lock(&kmem_cache_list_mutex); */

        list_for_each_entry(&kmem_cache_list, cache, node)
            kmem_cache_info(cache);

        /* mutex_unlock(&kmem_cache_list_mutex); */

        return;
    }

    cache_stats = kmem_alloc(sizeof(*cache_stats));

    if (cache_stats == NULL) {
        printk("kmem: unable to allocate memory for cache stats\n");
        return;
    }

    /* Snapshot the statistics while (conceptually) holding the lock */
    /* mutex_lock(&cache->mutex); */
    cache_stats->flags = cache->flags;
    cache_stats->obj_size = cache->obj_size;
    cache_stats->align = cache->align;
    cache_stats->buf_size = cache->buf_size;
    cache_stats->bufctl_dist = cache->bufctl_dist;
    cache_stats->slab_size = cache->slab_size;
    cache_stats->color_max = cache->color_max;
    cache_stats->bufs_per_slab = cache->bufs_per_slab;
    cache_stats->nr_objs = cache->nr_objs;
    cache_stats->nr_bufs = cache->nr_bufs;
    cache_stats->nr_slabs = cache->nr_slabs;
    cache_stats->nr_free_slabs = cache->nr_free_slabs;
    strcpy(cache_stats->name, cache->name);
    cache_stats->buftag_dist = cache->buftag_dist;
    cache_stats->redzone_pad = cache->redzone_pad;
    cache_stats->cpu_pool_type = cache->cpu_pool_type;
    /* mutex_unlock(&cache->mutex); */

    snprintf(flags_str, sizeof(flags_str), "%s%s%s",
             (cache_stats->flags & KMEM_CF_DIRECT) ? " DIRECT" : "",
             (cache_stats->flags & KMEM_CF_SLAB_EXTERNAL) ? " SLAB_EXTERNAL" : "",
             (cache_stats->flags & KMEM_CF_VERIFY) ? " VERIFY" : "");

    printk("kmem: name: %s\n", cache_stats->name);
    printk("kmem: flags: 0x%x%s\n", cache_stats->flags, flags_str);
    printk("kmem: obj_size: %zu\n", cache_stats->obj_size);
    printk("kmem: align: %zu\n", cache_stats->align);
    printk("kmem: buf_size: %zu\n", cache_stats->buf_size);
    printk("kmem: bufctl_dist: %zu\n", cache_stats->bufctl_dist);
    printk("kmem: slab_size: %zu\n", cache_stats->slab_size);
    printk("kmem: color_max: %zu\n", cache_stats->color_max);
    printk("kmem: bufs_per_slab: %lu\n", cache_stats->bufs_per_slab);
    printk("kmem: nr_objs: %lu\n", cache_stats->nr_objs);
    printk("kmem: nr_bufs: %lu\n", cache_stats->nr_bufs);
    printk("kmem: nr_slabs: %lu\n", cache_stats->nr_slabs);
    printk("kmem: nr_free_slabs: %lu\n", cache_stats->nr_free_slabs);
    printk("kmem: buftag_dist: %zu\n", cache_stats->buftag_dist);
    printk("kmem: redzone_pad: %zu\n", cache_stats->redzone_pad);
    printk("kmem: cpu_pool_size: %d\n", cache_stats->cpu_pool_type->array_size);

    kmem_free(cache_stats, sizeof(*cache_stats));
}
+
/*
 * Early allocator initialization, run before any cache is created.
 */
void __init
kmem_bootstrap(void)
{
    /* Make sure a bufctl can always be stored in a buffer */
    assert(sizeof(union kmem_bufctl) <= KMEM_ALIGN_MIN);

    list_init(&kmem_cache_list);
    /* mutex_init(&kmem_cache_list_mutex); */
}
+
+void __init
+kmem_setup(void)
+{
+ struct kmem_cpu_pool_type *cpu_pool_type;
+ char name[KMEM_NAME_SIZE];
+ size_t i, size;
+
+ for (i = 0; i < ARRAY_SIZE(kmem_cpu_pool_types); i++) {
+ cpu_pool_type = &kmem_cpu_pool_types[i];
+ cpu_pool_type->array_cache = &kmem_cpu_array_caches[i];
+ sprintf(name, "kmem_cpu_array_%d", cpu_pool_type->array_size);
+ size = sizeof(void *) * cpu_pool_type->array_size;
+ kmem_cache_init(cpu_pool_type->array_cache, name, size,
+ cpu_pool_type->array_align, NULL, NULL, NULL, 0);
+ }
+
+ /*
+ * Prevent off slab data for the slab cache to avoid infinite recursion.
+ */
+ kmem_cache_init(&kmem_slab_cache, "kmem_slab", sizeof(struct kmem_slab),
+ 0, NULL, NULL, NULL, KMEM_CACHE_NOOFFSLAB);
+
+ size = 1 << KMEM_CACHES_FIRST_SHIFT;
+
+ for (i = 0; i < ARRAY_SIZE(kmem_caches); i++) {
+ sprintf(name, "kmem_%zu", size);
+ kmem_cache_init(&kmem_caches[i], name, size, 0, NULL, NULL, NULL, 0);
+ size <<= 1;
+ }
+}
+
+/*
+ * Return the kmem cache index matching the given allocation size, which
+ * must be strictly greater than 0.
+ */
+static inline size_t
+kmem_get_index(unsigned long size)
+{
+ assert(size != 0);
+
+ size = (size - 1) >> KMEM_CACHES_FIRST_SHIFT;
+
+ if (size == 0)
+ return 0;
+ else
+ return (sizeof(long) * CHAR_BIT) - __builtin_clzl(size);
+}
+
/*
 * Pattern the unused tail of a general purpose buffer with redzone bytes,
 * so that writes past the requested size can be detected on free.
 */
static void
kmem_alloc_verify(struct kmem_cache *cache, void *buf, size_t size)
{
    size_t redzone_size;
    void *redzone;

    assert(size <= cache->obj_size);

    redzone = buf + size;
    redzone_size = cache->obj_size - size;
    memset(redzone, KMEM_REDZONE_BYTE, redzone_size);
}
+
/*
 * Allocate size bytes of kernel memory.
 *
 * Sizes covered by the general caches are serviced from them; larger
 * requests fall back to the kernel map. Return NULL if size is 0 or
 * memory is exhausted.
 */
void *
kmem_alloc(size_t size)
{
    size_t index;
    void *buf;

    if (size == 0)
        return NULL;

    index = kmem_get_index(size);

    if (index < ARRAY_SIZE(kmem_caches)) {
        struct kmem_cache *cache;

        cache = &kmem_caches[index];
        buf = kmem_cache_alloc(cache);

        if ((buf != NULL) && (cache->flags & KMEM_CF_VERIFY))
            kmem_alloc_verify(cache, buf, size);
    } else {
        buf = (void *)vm_kmem_alloc(size);
    }

    return buf;
}
+
/*
 * Allocate size bytes of kernel memory, zero-filled.
 *
 * Return NULL if size is 0 or memory is exhausted.
 */
void *
kmem_zalloc(size_t size)
{
    void *buf;

    buf = kmem_alloc(size);

    if (buf == NULL)
        return NULL;

    /* memset returns its first argument */
    return memset(buf, 0, size);
}
+
/*
 * Check the redzone bytes covering the unused tail of a general purpose
 * buffer being freed, reporting any overrun as a redzone error.
 */
static void
kmem_free_verify(struct kmem_cache *cache, void *buf, size_t size)
{
    unsigned char *redzone_byte, *redzone_end;

    assert(size <= cache->obj_size);

    redzone_byte = buf + size;
    redzone_end = buf + cache->obj_size;

    while (redzone_byte < redzone_end) {
        if (*redzone_byte != KMEM_REDZONE_BYTE)
            kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);

        redzone_byte++;
    }
}
+
/*
 * Release memory obtained with kmem_alloc() or kmem_zalloc().
 *
 * The size must be the one passed at allocation time, as it selects the
 * backing cache. Passing NULL or a size of 0 is a no-op.
 */
void
kmem_free(void *ptr, size_t size)
{
    size_t index;

    if ((ptr == NULL) || (size == 0))
        return;

    index = kmem_get_index(size);

    if (index < ARRAY_SIZE(kmem_caches)) {
        struct kmem_cache *cache;

        cache = &kmem_caches[index];

        if (cache->flags & KMEM_CF_VERIFY)
            kmem_free_verify(cache, ptr, size);

        kmem_cache_free(cache, ptr);
    } else {
        vm_kmem_free((unsigned long)ptr, size);
    }
}
+
/*
 * Display a per-cache summary of memory usage on the console, with total
 * and reclaimable amounts in KiB.
 */
void
kmem_info(void)
{
    struct kmem_cache *cache, *cache_stats;
    size_t mem_usage, mem_reclaimable;
    int not_reclaimable;

    /* Heap snapshot so printing happens outside the per-cache lock */
    cache_stats = kmem_alloc(sizeof(*cache_stats));

    if (cache_stats == NULL) {
        printk("kmem: unable to allocate memory for cache stats\n");
        return;
    }

    printk("kmem: cache                  obj slab  bufs   objs   bufs "
           "   total reclaimable\n");
    printk("kmem: name                  size size /slab  usage  count "
           "  memory      memory\n");

    /* mutex_lock(&kmem_cache_list_mutex); */

    list_for_each_entry(&kmem_cache_list, cache, node) {
        /* mutex_lock(&cache->mutex); */
        not_reclaimable = cache->flags & KMEM_CF_NO_RECLAIM;
        cache_stats->obj_size = cache->obj_size;
        cache_stats->slab_size = cache->slab_size;
        cache_stats->bufs_per_slab = cache->bufs_per_slab;
        cache_stats->nr_objs = cache->nr_objs;
        cache_stats->nr_bufs = cache->nr_bufs;
        cache_stats->nr_slabs = cache->nr_slabs;
        cache_stats->nr_free_slabs = cache->nr_free_slabs;
        strcpy(cache_stats->name, cache->name);
        /* mutex_unlock(&cache->mutex); */

        /* >> 10 converts bytes to KiB */
        mem_usage = (cache_stats->nr_slabs * cache_stats->slab_size) >> 10;

        if (not_reclaimable)
            mem_reclaimable = 0;
        else
            mem_reclaimable =
                (cache_stats->nr_free_slabs * cache_stats->slab_size) >> 10;

        printk("kmem: %-19s %6zu %3zuk  %4lu %6lu %6lu %7zuk %10zuk\n",
               cache_stats->name, cache_stats->obj_size,
               cache_stats->slab_size >> 10, cache_stats->bufs_per_slab,
               cache_stats->nr_objs, cache_stats->nr_bufs, mem_usage,
               mem_reclaimable);
    }

    /* mutex_unlock(&kmem_cache_list_mutex); */

    kmem_free(cache_stats, sizeof(*cache_stats));
}
diff --git a/kern/kmem.h b/kern/kmem.h
new file mode 100644
index 00000000..dee4a857
--- /dev/null
+++ b/kern/kmem.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Object caching and general purpose memory allocator.
+ */
+
+#ifndef _KERN_KMEM_H
+#define _KERN_KMEM_H
+
+#include <kern/param.h>
+#include <lib/list.h>
+#include <lib/rbtree.h>
+#include <lib/stddef.h>
+
+/*
+ * Per-processor cache of pre-constructed objects.
+ *
+ * The flags member is a read-only CPU-local copy of the parent cache flags.
+ */
+struct kmem_cpu_pool {
+ /* struct mutex mutex; */
+ int flags;
+ int size;
+ int transfer_size;
+ int nr_objs;
+ void **array;
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * When a cache is created, its CPU pool type is determined from the buffer
+ * size. For small buffer sizes, many objects can be cached in a CPU pool.
+ * Conversely, for large buffer sizes, this would incur much overhead, so only
+ * a few objects are stored in a CPU pool.
+ */
+struct kmem_cpu_pool_type {
+ size_t buf_size;
+ int array_size;
+ size_t array_align;
+ struct kmem_cache *array_cache;
+};
+
+/*
+ * Buffer descriptor.
+ *
+ * For normal caches (i.e. without KMEM_CF_VERIFY), bufctls are located at the
+ * end of (but inside) each buffer. If KMEM_CF_VERIFY is set, bufctls are
+ * located after each buffer.
+ *
+ * When an object is allocated to a client, its bufctl isn't used. This memory
+ * is instead used for redzoning if cache debugging is in effect.
+ */
+union kmem_bufctl {
+ union kmem_bufctl *next;
+ unsigned long redzone;
+};
+
+/*
+ * Redzone guard word.
+ */
+#ifdef __LP64__
+#ifdef __BIG_ENDIAN__
+#define KMEM_REDZONE_WORD 0xfeedfacefeedfaceUL
+#else /* __BIG_ENDIAN__ */
+#define KMEM_REDZONE_WORD 0xcefaedfecefaedfeUL
+#endif /* __BIG_ENDIAN__ */
+#else /* __LP64__ */
+#ifdef __BIG_ENDIAN__
+#define KMEM_REDZONE_WORD 0xfeedfaceUL
+#else /* __BIG_ENDIAN__ */
+#define KMEM_REDZONE_WORD 0xcefaedfeUL
+#endif /* __BIG_ENDIAN__ */
+#endif /* __LP64__ */
+
+/*
+ * Redzone byte for padding.
+ */
+#define KMEM_REDZONE_BYTE 0xbb
+
+/*
+ * Buffer tag.
+ *
+ * This structure is only used for KMEM_CF_VERIFY caches. It is located after
+ * the bufctl and includes information about the state of the buffer it
+ * describes (allocated or not). It should be thought of as a debugging
+ * extension of the bufctl.
+ */
+struct kmem_buftag {
+ unsigned long state;
+};
+
+/*
+ * Values the buftag state member can take.
+ */
+#ifdef __LP64__
+#ifdef __BIG_ENDIAN__
+#define KMEM_BUFTAG_ALLOC 0xa110c8eda110c8edUL
+#define KMEM_BUFTAG_FREE 0xf4eeb10cf4eeb10cUL
+#else /* __BIG_ENDIAN__ */
+#define KMEM_BUFTAG_ALLOC 0xedc810a1edc810a1UL
+#define KMEM_BUFTAG_FREE 0x0cb1eef40cb1eef4UL
+#endif /* __BIG_ENDIAN__ */
+#else /* __LP64__ */
+#ifdef __BIG_ENDIAN__
+#define KMEM_BUFTAG_ALLOC 0xa110c8edUL
+#define KMEM_BUFTAG_FREE 0xf4eeb10cUL
+#else /* __BIG_ENDIAN__ */
+#define KMEM_BUFTAG_ALLOC 0xedc810a1UL
+#define KMEM_BUFTAG_FREE 0x0cb1eef4UL
+#endif /* __BIG_ENDIAN__ */
+#endif /* __LP64__ */
+
+/*
+ * Free and uninitialized patterns.
+ *
+ * These values are unconditionally 64-bit wide since buffers are at least
+ * 8-byte aligned.
+ */
+#ifdef __BIG_ENDIAN__
+#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL
+#define KMEM_UNINIT_PATTERN 0xbaddcafebaddcafeULL
+#else /* __BIG_ENDIAN__ */
+#define KMEM_FREE_PATTERN 0xefbeaddeefbeaddeULL
+#define KMEM_UNINIT_PATTERN 0xfecaddbafecaddbaULL
+#endif /* __BIG_ENDIAN__ */
+
+/*
+ * Page-aligned collection of unconstructed buffers.
+ *
+ * This structure is either allocated from the slab cache, or, when internal
+ * fragmentation allows it, or if forced by the cache creator, from the slab
+ * it describes.
+ */
+struct kmem_slab {
+ struct list list_node;
+ struct rbtree_node tree_node;
+ unsigned long nr_refs;
+ union kmem_bufctl *first_free;
+ void *addr;
+};
+
+/*
+ * Type for constructor functions.
+ *
+ * The pre-constructed state of an object is supposed to include only
+ * elements such as e.g. linked lists, locks, reference counters. Therefore
+ * constructors are expected to 1) never fail and 2) not need any
+ * user-provided data. The first constraint implies that object construction
+ * never performs dynamic resource allocation, which also means there is no
+ * need for destructors.
+ */
+typedef void (*kmem_cache_ctor_t)(void *);
+
+/*
+ * Types for slab allocation/free functions.
+ *
+ * All addresses and sizes must be page-aligned.
+ */
+typedef unsigned long (*kmem_slab_alloc_fn_t)(size_t);
+typedef void (*kmem_slab_free_fn_t)(unsigned long, size_t);
+
+/*
+ * Cache name buffer size.
+ */
+#define KMEM_NAME_SIZE 32
+
+/*
+ * Cache flags.
+ *
+ * The flags don't change once set and can be tested without locking.
+ */
+#define KMEM_CF_NO_CPU_POOL 0x01 /* CPU pool layer disabled */
+#define KMEM_CF_SLAB_EXTERNAL 0x02 /* Slab data is off slab */
+#define KMEM_CF_NO_RECLAIM 0x04 /* Slabs are not reclaimable */
+#define KMEM_CF_VERIFY 0x08 /* Debugging facilities enabled */
+#define KMEM_CF_DIRECT 0x10 /* No buf-to-slab tree lookup */
+
+/*
+ * Cache of objects.
+ *
+ * Locking order : cpu_pool -> cache. CPU pools locking is ordered by CPU ID.
+ *
+ * The partial slabs list is sorted by slab references. Slabs with a high
+ * number of references are placed first on the list to reduce fragmentation.
+ * Sorting occurs at insertion/removal of buffers in a slab. As the list
+ * is maintained sorted, and the number of references only changes by one,
+ * this is a very cheap operation in the average case and the worst (linear)
+ * case is very unlikely.
+ */
+struct kmem_cache {
+ /* CPU pool layer */
+ struct kmem_cpu_pool cpu_pools[MAX_CPUS];
+ struct kmem_cpu_pool_type *cpu_pool_type;
+
+ /* Slab layer */
+ /* struct mutex mutex; */
+ struct list node; /* Cache list linkage */
+ struct list partial_slabs;
+ struct list free_slabs;
+ struct rbtree active_slabs;
+ int flags;
+ size_t obj_size; /* User-provided size */
+ size_t align;
+ size_t buf_size; /* Aligned object size */
+ size_t bufctl_dist; /* Distance from buffer to bufctl */
+ size_t slab_size;
+ size_t color;
+ size_t color_max;
+ unsigned long bufs_per_slab;
+ unsigned long nr_objs; /* Number of allocated objects */
+ unsigned long nr_bufs; /* Total number of buffers */
+ unsigned long nr_slabs;
+ unsigned long nr_free_slabs;
+ kmem_cache_ctor_t ctor;
+ kmem_slab_alloc_fn_t slab_alloc_fn;
+ kmem_slab_free_fn_t slab_free_fn;
+ char name[KMEM_NAME_SIZE];
+ size_t buftag_dist; /* Distance from buffer to buftag */
+ size_t redzone_pad; /* Bytes from end of object to redzone word */
+};
+
+/*
+ * Cache creation flags.
+ */
+#define KMEM_CACHE_NOCPUPOOL 0x1 /* Don't use the per-cpu pools */
+#define KMEM_CACHE_NOOFFSLAB 0x2 /* Don't allocate external slab data */
+#define KMEM_CACHE_NORECLAIM 0x4 /* Never give slabs back to their source,
+ implies KMEM_CACHE_NOOFFSLAB */
+#define KMEM_CACHE_VERIFY 0x8 /* Use debugging facilities */
+
+/*
+ * Initialize a cache.
+ *
+ * If a slab allocation/free function pointer is NULL, the default backend
+ * (vm_kmem on the kmem map) is used for the allocation/free action.
+ */
+void kmem_cache_init(struct kmem_cache *cache, const char *name,
+ size_t obj_size, size_t align, kmem_cache_ctor_t ctor,
+ kmem_slab_alloc_fn_t slab_alloc_fn,
+ kmem_slab_free_fn_t slab_free_fn, int flags);
+
+/*
+ * Allocate an object from a cache.
+ */
+void * kmem_cache_alloc(struct kmem_cache *cache);
+
+/*
+ * Release an object to its cache.
+ */
+void kmem_cache_free(struct kmem_cache *cache, void *obj);
+
+/*
+ * Display internal cache information.
+ *
+ * If cache is NULL, this function displays all managed caches.
+ */
+void kmem_cache_info(struct kmem_cache *cache);
+
+/*
+ * Early initialization of the kernel memory allocator.
+ *
+ * Once this function returns, caches can be initialized.
+ */
+void kmem_bootstrap(void);
+
+/*
+ * Set up the kernel memory allocator module.
+ */
+void kmem_setup(void);
+
+/*
+ * Allocate size bytes of uninitialized memory.
+ */
+void * kmem_alloc(size_t size);
+
+/*
+ * Allocate size bytes of zeroed memory.
+ */
+void * kmem_zalloc(size_t size);
+
+/*
+ * Release memory obtained with kmem_alloc() or kmem_zalloc().
+ *
+ * The size argument must strictly match the value given at allocation time.
+ */
+void kmem_free(void *ptr, size_t size);
+
+/*
+ * Display global kernel memory information.
+ */
+void kmem_info(void);
+
+#endif /* _KERN_KMEM_H */
diff --git a/kern/panic.c b/kern/panic.c
new file mode 100644
index 00000000..7615850e
--- /dev/null
+++ b/kern/panic.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdarg.h>
+
+#include <kern/panic.h>
+#include <kern/printk.h>
+#include <machine/cpu.h>
+
+/*
+ * Print the panic message and halt the system.
+ *
+ * Interrupts are disabled first so the report cannot be interleaved with
+ * other console output. This function never returns.
+ */
+void
+panic(const char *format, ...)
+{
+    va_list list;
+
+    cpu_intr_disable();
+
+    printk("\nkernel panic: ");
+    va_start(list, format);
+    vprintk(format, list);
+    /* Each va_start must be matched by va_end (C11 7.16.1). */
+    va_end(list);
+
+    cpu_halt();
+
+    /*
+     * Never reached.
+     */
+}
diff --git a/kern/panic.h b/kern/panic.h
new file mode 100644
index 00000000..a3bfbbe2
--- /dev/null
+++ b/kern/panic.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_PANIC_H
+#define _KERN_PANIC_H
+
+#include <lib/macros.h>
+
+/*
+ * Print the given message and halt the system immediately.
+ */
+void __noreturn panic(const char *format, ...) __format_printf(1, 2);
+
+#endif /* _KERN_PANIC_H */
diff --git a/kern/param.h b/kern/param.h
new file mode 100644
index 00000000..23511ee3
--- /dev/null
+++ b/kern/param.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_PARAM_H
+#define _KERN_PARAM_H
+
+#include <machine/param.h>
+
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define PAGE_MASK (PAGE_SIZE - 1)
+
+#endif /* _KERN_PARAM_H */
diff --git a/kern/printk.c b/kern/printk.c
new file mode 100644
index 00000000..fc609c3c
--- /dev/null
+++ b/kern/printk.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/printk.h>
+#include <lib/sprintf.h>
+#include <machine/cpu.h>
+
+/*
+ * Size of the static buffer.
+ */
+#define PRINTK_BUFSIZE 1024
+
+/*
+ * XXX Must be provided by a console driver.
+ */
+extern void console_write_byte(char c);
+
+static char printk_buffer[PRINTK_BUFSIZE];
+
+/*
+ * Variadic front end: gather the arguments and delegate to vprintk().
+ */
+int
+printk(const char *format, ...)
+{
+    va_list args;
+    int nr_chars;
+
+    va_start(args, format);
+    nr_chars = vprintk(format, args);
+    va_end(args);
+    return nr_chars;
+}
+
+int
+vprintk(const char *format, va_list ap)
+{
+ unsigned long flags;
+ int length;
+ char *ptr;
+
+ /*
+ * Disable interrupts while using the shared static buffer and the
+ * console, restoring the previous state afterwards.
+ */
+ flags = cpu_intr_save();
+
+ length = vsnprintf(printk_buffer, sizeof(printk_buffer), format, ap);
+
+ for (ptr = printk_buffer; *ptr != '\0'; ptr++)
+ console_write_byte(*ptr);
+
+ cpu_intr_restore(flags);
+
+ /* NOTE(review): on truncation, length may exceed what was written. */
+ return length;
+}
diff --git a/kern/printk.h b/kern/printk.h
new file mode 100644
index 00000000..37f25453
--- /dev/null
+++ b/kern/printk.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Formatted output functions.
+ *
+ * The printk() and vprintk() functions internally use a statically
+ * allocated buffer. They won't produce output larger than 1 KiB. They can
+ * be used safely in any context.
+ *
+ * See the sprintf library module for information about the supported formats.
+ */
+
+#ifndef _KERN_PRINTK_H
+#define _KERN_PRINTK_H
+
+#include <stdarg.h>
+
+#include <lib/macros.h>
+
+int printk(const char *format, ...) __format_printf(1, 2);
+int vprintk(const char *format, va_list ap) __format_printf(1, 0);
+
+#endif /* _KERN_PRINTK_H */
diff --git a/kern/types.h b/kern/types.h
new file mode 100644
index 00000000..5fc3cb59
--- /dev/null
+++ b/kern/types.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_TYPES_H
+#define _KERN_TYPES_H
+
+#include <machine/types.h>
+
+#endif /* _KERN_TYPES_H */
diff --git a/lib/assert.h b/lib/assert.h
new file mode 100644
index 00000000..19da04c1
--- /dev/null
+++ b/lib/assert.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LIB_ASSERT_H
+#define _LIB_ASSERT_H
+
+#ifdef NDEBUG
+#define assert(expression) ((void)(expression))
+#else /* NDEBUG */
+
+#include <kern/panic.h>
+#include <lib/macros.h>
+
+/*
+ * Panic if the given expression is false.
+ */
+#define assert(expression) \
+MACRO_BEGIN \
+ if (unlikely(!(expression))) \
+ panic("assertion (%s) failed in %s:%d, function %s()", \
+ XQUOTE(expression), __FILE__, __LINE__, __func__); \
+MACRO_END
+
+#endif /* NDEBUG */
+
+#endif /* _LIB_ASSERT_H */
diff --git a/lib/limits.h b/lib/limits.h
new file mode 100644
index 00000000..8f0bd4df
--- /dev/null
+++ b/lib/limits.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LIB_LIMITS_H
+#define _LIB_LIMITS_H
+
+#define CHAR_BIT 8
+
+#endif /* _LIB_LIMITS_H */
diff --git a/lib/list.h b/lib/list.h
new file mode 100644
index 00000000..b530d6c4
--- /dev/null
+++ b/lib/list.h
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2009, 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Simple doubly-linked list.
+ */
+
+#ifndef _LIB_LIST_H
+#define _LIB_LIST_H
+
+#include <lib/macros.h>
+#include <lib/stddef.h>
+
+/*
+ * Structure used as both head and node.
+ *
+ * This implementation relies on using the same type for both heads and nodes.
+ *
+ * It is recommended to encode the use of struct list variables in their names,
+ * e.g. struct list free_list or struct list free_objects is a good hint for a
+ * list of free objects. A declaration like struct list free_node clearly
+ * indicates it is used as part of a node in the free list.
+ */
+struct list {
+ struct list *prev;
+ struct list *next;
+};
+
+/*
+ * Static list initializer.
+ */
+#define LIST_INITIALIZER(list) { &(list), &(list) }
+
+/*
+ * Initialize a list.
+ */
+static inline void
+list_init(struct list *list)
+{
+    /* An empty list is a head whose links both point back to itself. */
+    list->next = list->prev = list;
+}
+
+/*
+ * Initialize a list node.
+ *
+ * An entry is in no list when its node members point to NULL.
+ */
+static inline void
+list_node_init(struct list *node)
+{
+    /* NULL links mark the entry as belonging to no list. */
+    node->next = node->prev = NULL;
+}
+
+/*
+ * Return true if node is in no list.
+ */
+static inline int
+list_node_unlinked(const struct list *node)
+{
+ /* Both links are set to NULL together, so testing prev suffices. */
+ return node->prev == NULL;
+}
+
+/*
+ * Macro that evaluates to the address of the structure containing the
+ * given node based on the given type and member.
+ */
+#define list_entry(node, type, member) structof(node, type, member)
+
+/*
+ * Return the first node of a list.
+ */
+static inline struct list *
+list_first(const struct list *list)
+{
+ /* On an empty list this returns the head itself. */
+ return list->next;
+}
+
+/*
+ * Return the last node of a list.
+ */
+static inline struct list *
+list_last(const struct list *list)
+{
+ /* On an empty list this returns the head itself. */
+ return list->prev;
+}
+
+/*
+ * Return the node next to the given node.
+ */
+static inline struct list *
+list_next(const struct list *node)
+{
+ /* The list is circular: past the last node, this yields the head. */
+ return node->next;
+}
+
+/*
+ * Return the node previous to the given node.
+ */
+static inline struct list *
+list_prev(const struct list *node)
+{
+ /* The list is circular: before the first node, this yields the head. */
+ return node->prev;
+}
+
+/*
+ * Get the first entry of a list.
+ */
+#define list_first_entry(list, type, member) \
+ list_entry(list_first(list), type, member)
+
+/*
+ * Get the last entry of a list.
+ */
+#define list_last_entry(list, type, member) \
+ list_entry(list_last(list), type, member)
+
+/*
+ * Return true if node is after the last or before the first node of the list.
+ */
+static inline int
+list_end(const struct list *list, const struct list *node)
+{
+    /* Iteration terminates when the cursor wraps back to the head. */
+    return node == list;
+}
+
+/*
+ * Return true if list is empty.
+ */
+static inline int
+list_empty(const struct list *list)
+{
+    /* An empty head links to itself. */
+    return list->next == list;
+}
+
+/*
+ * Return true if list contains exactly one node.
+ */
+static inline int
+list_singular(const struct list *list)
+{
+    /* Exactly one node: both links name the same node, and it isn't the head. */
+    return (list->next == list->prev) && !list_empty(list);
+}
+
+/*
+ * Split list2 by moving its nodes up to (but not including) the given
+ * node into list1 (which can be in a stale state).
+ *
+ * If list2 is empty, or node is list2 or list2->next, nothing is done.
+ */
+static inline void
+list_split(struct list *list1, struct list *list2, struct list *node)
+{
+ /* Nothing to move when the span [list2->next, node) is empty. */
+ if (list_empty(list2) || (list2->next == node) || list_end(list2, node))
+ return;
+
+ /* Splice the moved span into list1 (its previous content is dropped). */
+ list1->next = list2->next;
+ list1->next->prev = list1;
+
+ list1->prev = node->prev;
+ node->prev->next = list1;
+
+ /* Make node the new first node of list2. */
+ list2->next = node;
+ node->prev = list2;
+}
+
+/*
+ * Append the nodes of list2 at the end of list1.
+ *
+ * After completion, list2 is stale.
+ */
+static inline void
+list_concat(struct list *list1, const struct list *list2)
+{
+ struct list *last1, *first2, *last2;
+
+ if (list_empty(list2))
+ return;
+
+ last1 = list1->prev;
+ first2 = list2->next;
+ last2 = list2->prev;
+
+ /* Link the tail of list1 to the first node of list2's chain. */
+ last1->next = first2;
+ first2->prev = last1;
+
+ /* Close the circle back through list1's head; list2 is now stale. */
+ last2->next = list1;
+ list1->prev = last2;
+}
+
+/*
+ * Set the new head of a list.
+ *
+ * This function is an optimized version of :
+ * list_init(&new_list);
+ * list_concat(&new_list, &old_list);
+ *
+ * After completion, old_head is stale.
+ */
+static inline void
+list_set_head(struct list *new_head, const struct list *old_head)
+{
+ if (list_empty(old_head)) {
+ list_init(new_head);
+ return;
+ }
+
+ /* Copy the links, then repoint both neighbors at the new head. */
+ *new_head = *old_head;
+ new_head->next->prev = new_head;
+ new_head->prev->next = new_head;
+}
+
+/*
+ * Add a node between two nodes.
+ */
+static inline void
+list_add(struct list *prev, struct list *next, struct list *node)
+{
+    /* Wire the new node first, then splice it in between its neighbors. */
+    node->prev = prev;
+    node->next = next;
+    prev->next = node;
+    next->prev = node;
+}
+
+/*
+ * Insert a node at the head of a list.
+ */
+static inline void
+list_insert(struct list *list, struct list *node)
+{
+ /* The head itself acts as the "previous" node. */
+ list_add(list, list->next, node);
+}
+
+/*
+ * Insert a node at the tail of a list.
+ */
+static inline void
+list_insert_tail(struct list *list, struct list *node)
+{
+ /* The head itself acts as the "next" node. */
+ list_add(list->prev, list, node);
+}
+
+/*
+ * Insert a node before another node.
+ */
+static inline void
+list_insert_before(struct list *next, struct list *node)
+{
+ /* Link between next's current predecessor and next. */
+ list_add(next->prev, next, node);
+}
+
+/*
+ * Insert a node after another node.
+ */
+static inline void
+list_insert_after(struct list *prev, struct list *node)
+{
+ /* Link between prev and its current successor. */
+ list_add(prev, prev->next, node);
+}
+
+/*
+ * Remove a node from a list.
+ *
+ * After completion, the node is stale.
+ */
+static inline void
+list_remove(struct list *node)
+{
+ /* The removed node's own links are deliberately left dangling. */
+ node->prev->next = node->next;
+ node->next->prev = node->prev;
+}
+
+/*
+ * Forge a loop to process all nodes of a list.
+ *
+ * The node must not be altered during the loop.
+ */
+#define list_for_each(list, node) \
+for (node = list_first(list); \
+ !list_end(list, node); \
+ node = list_next(node))
+
+/*
+ * Forge a loop to process all nodes of a list.
+ */
+#define list_for_each_safe(list, node, tmp) \
+for (node = list_first(list), tmp = list_next(node); \
+ !list_end(list, node); \
+ node = tmp, tmp = list_next(node))
+
+/*
+ * Version of list_for_each() that processes nodes backward.
+ */
+#define list_for_each_reverse(list, node) \
+for (node = list_last(list); \
+ !list_end(list, node); \
+ node = list_prev(node))
+
+/*
+ * Version of list_for_each_safe() that processes nodes backward.
+ */
+#define list_for_each_reverse_safe(list, node, tmp) \
+for (node = list_last(list), tmp = list_prev(node); \
+ !list_end(list, node); \
+ node = tmp, tmp = list_prev(node))
+
+/*
+ * Forge a loop to process all entries of a list.
+ *
+ * The entry node must not be altered during the loop.
+ */
+#define list_for_each_entry(list, entry, member) \
+for (entry = list_entry(list_first(list), typeof(*entry), member); \
+ !list_end(list, &entry->member); \
+ entry = list_entry(list_next(&entry->member), typeof(*entry), \
+ member))
+
+/*
+ * Forge a loop to process all entries of a list.
+ */
+#define list_for_each_entry_safe(list, entry, tmp, member) \
+for (entry = list_entry(list_first(list), typeof(*entry), member), \
+ tmp = list_entry(list_next(&entry->member), typeof(*entry), \
+ member); \
+ !list_end(list, &entry->member); \
+ entry = tmp, tmp = list_entry(list_next(&entry->member), \
+ typeof(*entry), member))
+
+/*
+ * Version of list_for_each_entry() that processes entries backward.
+ */
+#define list_for_each_entry_reverse(list, entry, member) \
+for (entry = list_entry(list_last(list), typeof(*entry), member); \
+ !list_end(list, &entry->member); \
+ entry = list_entry(list_prev(&entry->member), typeof(*entry), \
+ member))
+
+/*
+ * Version of list_for_each_entry_safe() that processes entries backward.
+ */
+#define list_for_each_entry_reverse_safe(list, entry, tmp, member) \
+for (entry = list_entry(list_last(list), typeof(*entry), member), \
+ tmp = list_entry(list_prev(&entry->member), typeof(*entry), \
+ member); \
+ !list_end(list, &entry->member); \
+ entry = tmp, tmp = list_entry(list_prev(&entry->member), \
+ typeof(*entry), member))
+
+#endif /* _LIB_LIST_H */
diff --git a/lib/macros.h b/lib/macros.h
new file mode 100644
index 00000000..ca6379ac
--- /dev/null
+++ b/lib/macros.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2009, 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Helper macros.
+ */
+
+#ifndef _LIB_MACROS_H
+#define _LIB_MACROS_H
+
+#include <lib/stddef.h>
+
+#define MACRO_BEGIN ({
+#define MACRO_END })
+
+#define XQUOTE(x) #x
+#define QUOTE(x) XQUOTE(x)
+
+#define STRLEN(x) (sizeof(x) - 1)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0)
+#define ISP2(x) P2ALIGNED(x, x)
+#define P2ALIGN(x, a) ((x) & -(a))
+#define P2ROUND(x, a) (-(-(x) & -(a)))
+#define P2END(x, a) (-(~(x) & -(a)))
+
+#define structof(ptr, type, member) \
+ ((type *)((char *)ptr - offsetof(type, member)))
+
+#define alignof(x) __alignof__(x)
+
+#define likely(expr) __builtin_expect(!!(expr), 1)
+#define unlikely(expr) __builtin_expect(!!(expr), 0)
+
+#define barrier() asm volatile("" : : : "memory")
+
+#define __noreturn __attribute__((noreturn))
+#define __aligned(x) __attribute__((aligned(x)))
+#define __always_inline inline __attribute__((always_inline))
+#define __section(x) __attribute__((section(x)))
+#define __packed __attribute__((packed))
+#define __alias(x) __attribute__((alias(x)))
+
+#define __format_printf(fmt, args) \
+ __attribute__((format(printf, fmt, args)))
+
+#endif /* _LIB_MACROS_H */
diff --git a/lib/rbtree.c b/lib/rbtree.c
new file mode 100644
index 00000000..16718968
--- /dev/null
+++ b/lib/rbtree.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/rbtree.h>
+#include <lib/rbtree_i.h>
+#include <lib/stddef.h>
+
+/*
+ * Return the index of a node in the children array of its parent.
+ *
+ * The parent parameter must not be null, and must be the parent of the
+ * given node.
+ */
+static inline int
+rbtree_node_index(const struct rbtree_node *node,
+ const struct rbtree_node *parent)
+{
+ assert(parent != NULL);
+ assert((node == NULL) || (rbtree_node_parent(node) == parent));
+
+ if (parent->children[RBTREE_LEFT] == node)
+ return RBTREE_LEFT;
+
+ assert(parent->children[RBTREE_RIGHT] == node);
+
+ return RBTREE_RIGHT;
+}
+
+/*
+ * Return the color of a node.
+ */
+static inline int
+rbtree_node_color(const struct rbtree_node *node)
+{
+ return node->parent & RBTREE_COLOR_MASK;
+}
+
+/*
+ * Return true if the node is red.
+ */
+static inline int
+rbtree_node_is_red(const struct rbtree_node *node)
+{
+ return rbtree_node_color(node) == RBTREE_COLOR_RED;
+}
+
+/*
+ * Return true if the node is black.
+ */
+static inline int
+rbtree_node_is_black(const struct rbtree_node *node)
+{
+ return rbtree_node_color(node) == RBTREE_COLOR_BLACK;
+}
+
+/*
+ * Set the parent of a node, retaining its current color.
+ */
+static inline void
+rbtree_node_set_parent(struct rbtree_node *node, struct rbtree_node *parent)
+{
+ assert(rbtree_node_check_alignment(node));
+ assert(rbtree_node_check_alignment(parent));
+
+ node->parent = (unsigned long)parent | (node->parent & RBTREE_COLOR_MASK);
+}
+
+/*
+ * Set the color of a node, retaining its current parent.
+ */
+static inline void
+rbtree_node_set_color(struct rbtree_node *node, int color)
+{
+ assert((color & ~RBTREE_COLOR_MASK) == 0);
+ node->parent = (node->parent & RBTREE_PARENT_MASK) | color;
+}
+
/*
 * Set the color of a node to red, retaining its current parent.
 *
 * Convenience wrapper around rbtree_node_set_color().
 */
static inline void
rbtree_node_set_red(struct rbtree_node *node)
{
    rbtree_node_set_color(node, RBTREE_COLOR_RED);
}
+
/*
 * Set the color of a node to black, retaining its current parent.
 *
 * Convenience wrapper around rbtree_node_set_color().
 */
static inline void
rbtree_node_set_black(struct rbtree_node *node)
{
    rbtree_node_set_color(node, RBTREE_COLOR_BLACK);
}
+
+/*
+ * Return the left-most deepest child node of the given node.
+ */
+static struct rbtree_node *
+rbtree_node_find_deepest(struct rbtree_node *node)
+{
+ struct rbtree_node *parent;
+
+ assert(node != NULL);
+
+ for (;;) {
+ parent = node;
+ node = node->children[RBTREE_LEFT];
+
+ if (node == NULL) {
+ node = parent->children[RBTREE_RIGHT];
+
+ if (node == NULL)
+ return parent;
+ }
+ }
+}
+
/*
 * Perform a tree rotation, rooted at the given node.
 *
 * The direction parameter defines the rotation direction and is either
 * RBTREE_LEFT or RBTREE_RIGHT. The left/right locals are relative to
 * that direction, which lets one code path handle both symmetric cases.
 */
static void
rbtree_rotate(struct rbtree *tree, struct rbtree_node *node, int direction)
{
    struct rbtree_node *parent, *rnode;
    int left, right;

    left = direction;
    right = 1 - left;
    parent = rbtree_node_parent(node);
    rnode = node->children[right]; /* rnode replaces node as subtree root */

    /* Transfer rnode's left subtree to node's right slot. */
    node->children[right] = rnode->children[left];

    if (rnode->children[left] != NULL)
        rbtree_node_set_parent(rnode->children[left], node);

    /* Make node the left child of rnode. */
    rnode->children[left] = node;
    rbtree_node_set_parent(rnode, parent);

    /* Attach rnode where node used to hang (or as the new root). */
    if (unlikely(parent == NULL))
        tree->root = rnode;
    else
        parent->children[rbtree_node_index(node, parent)] = rnode;

    rbtree_node_set_parent(node, rnode);
}
+
/*
 * Link a new node at the given insertion point and restore the red-black
 * invariants bottom-up (see rbtree_i.h for the contract).
 *
 * The left/right locals are relative to the side of the grand parent the
 * parent is on, keeping the two symmetric cases in one code path.
 */
void
rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent,
                        int index, struct rbtree_node *node)
{
    struct rbtree_node *grand_parent, *uncle, *tmp;
    int left, right;

    assert(rbtree_node_check_alignment(parent));
    assert(rbtree_node_check_alignment(node));

    /* New nodes are inserted red, with no children. */
    node->parent = (unsigned long)parent | RBTREE_COLOR_RED;
    node->children[RBTREE_LEFT] = NULL;
    node->children[RBTREE_RIGHT] = NULL;

    if (unlikely(parent == NULL))
        tree->root = node;
    else
        parent->children[index] = node;

    for (;;) {
        /* Node is the root: color it black and stop. */
        if (parent == NULL) {
            rbtree_node_set_black(node);
            break;
        }

        /* Parent is black: no red-red violation, stop. */
        if (rbtree_node_is_black(parent))
            break;

        /* Parent is red, so it cannot be the (black) root. */
        grand_parent = rbtree_node_parent(parent);
        assert(grand_parent != NULL);

        left = rbtree_node_index(parent, grand_parent);
        right = 1 - left;

        uncle = grand_parent->children[right];

        /*
         * Uncle is red. Flip colors and repeat at grand parent.
         */
        if ((uncle != NULL) && rbtree_node_is_red(uncle)) {
            rbtree_node_set_black(uncle);
            rbtree_node_set_black(parent);
            rbtree_node_set_red(grand_parent);
            node = grand_parent;
            parent = rbtree_node_parent(node);
            continue;
        }

        /*
         * Node is the right child of its parent. Rotate left at parent,
         * reducing to the next case.
         */
        if (parent->children[right] == node) {
            rbtree_rotate(tree, parent, left);
            tmp = node;
            node = parent;
            parent = tmp;
        }

        /*
         * Node is the left child of its parent. Handle colors, rotate right
         * at grand parent, and leave.
         */
        rbtree_node_set_black(parent);
        rbtree_node_set_red(grand_parent);
        rbtree_rotate(tree, grand_parent, right);
        break;
    }

    assert(rbtree_node_is_black(tree->root));
}
+
/*
 * Remove a node from a tree (see rbtree.h for the contract).
 *
 * After the unlinking phase, color/child/parent describe where black
 * height may have been lost: color is the color of the removed (or moved)
 * node, child the subtree that took its place (possibly null, considered
 * a black leaf), and parent that subtree's parent.
 */
void
rbtree_remove(struct rbtree *tree, struct rbtree_node *node)
{
    struct rbtree_node *child, *parent, *brother;
    int color, left, right;

    if (node->children[RBTREE_LEFT] == NULL)
        child = node->children[RBTREE_RIGHT];
    else if (node->children[RBTREE_RIGHT] == NULL)
        child = node->children[RBTREE_LEFT];
    else {
        struct rbtree_node *successor;

        /*
         * Two-children case: replace the node with its successor.
         */

        /* The successor is the left-most node of the right subtree. */
        successor = node->children[RBTREE_RIGHT];

        while (successor->children[RBTREE_LEFT] != NULL)
            successor = successor->children[RBTREE_LEFT];

        /* What is effectively removed is the successor's old location. */
        color = rbtree_node_color(successor);
        child = successor->children[RBTREE_RIGHT];
        parent = rbtree_node_parent(node);

        /* Splice the successor into the removed node's place. */
        if (unlikely(parent == NULL))
            tree->root = successor;
        else
            parent->children[rbtree_node_index(node, parent)] = successor;

        parent = rbtree_node_parent(successor);

        /*
         * Set parent directly to keep the original color.
         */
        successor->parent = node->parent;
        successor->children[RBTREE_LEFT] = node->children[RBTREE_LEFT];
        rbtree_node_set_parent(successor->children[RBTREE_LEFT], successor);

        if (node == parent)
            /* The successor was the removed node's own right child. */
            parent = successor;
        else {
            successor->children[RBTREE_RIGHT] = node->children[RBTREE_RIGHT];
            rbtree_node_set_parent(successor->children[RBTREE_RIGHT],
                                   successor);
            /* The successor's old slot receives its right child. */
            parent->children[RBTREE_LEFT] = child;

            if (child != NULL)
                rbtree_node_set_parent(child, parent);
        }

        goto update_color;
    }

    /*
     * Node has at most one child.
     */

    color = rbtree_node_color(node);
    parent = rbtree_node_parent(node);

    /* Link the child (possibly null) in place of the removed node. */
    if (child != NULL)
        rbtree_node_set_parent(child, parent);

    if (unlikely(parent == NULL))
        tree->root = child;
    else
        parent->children[rbtree_node_index(node, parent)] = child;

    /*
     * The node has been removed, update the colors. The child pointer can
     * be null, in which case it is considered a black leaf.
     */
update_color:
    /* Removing a red node never breaks the black height invariant. */
    if (color == RBTREE_COLOR_RED)
        return;

    for (;;) {
        /* Child is red: repaint it black to restore the black height. */
        if ((child != NULL) && rbtree_node_is_red(child)) {
            rbtree_node_set_black(child);
            break;
        }

        /* The deficit reached the root: the whole tree lost one level. */
        if (parent == NULL)
            break;

        left = rbtree_node_index(child, parent);
        right = 1 - left;

        /* The brother exists, since the removed branch was black. */
        brother = parent->children[right];

        /*
         * Brother is red. Recolor and rotate left at parent so that brother
         * becomes black.
         */
        if (rbtree_node_is_red(brother)) {
            rbtree_node_set_black(brother);
            rbtree_node_set_red(parent);
            rbtree_rotate(tree, parent, left);
            brother = parent->children[right];
        }

        /*
         * Brother has no red child. Recolor and repeat at parent.
         */
        if (((brother->children[RBTREE_LEFT] == NULL)
             || rbtree_node_is_black(brother->children[RBTREE_LEFT]))
            && ((brother->children[RBTREE_RIGHT] == NULL)
                || rbtree_node_is_black(brother->children[RBTREE_RIGHT]))) {
            rbtree_node_set_red(brother);
            child = parent;
            parent = rbtree_node_parent(child);
            continue;
        }

        /*
         * Brother's right child is black. Recolor and rotate right at brother.
         */
        if ((brother->children[right] == NULL)
            || rbtree_node_is_black(brother->children[right])) {
            rbtree_node_set_black(brother->children[left]);
            rbtree_node_set_red(brother);
            rbtree_rotate(tree, brother, right);
            brother = parent->children[right];
        }

        /*
         * Brother's left child is black. Exchange parent and brother colors
         * (we already know brother is black), set brother's right child black,
         * rotate left at parent and leave.
         */
        rbtree_node_set_color(brother, rbtree_node_color(parent));
        rbtree_node_set_black(parent);
        rbtree_node_set_black(brother->children[right]);
        rbtree_rotate(tree, parent, left);
        break;
    }

    assert((tree->root == NULL) || rbtree_node_is_black(tree->root));
}
+
+struct rbtree_node *
+rbtree_nearest(struct rbtree_node *parent, int index, int direction)
+{
+ assert(rbtree_check_index(direction));
+
+ if (parent == NULL)
+ return NULL;
+
+ assert(rbtree_check_index(index));
+
+ if (index != direction)
+ return parent;
+
+ return rbtree_walk(parent, direction);
+}
+
+struct rbtree_node *
+rbtree_firstlast(const struct rbtree *tree, int direction)
+{
+ struct rbtree_node *prev, *cur;
+
+ assert(rbtree_check_index(direction));
+
+ prev = NULL;
+
+ for (cur = tree->root; cur != NULL; cur = cur->children[direction])
+ prev = cur;
+
+ return prev;
+}
+
/*
 * Return the node next to, or previous to, the given node (see
 * rbtree_i.h).
 *
 * The left/right locals are relative to the requested direction: for
 * RBTREE_RIGHT (next), the result is the left-most node of the right
 * subtree when there is one.
 */
struct rbtree_node *
rbtree_walk(struct rbtree_node *node, int direction)
{
    int left, right;

    assert(rbtree_check_index(direction));

    left = direction;
    right = 1 - left;

    if (node == NULL)
        return NULL;

    if (node->children[left] != NULL) {
        /* Extreme node of the subtree on the requested side. */
        node = node->children[left];

        while (node->children[right] != NULL)
            node = node->children[right];
    } else {
        struct rbtree_node *parent;
        int index;

        /* No such subtree: climb until coming up from the other side. */
        for (;;) {
            parent = rbtree_node_parent(node);

            /* Reached the root from the requested side: no neighbor. */
            if (parent == NULL)
                return NULL;

            index = rbtree_node_index(node, parent);
            node = parent;

            if (index == right)
                break;
        }
    }

    return node;
}
+
+struct rbtree_node *
+rbtree_postwalk_deepest(const struct rbtree *tree)
+{
+ struct rbtree_node *node;
+
+ node = tree->root;
+
+ if (node == NULL)
+ return NULL;
+
+ return rbtree_node_find_deepest(node);
+}
+
/*
 * Unlink a node from its tree and return the next (right) node in
 * postorder (see rbtree_i.h).
 *
 * The node must have no children left (already visited and unlinked).
 */
struct rbtree_node *
rbtree_postwalk_unlink(struct rbtree_node *node)
{
    struct rbtree_node *parent;
    int index;

    if (node == NULL)
        return NULL;

    assert(node->children[RBTREE_LEFT] == NULL);
    assert(node->children[RBTREE_RIGHT] == NULL);

    parent = rbtree_node_parent(node);

    /* Unlinking the root leaves the traversal with nothing to visit. */
    if (parent == NULL)
        return NULL;

    /* Detach the node from its parent. */
    index = rbtree_node_index(node, parent);
    parent->children[index] = NULL;
    node = parent->children[RBTREE_RIGHT];

    /* In postorder, the parent follows its (now only) right subtree. */
    if (node == NULL)
        return parent;

    return rbtree_node_find_deepest(node);
}
diff --git a/lib/rbtree.h b/lib/rbtree.h
new file mode 100644
index 00000000..e607fcea
--- /dev/null
+++ b/lib/rbtree.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Red-black tree.
+ */
+
+#ifndef _LIB_RBTREE_H
+#define _LIB_RBTREE_H
+
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+
+/*
+ * Indexes of the left and right nodes in the children array of a node.
+ */
+#define RBTREE_LEFT 0
+#define RBTREE_RIGHT 1
+
+/*
+ * Red-black node.
+ */
+struct rbtree_node;
+
+/*
+ * Red-black tree.
+ */
+struct rbtree;
+
+/*
+ * Static tree initializer.
+ */
+#define RBTREE_INITIALIZER { NULL }
+
+#include "rbtree_i.h"
+
/*
 * Initialize a tree.
 */
static inline void
rbtree_init(struct rbtree *tree)
{
    /* An empty tree is simply a null root. */
    tree->root = NULL;
}
+
/*
 * Initialize a node.
 *
 * A node is in no tree when its parent points to itself.
 */
static inline void
rbtree_node_init(struct rbtree_node *node)
{
    assert(rbtree_node_check_alignment(node));

    /* Self-parenting marks the node as unlinked; new nodes are red. */
    node->parent = (unsigned long)node | RBTREE_COLOR_RED;
    node->children[RBTREE_LEFT] = NULL;
    node->children[RBTREE_RIGHT] = NULL;
}
+
/*
 * Return true if node is in no tree.
 *
 * See rbtree_node_init(): an unlinked node is its own parent.
 */
static inline int
rbtree_node_unlinked(const struct rbtree_node *node)
{
    return rbtree_node_parent(node) == node;
}
+
/*
 * Macro that evaluates to the address of the structure containing the
 * given node based on the given type and member.
 *
 * Thin wrapper around structof() from lib/macros.h.
 */
#define rbtree_entry(node, type, member) structof(node, type, member)
+
/*
 * Return true if tree is empty.
 *
 * An empty tree has a null root, see rbtree_init().
 */
static inline int
rbtree_empty(const struct rbtree *tree)
{
    return tree->root == NULL;
}
+
/*
 * Look up a node in a tree.
 *
 * Note that implementing the lookup algorithm as a macro gives two benefits:
 * First, it avoids the overhead of a callback function. Next, the type of the
 * cmp_fn parameter isn't rigid. The only guarantee offered by this
 * implementation is that the key parameter is the first parameter given to
 * cmp_fn. This way, users can pass only the value they need for comparison
 * instead of e.g. allocating a full structure on the stack.
 *
 * The macro is a statement expression whose value is the matching node,
 * or NULL if none matched.
 *
 * See rbtree_insert().
 */
#define rbtree_lookup(tree, key, cmp_fn)                \
MACRO_BEGIN                                             \
    struct rbtree_node *cur;                            \
    int diff;                                           \
                                                        \
    cur = (tree)->root;                                 \
                                                        \
    while (cur != NULL) {                               \
        diff = cmp_fn(key, cur);                        \
                                                        \
        if (diff == 0)                                  \
            break;                                      \
                                                        \
        cur = cur->children[rbtree_d2i(diff)];          \
    }                                                   \
                                                        \
    cur;                                                \
MACRO_END
+
/*
 * Look up a node or one of its nearest nodes in a tree.
 *
 * This macro essentially acts as rbtree_lookup() but if no entry matched
 * the key, an additional step is performed to obtain the next or previous
 * node, depending on the direction (left or right).
 *
 * The constraints that apply to the key parameter are the same as for
 * rbtree_lookup().
 *
 * The macro's value is the matching node, the nearest node in the given
 * direction, or NULL.
 */
#define rbtree_lookup_nearest(tree, key, cmp_fn, dir)   \
MACRO_BEGIN                                             \
    struct rbtree_node *cur, *prev;                     \
    int diff, index;                                    \
                                                        \
    prev = NULL;                                        \
    index = -1;                                         \
    cur = (tree)->root;                                 \
                                                        \
    while (cur != NULL) {                               \
        diff = cmp_fn(key, cur);                        \
                                                        \
        if (diff == 0)                                  \
            break;                                      \
                                                        \
        prev = cur;                                     \
        index = rbtree_d2i(diff);                       \
        cur = cur->children[index];                     \
    }                                                   \
                                                        \
    if (cur == NULL)                                    \
        cur = rbtree_nearest(prev, index, dir);         \
                                                        \
    cur;                                                \
MACRO_END
+
/*
 * Insert a node in a tree.
 *
 * This macro performs a standard lookup to obtain the insertion point of
 * the given node in the tree (it is assumed that the inserted node never
 * compares equal to any other entry in the tree) and links the node. It
 * then checks red-black rules violations, and rebalances the tree if
 * necessary.
 *
 * Unlike rbtree_lookup(), the cmp_fn parameter must compare two complete
 * entries, so it is suggested to use two different comparison inline
 * functions, such as myobj_cmp_lookup() and myobj_cmp_insert(). There is no
 * guarantee about the order of the nodes given to the comparison function.
 *
 * See rbtree_lookup().
 */
#define rbtree_insert(tree, node, cmp_fn)               \
MACRO_BEGIN                                             \
    struct rbtree_node *cur, *prev;                     \
    int diff, index;                                    \
                                                        \
    prev = NULL;                                        \
    index = -1;                                         \
    cur = (tree)->root;                                 \
                                                        \
    while (cur != NULL) {                               \
        diff = cmp_fn(node, cur);                       \
        assert(diff != 0);                              \
        prev = cur;                                     \
        index = rbtree_d2i(diff);                       \
        cur = cur->children[index];                     \
    }                                                   \
                                                        \
    rbtree_insert_rebalance(tree, prev, index, node);   \
MACRO_END
+
/*
 * Look up a node/slot pair in a tree.
 *
 * This macro essentially acts as rbtree_lookup() but in addition to a node,
 * it also returns a slot, which identifies an insertion point in the tree.
 * If the returned node is null, the slot can be used by rbtree_insert_slot()
 * to insert without the overhead of an additional lookup. The slot is a
 * simple unsigned long integer.
 *
 * The constraints that apply to the key parameter are the same as for
 * rbtree_lookup().
 *
 * The macro's value is the matching node or NULL; the slot is written
 * to the given lvalue as a side effect.
 */
#define rbtree_lookup_slot(tree, key, cmp_fn, slot)     \
MACRO_BEGIN                                             \
    struct rbtree_node *cur, *prev;                     \
    int diff, index;                                    \
                                                        \
    prev = NULL;                                        \
    index = 0;                                          \
    cur = (tree)->root;                                 \
                                                        \
    while (cur != NULL) {                               \
        diff = cmp_fn(key, cur);                        \
                                                        \
        if (diff == 0)                                  \
            break;                                      \
                                                        \
        prev = cur;                                     \
        index = rbtree_d2i(diff);                       \
        cur = cur->children[index];                     \
    }                                                   \
                                                        \
    (slot) = rbtree_slot(prev, index);                  \
    cur;                                                \
MACRO_END
+
/*
 * Insert a node at an insertion point in a tree.
 *
 * This macro essentially acts as rbtree_insert() except that it doesn't
 * obtain the insertion point with a standard lookup. The insertion point
 * is obtained by calling rbtree_lookup_slot(). In addition, the new node
 * must not compare equal to an existing node in the tree (i.e. the slot
 * must denote a null node).
 */
#define rbtree_insert_slot(tree, slot, node)            \
MACRO_BEGIN                                             \
    struct rbtree_node *parent;                         \
    int index;                                          \
                                                        \
    parent = rbtree_slot_parent(slot);                  \
    index = rbtree_slot_index(slot);                    \
    rbtree_insert_rebalance(tree, parent, index, node); \
MACRO_END
+
/*
 * Remove a node from a tree.
 *
 * After completion, the node is stale.
 */
void rbtree_remove(struct rbtree *tree, struct rbtree_node *node);

/*
 * Return the first (left-most) node of a tree, or NULL if empty.
 */
#define rbtree_first(tree) rbtree_firstlast(tree, RBTREE_LEFT)

/*
 * Return the last (right-most) node of a tree, or NULL if empty.
 */
#define rbtree_last(tree) rbtree_firstlast(tree, RBTREE_RIGHT)

/*
 * Return the node previous to the given node, or NULL if none.
 */
#define rbtree_prev(node) rbtree_walk(node, RBTREE_LEFT)

/*
 * Return the node next to the given node, or NULL if none.
 */
#define rbtree_next(node) rbtree_walk(node, RBTREE_RIGHT)
+
/*
 * Forge a loop to process all nodes of a tree, removing them when visited.
 *
 * This macro can only be used to destroy a tree, so that the resources used
 * by the entries can be released by the user. It basically removes all nodes
 * without doing any color checking.
 *
 * The node and tmp parameters must be struct rbtree_node pointer lvalues:
 * node is the current (already unlinked) entry, tmp holds the next one.
 *
 * After completion, all nodes and the tree root member are stale.
 */
#define rbtree_for_each_remove(tree, node, tmp)         \
for (node = rbtree_postwalk_deepest(tree),              \
     tmp = rbtree_postwalk_unlink(node);                \
     node != NULL;                                      \
     node = tmp, tmp = rbtree_postwalk_unlink(node))    \
+#endif /* _LIB_RBTREE_H */
diff --git a/lib/rbtree_i.h b/lib/rbtree_i.h
new file mode 100644
index 00000000..1f85c1a9
--- /dev/null
+++ b/lib/rbtree_i.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LIB_RBTREE_I_H
+#define _LIB_RBTREE_I_H
+
+#include <lib/assert.h>
+#include <lib/macros.h>
+#include <lib/stddef.h>
+
/*
 * Red-black node structure.
 *
 * To reduce the number of branches and the instruction cache footprint,
 * the left and right child pointers are stored in an array, and the symmetry
 * of most tree operations is exploited by using left/right variables when
 * referring to children.
 *
 * In addition, this implementation assumes that all nodes are 4-byte aligned,
 * so that the least significant bit of the parent member can be used to store
 * the color of the node. This is true for all modern 32 and 64 bits
 * architectures, as long as the nodes aren't embedded in structures with
 * special alignment constraints such as member packing.
 */
struct rbtree_node {
    unsigned long parent;               /* parent pointer | color bit */
    struct rbtree_node *children[2];    /* indexed by RBTREE_LEFT/RIGHT */
};
+
/*
 * Red-black tree structure.
 */
struct rbtree {
    struct rbtree_node *root;   /* null when the tree is empty */
};
+
+/*
+ * Masks applied on the parent member of a node to obtain either the
+ * color or the parent address.
+ */
+#define RBTREE_COLOR_MASK 0x1UL
+#define RBTREE_PARENT_MASK (~0x3UL)
+
+/*
+ * Node colors.
+ */
+#define RBTREE_COLOR_RED 0
+#define RBTREE_COLOR_BLACK 1
+
+/*
+ * Masks applied on slots to obtain either the child index or the parent
+ * address.
+ */
+#define RBTREE_SLOT_INDEX_MASK 0x1UL
+#define RBTREE_SLOT_PARENT_MASK (~RBTREE_SLOT_INDEX_MASK)
+
/*
 * Return true if the given index is a valid child index, i.e. either
 * RBTREE_LEFT (0) or RBTREE_RIGHT (1).
 */
static inline int
rbtree_check_index(int index)
{
    return (index == 0) || (index == 1);
}
+
/*
 * Convert the result of a comparison into an index in the children array
 * (0 or 1): negative or zero differences map to the left child, positive
 * differences to the right child.
 *
 * This function is mostly used when looking up a node.
 */
static inline int
rbtree_d2i(int diff)
{
    return diff > 0;
}
+
+/*
+ * Return true if the given pointer is suitably aligned.
+ */
+static inline int
+rbtree_node_check_alignment(const struct rbtree_node *node)
+{
+ return ((unsigned long)node & (~RBTREE_PARENT_MASK)) == 0;
+}
+
+/*
+ * Return the parent of a node.
+ */
+static inline struct rbtree_node *
+rbtree_node_parent(const struct rbtree_node *node)
+{
+ return (struct rbtree_node *)(node->parent & RBTREE_PARENT_MASK);
+}
+
/*
 * Translate an insertion point (parent node and child index) into a slot.
 */
static inline unsigned long
rbtree_slot(struct rbtree_node *parent, int index)
{
    assert(rbtree_node_check_alignment(parent));
    assert(rbtree_check_index(index));

    /* The index fits in the alignment bits of the parent address. */
    return (unsigned long)parent | (unsigned long)index;
}
+
+/*
+ * Extract the parent address from a slot.
+ */
+static inline struct rbtree_node *
+rbtree_slot_parent(unsigned long slot)
+{
+ return (struct rbtree_node *)(slot & RBTREE_SLOT_PARENT_MASK);
+}
+
+/*
+ * Extract the index from a slot.
+ */
+static inline int
+rbtree_slot_index(unsigned long slot)
+{
+ return slot & RBTREE_SLOT_INDEX_MASK;
+}
+
+/*
+ * Insert a node in a tree, rebalancing it if necessary.
+ *
+ * The index parameter is the index in the children array of the parent where
+ * the new node is to be inserted. It is ignored if the parent is null.
+ *
+ * This function is intended to be used by the rbtree_insert() macro only.
+ */
+void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent,
+ int index, struct rbtree_node *node);
+
+/*
+ * Return the previous or next node relative to a location in a tree.
+ *
+ * The parent and index parameters define the location, which can be empty.
+ * The direction parameter is either RBTREE_LEFT (to obtain the previous
+ * node) or RBTREE_RIGHT (to obtain the next one).
+ */
+struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index,
+ int direction);
+
+/*
+ * Return the first or last node of a tree.
+ *
+ * The direction parameter is either RBTREE_LEFT (to obtain the first node)
+ * or RBTREE_RIGHT (to obtain the last one).
+ */
+struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction);
+
+/*
+ * Return the node next to, or previous to the given node.
+ *
+ * The direction parameter is either RBTREE_LEFT (to obtain the previous node)
+ * or RBTREE_RIGHT (to obtain the next one).
+ */
+struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction);
+
+/*
+ * Return the left-most deepest node of a tree, which is the starting point of
+ * the postorder traversal performed by rbtree_for_each_remove().
+ */
+struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree);
+
+/*
+ * Unlink a node from its tree and return the next (right) node in postorder.
+ */
+struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node);
+
+#endif /* _LIB_RBTREE_I_H */
diff --git a/lib/sprintf.c b/lib/sprintf.c
new file mode 100644
index 00000000..7cf1e136
--- /dev/null
+++ b/lib/sprintf.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdarg.h>
+
+#include <lib/limits.h>
+#include <lib/sprintf.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+
/*
 * Formatting flags.
 *
 * FORMAT_LOWER must be 0x20 as it is OR'd with digits, eg.
 * '0': 0x30 | 0x20 => 0x30 ('0')
 * 'A': 0x41 | 0x20 => 0x61 ('a')
 */
#define SPRINTF_FORMAT_ALT_FORM 0x01        /* '#' flag */
#define SPRINTF_FORMAT_ZERO_PAD 0x02        /* '0' flag */
#define SPRINTF_FORMAT_LEFT_JUSTIFY 0x04    /* '-' flag */
#define SPRINTF_FORMAT_BLANK 0x08           /* ' ' flag */
#define SPRINTF_FORMAT_SIGN 0x10            /* '+' flag */
#define SPRINTF_FORMAT_LOWER 0x20           /* lower-case hex digits */
#define SPRINTF_FORMAT_CONV_SIGNED 0x40     /* signed conversion (d/i) */

/* Length modifiers. */
enum {
    SPRINTF_MODIFIER_NONE,
    SPRINTF_MODIFIER_CHAR,      /* hh */
    SPRINTF_MODIFIER_SHORT,     /* h */
    SPRINTF_MODIFIER_LONG,      /* l */
    SPRINTF_MODIFIER_LONGLONG,  /* ll */
    SPRINTF_MODIFIER_PTR, /* Used only for %p */
    SPRINTF_MODIFIER_SIZE,      /* z */
    SPRINTF_MODIFIER_PTRDIFF    /* t */
};

/* Conversion specifier classes. */
enum {
    SPRINTF_SPECIFIER_INVALID,
    SPRINTF_SPECIFIER_INT,      /* d, i, u, o, x, X, p */
    SPRINTF_SPECIFIER_CHAR,     /* c */
    SPRINTF_SPECIFIER_STR,      /* s */
    SPRINTF_SPECIFIER_NRCHARS,  /* n */
    SPRINTF_SPECIFIER_PERCENT   /* %% */
};

/*
 * Size for the temporary number buffer. The minimum base is 8 so 3 bits
 * are consumed per digit. Add one to round up. The conversion algorithm
 * doesn't use the null byte.
 */
#define SPRINTF_MAX_NUM_SIZE (((sizeof(uint64_t) * CHAR_BIT) / 3) + 1)

/*
 * Special size for vsnprintf(), used by sprintf()/vsprintf() when the
 * buffer size is unknown.
 */
#define SPRINTF_NOLIMIT ((size_t)-1)

/* Upper-case digit set; FORMAT_LOWER is OR'd in for lower case. */
static const char sprintf_digits[] = "0123456789ABCDEF";
+
/*
 * Store a character at the current output position when it lies inside
 * the buffer, and return the next position.
 *
 * Positions past the end are still counted (but not stored), so the
 * caller can compute the length the full output would have required.
 */
static inline char *
sprintf_putchar(char *str, char *end, char c)
{
    if (str < end)
        *str = c;

    return str + 1;
}
+
/*
 * Return true if c is a decimal digit.
 */
static inline int
sprintf_isdigit(char c)
{
    return ('0' <= c) && (c <= '9');
}
+
/*
 * Format into str according to format.
 *
 * The destination size is unknown, so the output is unbounded - the
 * caller must provide a large enough buffer. See vsprintf().
 *
 * Return the value of vsprintf() - presumably the number of characters
 * written, excluding the null byte; confirm against vsnprintf().
 */
int
sprintf(char *str, const char *format, ...)
{
    va_list ap;
    int length;

    va_start(ap, format);
    length = vsprintf(str, format, ap);
    va_end(ap);

    return length;
}
+
/*
 * Like sprintf(), with a va_list instead of variable arguments.
 *
 * Implemented as vsnprintf() with SPRINTF_NOLIMIT as the size, i.e. an
 * effectively unbounded destination buffer.
 */
int
vsprintf(char *str, const char *format, va_list ap)
{
    return vsnprintf(str, SPRINTF_NOLIMIT, format, ap);
}
+
/*
 * Format at most size bytes (including the null byte) into str.
 *
 * Thin variable-argument wrapper around vsnprintf(); see vsnprintf()
 * for the formatting and return value semantics.
 */
int
snprintf(char *str, size_t size, const char *format, ...)
{
    va_list ap;
    int length;

    va_start(ap, format);
    length = vsnprintf(str, size, format, ap);
    va_end(ap);

    return length;
}
+
+int
+vsnprintf(char *str, size_t size, const char *format, va_list ap)
+{
+ unsigned long long n;
+ int i, len, found, flags, width, precision, modifier, specifier, shift;
+ unsigned char r, base, mask;
+ char c, *s, *start, *end, sign, tmp[SPRINTF_MAX_NUM_SIZE];
+
+ start = str;
+
+ if (size == 0)
+ end = NULL;
+ else if (size == SPRINTF_NOLIMIT)
+ end = (char *)-1;
+ else
+ end = start + size - 1;
+
+ while ((c = *format) != '\0') {
+ if (c != '%') {
+ str = sprintf_putchar(str, end, c);
+ format++;
+ continue;
+ }
+
+ /* Flags */
+
+ found = 1;
+ flags = 0;
+
+ do {
+ format++;
+ c = *format;
+
+ switch (c) {
+ case '#':
+ flags |= SPRINTF_FORMAT_ALT_FORM;
+ break;
+ case '0':
+ flags |= SPRINTF_FORMAT_ZERO_PAD;
+ break;
+ case '-':
+ flags |= SPRINTF_FORMAT_LEFT_JUSTIFY;
+ break;
+ case ' ':
+ flags |= SPRINTF_FORMAT_BLANK;
+ break;
+ case '+':
+ flags |= SPRINTF_FORMAT_SIGN;
+ break;
+ default:
+ found = 0;
+ break;
+ }
+ } while (found);
+
+ /* Width */
+
+ if (sprintf_isdigit(c)) {
+ width = 0;
+
+ while (sprintf_isdigit(c)) {
+ width = width * 10 + (c - '0');
+ format++;
+ c = *format;
+ }
+ } else if (c == '*') {
+ width = va_arg(ap, int);
+
+ if (width < 0) {
+ flags |= SPRINTF_FORMAT_LEFT_JUSTIFY;
+ width = -width;
+ }
+
+ format++;
+ c = *format;
+ } else {
+ width = 0;
+ }
+
+ /* Precision */
+
+ if (c == '.') {
+ format++;
+ c = *format;
+
+ if (sprintf_isdigit(c)) {
+ precision = 0;
+
+ while (sprintf_isdigit(c)) {
+ precision = precision * 10 + (c - '0');
+ format++;
+ c = *format;
+ }
+ } else if (c == '*') {
+ precision = va_arg(ap, int);
+
+ if (precision < 0)
+ precision = 0;
+
+ format++;
+ c = *format;
+ } else {
+ precision = 0;
+ }
+ } else {
+ /* precision is >= 0 only if explicit */
+ precision = -1;
+ }
+
+ /* Length modifier */
+
+ switch (c) {
+ case 'h':
+ case 'l':
+ format++;
+
+ if (c == *format) {
+ modifier = (c == 'h')
+ ? SPRINTF_MODIFIER_CHAR
+ : SPRINTF_MODIFIER_LONGLONG;
+ goto skip_modifier;
+ } else {
+ modifier = (c == 'h')
+ ? SPRINTF_MODIFIER_SHORT
+ : SPRINTF_MODIFIER_LONG;
+ c = *format;
+ }
+
+ break;
+ case 'z':
+ modifier = SPRINTF_MODIFIER_SIZE;
+ goto skip_modifier;
+ case 't':
+ modifier = SPRINTF_MODIFIER_PTRDIFF;
+skip_modifier:
+ format++;
+ c = *format;
+ break;
+ default:
+ modifier = SPRINTF_MODIFIER_NONE;
+ break;
+ }
+
+ /* Specifier */
+
+ switch (c) {
+ case 'd':
+ case 'i':
+ flags |= SPRINTF_FORMAT_CONV_SIGNED;
+ case 'u':
+ base = 10;
+ goto integer;
+ case 'o':
+ base = 8;
+ goto integer;
+ case 'p':
+ flags |= SPRINTF_FORMAT_ALT_FORM;
+ modifier = SPRINTF_MODIFIER_PTR;
+ case 'x':
+ flags |= SPRINTF_FORMAT_LOWER;
+ case 'X':
+ base = 16;
+integer:
+ specifier = SPRINTF_SPECIFIER_INT;
+ break;
+ case 'c':
+ specifier = SPRINTF_SPECIFIER_CHAR;
+ break;
+ case 's':
+ specifier = SPRINTF_SPECIFIER_STR;
+ break;
+ case 'n':
+ specifier = SPRINTF_SPECIFIER_NRCHARS;
+ break;
+ case '%':
+ specifier = SPRINTF_SPECIFIER_PERCENT;
+ break;
+ default:
+ specifier = SPRINTF_SPECIFIER_INVALID;
+ break;
+ }
+
+ /* Output */
+
+ switch (specifier) {
+ case SPRINTF_SPECIFIER_INT:
+ switch (modifier) {
+ case SPRINTF_MODIFIER_CHAR:
+ if (flags & SPRINTF_FORMAT_CONV_SIGNED)
+ n = (signed char)va_arg(ap, int);
+ else
+ n = (unsigned char)va_arg(ap, int);
+ break;
+ case SPRINTF_MODIFIER_SHORT:
+ if (flags & SPRINTF_FORMAT_CONV_SIGNED)
+ n = (short)va_arg(ap, int);
+ else
+ n = (unsigned short)va_arg(ap, int);
+ break;
+ case SPRINTF_MODIFIER_LONG:
+ if (flags & SPRINTF_FORMAT_CONV_SIGNED)
+ n = va_arg(ap, long);
+ else
+ n = va_arg(ap, unsigned long);
+ break;
+ case SPRINTF_MODIFIER_LONGLONG:
+ if (flags & SPRINTF_FORMAT_CONV_SIGNED)
+ n = va_arg(ap, long long);
+ else
+ n = va_arg(ap, unsigned long long);
+ break;
+ case SPRINTF_MODIFIER_PTR:
+ n = (unsigned long)va_arg(ap, void *);
+ break;
+ case SPRINTF_MODIFIER_SIZE:
+ if (flags & SPRINTF_FORMAT_CONV_SIGNED)
+ n = va_arg(ap, ssize_t);
+ else
+ n = va_arg(ap, size_t);
+ break;
+ case SPRINTF_MODIFIER_PTRDIFF:
+ n = va_arg(ap, ptrdiff_t);
+ break;
+ default:
+ if (flags & SPRINTF_FORMAT_CONV_SIGNED)
+ n = va_arg(ap, int);
+ else
+ n = va_arg(ap, unsigned int);
+ break;
+ }
+
+ if ((flags & SPRINTF_FORMAT_LEFT_JUSTIFY) || (precision >= 0))
+ flags &= ~SPRINTF_FORMAT_ZERO_PAD;
+
+ sign = 0;
+
+ if (flags & SPRINTF_FORMAT_ALT_FORM) {
+ /* '0' for octal */
+ width--;
+
+ /* '0x' or '0X' for hexadecimal */
+ if (base == 16)
+ width--;
+ } else if (flags & SPRINTF_FORMAT_CONV_SIGNED) {
+ if ((long long)n < 0) {
+ sign = '-';
+ width--;
+ n = -(long long)n;
+ } else if (flags & SPRINTF_FORMAT_SIGN) {
+ /* SPRINTF_FORMAT_SIGN must precede SPRINTF_FORMAT_BLANK. */
+ sign = '+';
+ width--;
+ } else if (flags & SPRINTF_FORMAT_BLANK) {
+ sign = ' ';
+ width--;
+ }
+ }
+
+ /* Conversion, in reverse order */
+
+ i = 0;
+
+ if (n == 0) {
+ if (precision != 0)
+ tmp[i++] = '0';
+ } else if (base == 10) {
+ /*
+ * Try to avoid 64 bits operations if the processor doesn't
+ * support them. Note that even when using modulus and
+ * division operators close to each other, the compiler will
+ * forge two calls to __udivdi3() and __umoddi3() instead of
+ * one to __udivmoddi3(), whereas processor instructions are
+ * generally correctly used once, giving both the remainder
+ * and the quotient, through plain or reciprocal division.
+ */
+#ifndef __LP64__
+ if (modifier == SPRINTF_MODIFIER_LONGLONG) {
+#endif /* __LP64__ */
+ do {
+ r = n % 10;
+ n /= 10;
+ tmp[i++] = sprintf_digits[r];
+ } while (n != 0);
+#ifndef __LP64__
+ } else {
+ unsigned long m;
+
+ m = (unsigned long)n;
+
+ do {
+ r = m % 10;
+ m /= 10;
+ tmp[i++] = sprintf_digits[r];
+ } while (m != 0);
+ }
+#endif /* __LP64__ */
+ } else {
+ mask = base - 1;
+ shift = (base == 8) ? 3 : 4;
+
+ do {
+ r = (unsigned char)n & mask;
+ n >>= shift;
+ tmp[i++] = sprintf_digits[r]
+ | (flags & SPRINTF_FORMAT_LOWER);
+ } while (n != 0);
+ }
+
+ if (i > precision)
+ precision = i;
+
+ width -= precision;
+
+ if (!(flags & (SPRINTF_FORMAT_LEFT_JUSTIFY
+ | SPRINTF_FORMAT_ZERO_PAD)))
+ while (width-- > 0)
+ str = sprintf_putchar(str, end, ' ');
+
+ if (flags & SPRINTF_FORMAT_ALT_FORM) {
+ str = sprintf_putchar(str, end, '0');
+
+ if (base == 16)
+ str = sprintf_putchar(str, end,
+ 'X' | (flags & SPRINTF_FORMAT_LOWER));
+ } else if (sign) {
+ str = sprintf_putchar(str, end, sign);
+ }
+
+ if (!(flags & SPRINTF_FORMAT_LEFT_JUSTIFY)) {
+ c = (flags & SPRINTF_FORMAT_ZERO_PAD) ? '0' : ' ';
+
+ while (width-- > 0)
+ str = sprintf_putchar(str, end, c);
+ }
+
+ while (i < precision--)
+ str = sprintf_putchar(str, end, '0');
+
+ while (i-- > 0)
+ str = sprintf_putchar(str, end, tmp[i]);
+
+ while (width-- > 0)
+ str = sprintf_putchar(str, end, ' ');
+
+ break;
+ case SPRINTF_SPECIFIER_CHAR:
+ c = (unsigned char)va_arg(ap, int);
+
+ if (!(flags & SPRINTF_FORMAT_LEFT_JUSTIFY))
+ while (--width > 0)
+ str = sprintf_putchar(str, end, ' ');
+
+ str = sprintf_putchar(str, end, c);
+
+ while (--width > 0)
+ str = sprintf_putchar(str, end, ' ');
+
+ break;
+ case SPRINTF_SPECIFIER_STR:
+ s = va_arg(ap, char *);
+
+ if (s == NULL)
+ s = "(null)";
+
+ len = 0;
+
+ for (len = 0; s[len] != '\0'; len++)
+ if (len == precision)
+ break;
+
+ if (!(flags & SPRINTF_FORMAT_LEFT_JUSTIFY))
+ while (len < width--)
+ str = sprintf_putchar(str, end, ' ');
+
+ for (i = 0; i < len; i++) {
+ str = sprintf_putchar(str, end, *s);
+ s++;
+ }
+
+ while (len < width--)
+ str = sprintf_putchar(str, end, ' ');
+
+ break;
+ case SPRINTF_SPECIFIER_NRCHARS:
+ if (modifier == SPRINTF_MODIFIER_CHAR) {
+ signed char *ptr = va_arg(ap, signed char *);
+ *ptr = str - start;
+ } else if (modifier == SPRINTF_MODIFIER_SHORT) {
+ short *ptr = va_arg(ap, short *);
+ *ptr = str - start;
+ } else if (modifier == SPRINTF_MODIFIER_LONG) {
+ long *ptr = va_arg(ap, long *);
+ *ptr = str - start;
+ } else if (modifier == SPRINTF_MODIFIER_LONGLONG) {
+ long long *ptr = va_arg(ap, long long *);
+ *ptr = str - start;
+ } else if (modifier == SPRINTF_MODIFIER_SIZE) {
+ ssize_t *ptr = va_arg(ap, ssize_t *);
+ *ptr = str - start;
+ } else if (modifier == SPRINTF_MODIFIER_PTRDIFF) {
+ ptrdiff_t *ptr = va_arg(ap, ptrdiff_t *);
+ *ptr = str - start;
+ } else {
+ int *ptr = va_arg(ap, int *);
+ *ptr = str - start;
+ }
+
+ break;
+ case SPRINTF_SPECIFIER_PERCENT:
+ case SPRINTF_SPECIFIER_INVALID:
+ str = sprintf_putchar(str, end, '%');
+ break;
+ default:
+ break;
+ }
+
+ if (specifier != SPRINTF_SPECIFIER_INVALID)
+ format++;
+ }
+
+ if (str < end)
+ *str = '\0';
+ else if (end != NULL)
+ *end = '\0';
+
+ return str - start;
+}
diff --git a/lib/sprintf.h b/lib/sprintf.h
new file mode 100644
index 00000000..af9e841b
--- /dev/null
+++ b/lib/sprintf.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Formatted string functions.
+ *
+ * The functions provided by this module implement a subset of the C99
+ * sprintf() like functions, mostly centered around character, string, and
+ * integer conversions.
+ *
+ * The supported specifiers are: d i o u x X c s p n %
+ * The supported length modifiers are: hh h l ll z t
+ */
+
+#ifndef _LIB_SPRINTF_H
+#define _LIB_SPRINTF_H
+
+#include <stdarg.h>
+
+#include <lib/macros.h>
+
+int sprintf(char *str, const char *format, ...) __format_printf(2, 3);
+int vsprintf(char *str, const char *format, va_list ap) __format_printf(2, 0);
+
+int snprintf(char *str, size_t size, const char *format, ...)
+ __format_printf(3, 4);
+int vsnprintf(char *str, size_t size, const char *format, va_list ap)
+ __format_printf(3, 0);
+
+#endif /* _LIB_SPRINTF_H */
diff --git a/lib/stddef.h b/lib/stddef.h
new file mode 100644
index 00000000..2cbb7a9e
--- /dev/null
+++ b/lib/stddef.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2010, 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
#ifndef _LIB_STDDEF_H
#define _LIB_STDDEF_H

/* Freestanding replacement for the hosted <stddef.h>. */
#define NULL ((void *)0)

/* Rely on the compiler builtin to avoid the classic null-pointer cast trick. */
#define offsetof(type, member) __builtin_offsetof(type, member)

/*
 * Size and pointer-difference types, sized per data model.
 * Note: ssize_t is a POSIX type, not part of the standard <stddef.h>;
 * it is provided here for convenience.
 */
#ifdef __LP64__
typedef unsigned long size_t;
typedef long ssize_t;
typedef long ptrdiff_t;
#else /* __LP64__ */
typedef unsigned int size_t;
typedef int ssize_t;
typedef int ptrdiff_t;
#endif /* __LP64__ */

#endif /* _LIB_STDDEF_H */
diff --git a/lib/stdint.h b/lib/stdint.h
new file mode 100644
index 00000000..ee531db3
--- /dev/null
+++ b/lib/stdint.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010, 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
#ifndef _LIB_STDINT_H
#define _LIB_STDINT_H

/* Freestanding replacement for the hosted <stdint.h> fixed-width types. */
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;

/*
 * Only the 64-bit types differ between data models (long is 64-bit under
 * LP64, so long long is only needed on 32-bit targets); the 32-bit types
 * are identical in both branches.
 */
#ifdef __LP64__
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed long int64_t;
typedef unsigned long uint64_t;
#else /* __LP64__ */
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed long long int64_t;
typedef unsigned long long uint64_t;
#endif /* __LP64__ */

#endif /* _LIB_STDINT_H */
diff --git a/lib/string.c b/lib/string.c
new file mode 100644
index 00000000..f0ed626c
--- /dev/null
+++ b/lib/string.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Trivial, portable implementations.
+ */
+
+#include <lib/stddef.h>
+#include <lib/string.h>
+
/*
 * Copy n bytes from src to dest and return dest.
 *
 * The regions must not overlap; use memmove() if they may.
 */
void *
memcpy(void *dest, const void *src, size_t n)
{
    char *out;
    const char *in;

    out = dest;
    in = src;

    while (n-- > 0)
        *out++ = *in++;

    return dest;
}
+
/*
 * Copy n bytes from src to dest, handling overlapping regions, and
 * return dest.
 *
 * When src precedes dest, copying proceeds backward so the source bytes
 * aren't clobbered before being read; otherwise a plain forward copy
 * is performed. Equal pointers require no work at all.
 */
void *
memmove(void *dest, const void *src, size_t n)
{
    char *out;
    const char *in;

    if (src > dest) {
        out = dest;
        in = src;

        while (n-- > 0)
            *out++ = *in++;
    } else if (src < dest) {
        out = (char *)dest + n;
        in = (const char *)src + n;

        while (n-- > 0)
            *--out = *--in;
    }

    return dest;
}
+
/*
 * Fill the first n bytes of s with the byte value c and return s.
 */
void *
memset(void *s, int c, size_t n)
{
    char *p;

    for (p = s; n > 0; n--)
        *p++ = c;

    return s;
}
+
/*
 * Compare the first n bytes of s1 and s2.
 *
 * Returns a negative value if s1 is less than s2, zero if they are equal,
 * and a positive value if s1 is greater than s2. Per the C standard,
 * bytes are compared as unsigned char.
 *
 * Two fixes vs the original: the result sign was inverted (it returned
 * a2[i] - a1[i], i.e. negative when s1 > s2, contrary to the standard
 * memcmp() contract), and bytes were compared as plain char, whose
 * signedness is implementation-defined.
 */
int
memcmp(const void *s1, const void *s2, size_t n)
{
    const unsigned char *a1, *a2;
    size_t i;

    a1 = s1;
    a2 = s2;

    for (i = 0; i < n; i++)
        if (a1[i] != a2[i])
            return (int)a1[i] - (int)a2[i];

    return 0;
}
+
/*
 * Return the number of bytes in s before its terminating null byte.
 */
size_t
strlen(const char *s)
{
    const char *start;

    start = s;

    while (*s != '\0')
        s++;

    return s - start;
}
+
/*
 * Copy the string src, including its terminating null byte, into dest,
 * and return dest.
 *
 * dest must be large enough for the copy; the regions must not overlap.
 */
char *
strcpy(char *dest, const char *src)
{
    size_t i;

    for (i = 0; (dest[i] = src[i]) != '\0'; i++)
        ;

    return dest;
}
+
/*
 * Compare the strings s1 and s2.
 *
 * Returns -1 if s1 is less than s2, 0 if they are equal, and 1 if s1 is
 * greater than s2. Per the C standard, characters are compared as
 * unsigned char; the original compared plain char, which mis-orders
 * bytes >= 0x80 on targets where char is signed. The -1/0/1 result
 * values of the original are preserved (the standard only requires the
 * sign to be correct).
 */
int
strcmp(const char *s1, const char *s2)
{
    unsigned char c1, c2;

    for (;;) {
        c1 = (unsigned char)*s1;
        c2 = (unsigned char)*s2;

        if (c1 != c2)
            return (c1 < c2) ? -1 : 1;

        if (c1 == '\0')
            return 0;

        s1++;
        s2++;
    }
}
diff --git a/lib/string.h b/lib/string.h
new file mode 100644
index 00000000..936892bd
--- /dev/null
+++ b/lib/string.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
#ifndef _LIB_STRING_H
#define _LIB_STRING_H

#include <lib/stddef.h>

/* Raw memory operations; memmove() supports overlapping regions. */
void * memcpy(void *dest, const void *src, size_t n);
void * memmove(void *dest, const void *src, size_t n);
void * memset(void *s, int c, size_t n);
int memcmp(const void *s1, const void *s2, size_t n);

/* Null-terminated string operations. */
size_t strlen(const char *s);
char * strcpy(char *dest, const char *src);
int strcmp(const char *s1, const char *s2);

#endif /* _LIB_STRING_H */
diff --git a/vm/vm_inherit.h b/vm/vm_inherit.h
new file mode 100644
index 00000000..b50d490d
--- /dev/null
+++ b/vm/vm_inherit.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
#ifndef _VM_VM_INHERIT_H
#define _VM_VM_INHERIT_H

/*
 * Inheritance flags.
 *
 * NOTE(review): names follow the Mach/BSD vm_inherit convention
 * (share/copy/none across new address spaces) — confirm intended
 * semantics once fork-like operations exist; only the constants are
 * defined here.
 */
#define VM_INHERIT_SHARE    0
#define VM_INHERIT_COPY     1
#define VM_INHERIT_NONE     2
#define VM_INHERIT_DEFAULT  VM_INHERIT_COPY

#endif /* _VM_VM_INHERIT_H */
diff --git a/vm/vm_kmem.c b/vm/vm_kmem.c
new file mode 100644
index 00000000..c9fd4027
--- /dev/null
+++ b/vm/vm_kmem.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/stddef.h>
+#include <machine/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Kernel map and storage.
+ */
+static struct vm_map kernel_map_store;
+struct vm_map *kernel_map = &kernel_map_store;
+
+/*
+ * Heap boundaries during bootstrap.
+ */
+static unsigned long vm_kmem_boot_start __initdata;
+static unsigned long vm_kmem_boot_end __initdata;
+
/*
 * Initialize the bootstrap heap boundaries from the pmap module and
 * check that both are page aligned.
 *
 * Must run before any call to vm_kmem_bootalloc().
 */
void __init
vm_kmem_setup(void)
{
    pmap_virtual_space(&vm_kmem_boot_start, &vm_kmem_boot_end);
    assert(vm_page_aligned(vm_kmem_boot_start));
    assert(vm_page_aligned(vm_kmem_boot_end));
}
+
/*
 * Allocate size bytes (rounded up to a whole number of pages) of wired
 * kernel memory from the bootstrap heap, backed by pages from the
 * physical boot allocator.
 *
 * Returns the starting virtual address. Panics if the bootstrap heap
 * is exhausted; this is boot-time-only code (__init).
 */
unsigned long __init
vm_kmem_bootalloc(size_t size)
{
    unsigned long start, va;
    vm_phys_t pa;

    assert(size > 0);

    size = vm_page_round(size);

    if ((vm_kmem_boot_end - vm_kmem_boot_start) < size)
        panic("vm_kmem: no virtual space available");

    /* Claim the virtual range before mapping it. */
    start = vm_kmem_boot_start;
    vm_kmem_boot_start += size;

    /* Grow the kernel page tables if the range extends past them. */
    if (pmap_klimit < vm_kmem_boot_start)
        pmap_growkernel(vm_kmem_boot_start);

    /* Back every page of the range with a boot-allocated physical page. */
    for (va = start; va < vm_kmem_boot_start; va += PAGE_SIZE) {
        pa = vm_phys_bootalloc();
        pmap_kenter(va, pa);
    }

    return start;
}
+
/*
 * Report the virtual range consumed during bootstrap.
 *
 * The reported start is VM_MIN_KERNEL_ADDRESS, not the initial value of
 * vm_kmem_boot_start — presumably so the whole range below the current
 * allocation mark is reserved by the caller; confirm against callers.
 */
void __init
vm_kmem_boot_space(unsigned long *start, unsigned long *end)
{
    *start = VM_MIN_KERNEL_ADDRESS;
    *end = vm_kmem_boot_start;
}
+
/*
 * Validate an allocation size: return 0 if size is usable, -1 if it is
 * zero (the only rejected value).
 */
static int
vm_kmem_alloc_check(size_t size)
{
    return (size == 0) ? -1 : 0;
}
+
+static int
+vm_kmem_free_check(unsigned long addr, size_t size)
+{
+ if (!vm_page_aligned(addr))
+ return -1;
+
+ return vm_kmem_alloc_check(size);
+}
+
+static unsigned long
+vm_kmem_alloc_va(size_t size)
+{
+ unsigned long va;
+ int error, flags;
+
+ size = vm_page_round(size);
+
+ va = 0;
+ flags = VM_MAP_PROT_ALL | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
+ | VM_MAP_ADVISE_NORMAL;
+ error = vm_map_enter(kernel_map, NULL, 0, &va, size, 0, flags);
+
+ if (error)
+ return 0;
+
+ return va;
+}
+
/*
 * Release a virtual range back to the kernel map, removing any kernel
 * mappings (pmap_kremove) before removing the map entry itself.
 */
static void
vm_kmem_free_va(unsigned long addr, size_t size)
{
    unsigned long end;

    end = addr + vm_page_round(size);
    pmap_kremove(addr, end);
    vm_map_remove(kernel_map, addr, end);
}
+
/*
 * Allocate size bytes of wired kernel memory: reserve virtual space,
 * then back each page with a freshly allocated physical page.
 *
 * Returns the starting virtual address, or 0 on failure. On partial
 * failure, vm_kmem_free() safely tears down only what was mapped,
 * because it skips addresses for which pmap_kextract() returns 0.
 *
 * NOTE(review): the loop bound uses the raw size while vm_kmem_alloc_va()
 * rounds it to a page multiple internally — correct as long as size is
 * a page multiple or the final partial page is intentionally backed by
 * the loop's last iteration; confirm the intended contract.
 */
unsigned long
vm_kmem_alloc(size_t size)
{
    struct vm_page *page;
    unsigned long va, start, end;

    assert(vm_kmem_alloc_check(size) == 0);

    va = vm_kmem_alloc_va(size);

    if (va == 0)
        return 0;

    for (start = va, end = va + size; start < end; start += PAGE_SIZE) {
        page = vm_phys_alloc(0);

        if (page == NULL)
            goto error_page;

        pmap_kenter(start, vm_page_to_pa(page));
    }

    return va;

error_page:
    /* Unmaps and frees only the pages actually entered so far. */
    vm_kmem_free(va, size);
    return 0;
}
+
/*
 * Release wired kernel memory previously obtained from vm_kmem_alloc():
 * free the physical page behind each mapped page of the range, then
 * release the virtual range itself.
 *
 * Unmapped pages (pmap_kextract() == 0) are skipped, which makes this
 * safe to call on the partially backed range left by a failed
 * vm_kmem_alloc().
 */
void
vm_kmem_free(unsigned long addr, size_t size)
{
    struct vm_page *page;
    unsigned long va, end;
    vm_phys_t pa;

    assert(vm_kmem_free_check(addr, size) == 0);

    size = vm_page_round(size);
    end = addr + size;

    for (va = addr; va < end; va += PAGE_SIZE) {
        pa = pmap_kextract(va);

        if (pa == 0)
            continue;

        page = vm_phys_lookup_page(pa);
        assert(page != NULL);
        vm_phys_free(page, 0);
    }

    vm_kmem_free_va(addr, size);
}
+
/*
 * Map the physical range [addr, addr + size) into the kernel map.
 *
 * The input range need not be page aligned: the mapping covers the
 * page-aligned hull of the range, and the returned pointer carries the
 * original sub-page offset (addr & PAGE_MASK). If map_addrp/map_sizep
 * aren't NULL, they receive the actual (aligned) mapping address and
 * size, which is what must later be passed to vm_kmem_unmap_pa().
 *
 * Returns NULL if no virtual space is available.
 */
void *
vm_kmem_map_pa(vm_phys_t addr, size_t size, unsigned long *map_addrp,
               size_t *map_sizep)
{
    unsigned long offset, map_addr;
    size_t map_size;
    vm_phys_t start;

    assert(vm_kmem_alloc_check(size) == 0);

    start = vm_page_trunc(addr);
    /* Page-aligned hull of [addr, addr + size). */
    map_size = vm_page_round(addr + size) - start;
    map_addr = vm_kmem_alloc_va(map_size);

    if (map_addr == 0)
        return NULL;

    for (offset = 0; offset < map_size; offset += PAGE_SIZE)
        pmap_kenter(map_addr + offset, start + offset);

    if (map_addrp != NULL)
        *map_addrp = map_addr;

    if (map_sizep != NULL)
        *map_sizep = map_size;

    return (void *)(map_addr + (unsigned long)(addr & PAGE_MASK));
}
+
/*
 * Undo a vm_kmem_map_pa() mapping. The arguments are the map_addrp and
 * map_sizep values reported by that function, not the original physical
 * address and size. The physical pages themselves are not freed.
 */
void
vm_kmem_unmap_pa(unsigned long map_addr, size_t map_size)
{
    assert(vm_kmem_free_check(map_addr, map_size) == 0);
    vm_kmem_free_va(map_addr, map_size);
}
diff --git a/vm/vm_kmem.h b/vm/vm_kmem.h
new file mode 100644
index 00000000..e23ab9bd
--- /dev/null
+++ b/vm/vm_kmem.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
#ifndef _VM_VM_KMEM_H
#define _VM_VM_KMEM_H

#include <kern/types.h>

/*
 * Special kernel addresses.
 *
 * NOTE(review): these look like linker-script symbols — only their
 * addresses (&_text, &_end, ...) are meaningful, never their char
 * contents; confirm against the linker script.
 */
extern char _text;
extern char _rodata;
extern char _data;
extern char _bss;
extern char _end;

/*
 * The kernel map.
 */
extern struct vm_map *kernel_map;

/*
 * Initialize the vm_kmem module.
 */
void vm_kmem_setup(void);

/*
 * Early kernel memory allocator.
 *
 * The main purpose of this function is to allow the allocation of the
 * physical page table.
 *
 * Returns the starting virtual address of the wired, page-rounded
 * allocation; panics on exhaustion (boot-time only).
 */
unsigned long vm_kmem_bootalloc(size_t size);

/*
 * Return the range of initial virtual memory used by the kernel.
 */
void vm_kmem_boot_space(unsigned long *start, unsigned long *end);

/*
 * Allocate memory from the kernel map.
 *
 * Returns 0 (not a valid kernel address) on failure.
 */
unsigned long vm_kmem_alloc(size_t size);

/*
 * Release memory back to the kernel map.
 */
void vm_kmem_free(unsigned long addr, size_t size);

/*
 * Map physical memory in a kernel map.
 *
 * Return the address at which the mapped memory can be accessed. If map_addrp
 * and/or map_sizep aren't NULL, they are updated to the address and size of
 * the mapping created.
 *
 * This is a convenience function for modules that must map random regions of
 * physical memory, and as such, it doesn't expect a page-aligned input range.
 *
 * TODO When mapping attributes are implemented, make this function disable
 * caching on the mapping.
 */
void * vm_kmem_map_pa(vm_phys_t addr, size_t size, unsigned long *map_addrp,
                      size_t *map_sizep);

/*
 * Unmap physical memory from a kernel map.
 *
 * The arguments are the mapping address/size reported by vm_kmem_map_pa().
 */
void vm_kmem_unmap_pa(unsigned long map_addr, size_t map_size);

#endif /* _VM_VM_KMEM_H */
diff --git a/vm/vm_map.c b/vm/vm_map.c
new file mode 100644
index 00000000..4030cdc6
--- /dev/null
+++ b/vm/vm_map.c
@@ -0,0 +1,644 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * XXX This module is far from complete. It just provides the basic support
+ * needed for kernel allocation.
+ */
+
+#include <kern/error.h>
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <lib/assert.h>
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <lib/rbtree.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <machine/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Special threshold which disables the use of the free area cache address.
+ */
+#define VM_MAP_NO_FIND_CACHE (~(size_t)0)
+
+/*
+ * Mapping request.
+ *
+ * Most members are input parameters from a call to e.g. vm_map_enter(). The
+ * start member is also an output argument. The next member is used internally
+ * by the mapping functions.
+ */
struct vm_map_request {
    struct vm_object *object;   /* backing object, NULL for anonymous */
    unsigned long offset;       /* offset into object (0 if object is NULL) */
    unsigned long start;        /* in: hint/fixed address, out: chosen address */
    size_t size;                /* page-aligned mapping size */
    size_t align;               /* required alignment, 0 for none */
    int flags;                  /* VM_MAP_* protection/inherit/advice flags */
    struct vm_map_entry *next;  /* internal: entry following the free area */
};
+
+/*
+ * Statically allocated map entry for the first kernel map entry.
+ */
+static struct vm_map_entry vm_map_kernel_entry;
+
+/*
+ * Statically allocated map entry for the kernel map entry allocator.
+ *
+ * The purpose of this entry is to reserve virtual space for the kernel map
+ * entries (those used in the kernel map). The reason is to avoid recursion,
+ * as normal map entries are allocated from the kernel map (like any other
+ * normal kernel object).
+ */
+static struct vm_map_entry vm_map_kentry_entry;
+
+/*
+ * Cache for the map entries used in the kernel map.
+ */
+static struct kmem_cache vm_map_kentry_cache;
+
+/*
+ * Cache for normal map entries.
+ */
+static struct kmem_cache vm_map_entry_cache;
+
+/*
+ * Address of the next free page available for kernel map entry allocation.
+ */
+static unsigned long vm_map_kentry_free;
+
+/*
+ * Allocate pages for the kernel map entry cache.
+ */
static unsigned long
vm_map_kentry_pagealloc(size_t size)
{
    struct vm_page *page;
    unsigned long addr, va;

    assert(size > 0);
    assert(vm_page_aligned(size));

    /* The kentry region is a fixed reservation; it can only be exhausted. */
    if ((vm_map_kentry_entry.end - vm_map_kentry_free) < size)
        panic("vm_map: kentry cache pages exhausted");

    /* Bump-allocate virtual space from the reserved kentry region. */
    addr = vm_map_kentry_free;
    vm_map_kentry_free += size;

    /* Back each page directly with physical memory, bypassing the
       kernel map to avoid recursion into this very allocator. */
    for (va = addr; va < vm_map_kentry_free; va += PAGE_SIZE) {
        page = vm_phys_alloc(0);

        if (page == NULL)
            panic("vm_map: no physical page for kentry cache");

        pmap_kenter(va, vm_page_to_pa(page));
    }

    return addr;
}
+
+static inline struct kmem_cache *
+vm_map_entry_select_cache(const struct vm_map *map)
+{
+ return (map == kernel_map) ? &vm_map_kentry_cache : &vm_map_entry_cache;
+}
+
+static struct vm_map_entry *
+vm_map_entry_create(const struct vm_map *map)
+{
+ struct vm_map_entry *entry;
+
+ entry = kmem_cache_alloc(vm_map_entry_select_cache(map));
+
+ if (entry == NULL)
+ panic("vm_map: can't create map entry");
+
+ return entry;
+}
+
/*
 * Return a map entry to the cache it was allocated from (selected by the
 * owning map, mirroring vm_map_entry_create()).
 */
static void
vm_map_entry_destroy(struct vm_map_entry *entry, const struct vm_map *map)
{
    kmem_cache_free(vm_map_entry_select_cache(map), entry);
}
+
+static inline int
+vm_map_entry_cmp_lookup(unsigned long addr, const struct rbtree_node *node)
+{
+ struct vm_map_entry *entry;
+
+ entry = rbtree_entry(node, struct vm_map_entry, tree_node);
+
+ if (addr >= entry->end)
+ return 1;
+
+ if (addr >= entry->start)
+ return 0;
+
+ return -1;
+}
+
/*
 * Tree insertion comparator: order entries by their start address,
 * reusing the lookup comparator on a's start against b's range.
 */
static inline int
vm_map_entry_cmp_insert(const struct rbtree_node *a,
                        const struct rbtree_node *b)
{
    struct vm_map_entry *entry;

    entry = rbtree_entry(a, struct vm_map_entry, tree_node);
    return vm_map_entry_cmp_lookup(entry->start, b);
}
+
/*
 * Extract the current-protection bits from mapping flags.
 */
static inline int
vm_map_get_protection(int flags)
{
    return flags & VM_MAP_PROT_MASK;
}
+
/*
 * Extract the maximum-protection bits from mapping flags, normalized to
 * the same bit positions as the current protection.
 *
 * NOTE(review): the >> 4 shift is coupled to the layout of
 * VM_MAP_MAX_PROT_MASK (defined in vm_map.h, not visible here) — keep
 * the two in sync.
 */
static inline int
vm_map_get_max_protection(int flags)
{
    return (flags & VM_MAP_MAX_PROT_MASK) >> 4;
}
+
#ifndef NDEBUG
/*
 * Debug-only sanity checks on a fully initialized mapping request:
 * alignment, non-empty non-overflowing range, protection consistency
 * (requested protection must be a subset of the maximum), and exactly
 * one inheritance and one advice flag set.
 *
 * Compiles to nothing under NDEBUG (see the macro stub below).
 */
static void
vm_map_request_assert_valid(const struct vm_map_request *request)
{
    int prot, max_prot;

    /* A non-zero offset only makes sense with a backing object. */
    assert((request->object != NULL) || (request->offset == 0));
    assert(vm_page_aligned(request->offset));
    assert(vm_page_aligned(request->start));
    assert(request->size > 0);
    assert(vm_page_aligned(request->size));
    /* Rejects address-space wrap-around. */
    assert((request->start + request->size) > request->start);
    assert((request->align == 0) || (request->align >= PAGE_SIZE));
    assert(ISP2(request->align));

    prot = vm_map_get_protection(request->flags);
    max_prot = vm_map_get_max_protection(request->flags);
    assert((prot & max_prot) == prot);
    assert(__builtin_popcount(request->flags & VM_MAP_INHERIT_MASK) == 1);
    assert(__builtin_popcount(request->flags & VM_MAP_ADVISE_MASK) == 1);
    /* A fixed mapping's address must honor the requested alignment. */
    assert(!(request->flags & VM_MAP_FIXED)
           || (request->align == 0)
           || P2ALIGNED(request->start, request->align));
}
#else /* NDEBUG */
#define vm_map_request_assert_valid(request)
#endif /* NDEBUG */
+
+/*
+ * Look up an entry in a map.
+ *
+ * This function returns the entry which is closest to the given address
+ * such that addr < entry->end (i.e. either containing or after the requested
+ * address), or NULL if there is no such entry.
+ */
static struct vm_map_entry *
vm_map_lookup_nearest(struct vm_map *map, unsigned long addr)
{
    struct vm_map_entry *entry;
    struct rbtree_node *node;

    assert(vm_page_aligned(addr));

    /* Fast path: the last entry found, if it contains addr. */
    entry = map->lookup_cache;

    if ((entry != NULL) && (addr >= entry->start) && (addr < entry->end))
        return entry;

    /* Slow path: tree lookup for the closest entry with addr < end. */
    node = rbtree_lookup_nearest(&map->entry_tree, addr,
                                 vm_map_entry_cmp_lookup, RBTREE_RIGHT);

    if (node != NULL) {
        entry = rbtree_entry(node, struct vm_map_entry, tree_node);
        assert(addr < entry->end);
        /* Remember the result for subsequent nearby lookups. */
        map->lookup_cache = entry;
        return entry;
    }

    return NULL;
}
+
/*
 * Invalidate the free-area search cache. Setting the threshold to
 * VM_MAP_NO_FIND_CACHE forces vm_map_find_avail() to scan from the map
 * start on the next search.
 */
static void
vm_map_reset_find_cache(struct vm_map *map)
{
    map->find_cache = 0;
    map->find_cache_threshold = VM_MAP_NO_FIND_CACHE;
}
+
/*
 * Check that the exact range requested in request->start/size is free
 * within the map, and record the entry immediately after it (or NULL)
 * in request->next. Returns 0 on success, ERROR_NOMEM otherwise.
 *
 * NOTE(review): (start + size) in the first test can wrap; callers go
 * through vm_map_request_assert_valid(), which asserts no overflow, but
 * that check vanishes under NDEBUG — confirm whether a hard check is
 * wanted here.
 */
static int
vm_map_find_fixed(struct vm_map *map, struct vm_map_request *request)
{
    struct vm_map_entry *next;
    unsigned long start;
    size_t size;

    start = request->start;
    size = request->size;

    if ((start < map->start) || (start + size) > map->end)
        return ERROR_NOMEM;

    next = vm_map_lookup_nearest(map, start);

    /* No entry at or after start: only the map end bounds the range. */
    if (next == NULL) {
        if ((map->end - start) < size)
            return ERROR_NOMEM;

        request->next = NULL;
        return 0;
    }

    /* start must lie before next, with enough room up to next->start. */
    if ((start >= next->start) || ((next->start - start) < size))
        return ERROR_NOMEM;

    request->next = next;
    return 0;
}
+
/*
 * Find a free virtual range of request->size bytes, trying in order:
 * the caller's hint address (request->start, if non-zero), the cached
 * search position (when the request is larger than the largest hole
 * known to lie below it), and finally a linear walk of the entry list.
 * On success, request->start and request->next are filled in.
 *
 * find_cache_threshold tracks the largest hole skipped during a scan so
 * that future small requests can be served from map->start directly.
 *
 * NOTE(review): request->align is not honored on this path (only the
 * hint/fixed path could satisfy it trivially at align == 0); the module
 * header says it is incomplete — confirm callers only pass align == 0.
 */
static int
vm_map_find_avail(struct vm_map *map, struct vm_map_request *request)
{
    struct vm_map_entry *next;
    struct list *node;
    unsigned long base, start;
    size_t size, space;
    int error;

    /* If there is a hint, try there */
    if (request->start != 0) {
        error = vm_map_find_fixed(map, request);

        if (!error)
            return 0;
    }

    size = request->size;

    if (size > map->find_cache_threshold)
        base = map->find_cache;
    else {
        base = map->start;

        /*
         * Searching from the map start means the area which size is the
         * threshold (or a smaller one) may be selected, making the threshold
         * invalid. Reset it.
         */
        map->find_cache_threshold = 0;
    }

retry:
    start = base;
    next = vm_map_lookup_nearest(map, start);

    for (;;) {
        assert(start <= map->end);

        /*
         * The end of the map has been reached, and no space could be found.
         * If the search didn't start at map->start, retry from there in case
         * space is available below the previous start address.
         */
        if ((map->end - start) < size) {
            if (base != map->start) {
                base = map->start;
                map->find_cache_threshold = 0;
                goto retry;
            }

            return ERROR_NOMEM;
        }

        /* Size of the hole between start and the next entry (if any). */
        if (next == NULL)
            space = map->end - start;
        else if (start >= next->start)
            space = 0;
        else
            space = next->start - start;

        if (space >= size) {
            map->find_cache = start + size;
            request->start = start;
            request->next = next;
            return 0;
        }

        /* Remember the largest hole rejected during this scan. */
        if (space > map->find_cache_threshold)
            map->find_cache_threshold = space;

        /* next is non-NULL here: a NULL next implies space >= size above. */
        start = next->end;
        node = list_next(&next->list_node);

        if (list_end(&map->entry_list, node))
            next = NULL;
        else
            next = list_entry(node, struct vm_map_entry, list_node);
    }
}
+
+static void
+vm_map_link(struct vm_map *map, struct vm_map_entry *entry,
+ struct vm_map_entry *prev, struct vm_map_entry *next)
+{
+ assert((prev == NULL) || (next == NULL));
+
+ if ((prev == NULL) && (next == NULL))
+ list_insert_tail(&map->entry_list, &entry->list_node);
+ else if (prev == NULL)
+ list_insert_before(&next->list_node, &entry->list_node);
+ else
+ list_insert_after(&prev->list_node, &entry->list_node);
+
+ rbtree_insert(&map->entry_tree, &entry->tree_node, vm_map_entry_cmp_insert);
+ map->nr_entries++;
+}
+
/*
 * Remove an entry from a map's list and tree and update the count.
 * The entry itself is not freed; see vm_map_entry_destroy().
 */
static void
vm_map_unlink(struct vm_map *map, struct vm_map_entry *entry)
{
    list_remove(&entry->list_node);
    rbtree_remove(&map->entry_tree, &entry->tree_node);
    map->nr_entries--;
}
+
+/*
+ * Check mapping parameters, find a suitable area of virtual memory, and
+ * prepare the mapping request for that region.
+ */
+static int
+vm_map_prepare(struct vm_map *map, struct vm_object *object, unsigned long offset,
+ unsigned long start, size_t size, size_t align, int flags,
+ struct vm_map_request *request)
+{
+ int error;
+
+ request->object = object;
+ request->offset = offset;
+ request->start = start;
+ request->size = size;
+ request->align = align;
+ request->flags = flags;
+ vm_map_request_assert_valid(request);
+
+ if (flags & VM_MAP_FIXED)
+ error = vm_map_find_fixed(map, request);
+ else
+ error = vm_map_find_avail(map, request);
+
+ return error;
+}
+
+/*
+ * Convert a prepared mapping request into an entry in the given map.
+ *
+ * if entry is NULL, a map entry is allocated for the mapping.
+ */
static int
vm_map_insert(struct vm_map *map, struct vm_map_entry *entry,
              const struct vm_map_request *request)
{
    /* TODO: merge/extend request with neighbors */

    if (entry == NULL)
        entry = vm_map_entry_create(map);

    /* Materialize the prepared request as a map entry. */
    entry->start = request->start;
    entry->end = request->start + request->size;
    entry->object = request->object;
    entry->offset = request->offset;
    entry->flags = request->flags & VM_MAP_ENTRY_MASK;
    vm_map_link(map, entry, NULL, request->next);
    map->size += request->size;

    /* Kernel mappings may extend past the current page table coverage. */
    if ((map == kernel_map) && (pmap_klimit < entry->end))
        pmap_growkernel(entry->end);

    return 0;
}
+
/*
 * Establish a new mapping of size bytes in the given map. *startp is
 * the requested address (a hint, or exact with VM_MAP_FIXED) on entry
 * and receives the chosen address on success. Returns 0 or ERROR_NOMEM.
 *
 * The free-area search cache is reset on any failure.
 *
 * NOTE(review): offset is uint64_t here but vm_map_prepare() takes
 * unsigned long — this silently truncates large object offsets on
 * 32-bit targets; confirm intended.
 */
int
vm_map_enter(struct vm_map *map, struct vm_object *object, uint64_t offset,
             unsigned long *startp, size_t size, size_t align, int flags)
{
    struct vm_map_request request;
    int error;

    error = vm_map_prepare(map, object, offset, *startp, size, align, flags,
                           &request);

    if (error)
        goto error_enter;

    error = vm_map_insert(map, NULL, &request);

    if (error)
        goto error_enter;

    *startp = request.start;
    return 0;

error_enter:
    vm_map_reset_find_cache(map);
    return error;
}
+
+/*
+ * Split two copies of the same entry at split_addr: prev keeps the
+ * [prev->start, split_addr) part, next keeps [split_addr, old end).
+ * The backing object offset of next is advanced by the split distance.
+ */
+static void
+vm_map_split_entries(struct vm_map_entry *prev, struct vm_map_entry *next,
+ unsigned long split_addr)
+{
+ unsigned long diff;
+
+ /* The split address must fall strictly inside the entry */
+ assert(prev->start < split_addr);
+ assert(split_addr < prev->end);
+
+ diff = split_addr - prev->start;
+ prev->end = split_addr;
+ next->start = split_addr;
+
+ if (next->object != NULL)
+ next->offset += diff;
+}
+
+/*
+ * Make sure the given entry doesn't start below start, splitting it if
+ * necessary. The newly created entry covers the part below start and is
+ * linked just before the original one.
+ */
+static void
+vm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
+ unsigned long start)
+{
+ struct vm_map_entry *new_entry;
+
+ /* Nothing to do if the entry already begins at or after start */
+ if (entry->start >= start)
+ return;
+
+ new_entry = vm_map_entry_create(map);
+ *new_entry = *entry;
+ vm_map_split_entries(new_entry, entry, start);
+ vm_map_link(map, new_entry, NULL, entry);
+}
+
+/*
+ * Make sure the given entry doesn't end above end, splitting it if
+ * necessary. The newly created entry covers the part above end and is
+ * linked just after the original one.
+ */
+static void
+vm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry,
+ unsigned long end)
+{
+ struct vm_map_entry *new_entry;
+
+ /* Nothing to do if the entry already ends at or before end */
+ if (entry->end <= end)
+ return;
+
+ new_entry = vm_map_entry_create(map);
+ *new_entry = *entry;
+ vm_map_split_entries(entry, new_entry, end);
+ vm_map_link(map, new_entry, entry, NULL);
+}
+
+/*
+ * Remove all mappings intersecting [start, end) from the map. Entries
+ * straddling a boundary are clipped so only the inner part is destroyed.
+ */
+void
+vm_map_remove(struct vm_map *map, unsigned long start, unsigned long end)
+{
+ struct vm_map_entry *entry;
+ struct list *node;
+
+ assert(start >= map->start);
+ assert(end <= map->end);
+ assert(start < end);
+
+ entry = vm_map_lookup_nearest(map, start);
+
+ if (entry == NULL)
+ return;
+
+ /* Keep the part of the first entry that lies below start */
+ vm_map_clip_start(map, entry, start);
+
+ while (!list_end(&map->entry_list, &entry->list_node)
+ && (entry->start < end)) {
+ /* Keep the part of the last entry that lies above end */
+ vm_map_clip_end(map, entry, end);
+ map->size -= entry->end - entry->start;
+ /* Grab the successor before unlinking invalidates the node */
+ node = list_next(&entry->list_node);
+ vm_map_unlink(map, entry);
+ vm_map_entry_destroy(entry, map);
+ entry = list_entry(node, struct vm_map_entry, list_node);
+ }
+
+ /* The removed range may have invalidated the free-space cache */
+ vm_map_reset_find_cache(map);
+}
+
+/*
+ * Initialize an empty map covering the page-aligned range [start, end)
+ * on top of the given physical map.
+ */
+void
+vm_map_init(struct vm_map *map, struct pmap *pmap, unsigned long start,
+            unsigned long end)
+{
+    assert(vm_page_aligned(start));
+    assert(vm_page_aligned(end));
+
+    map->start = start;
+    map->end = end;
+    map->size = 0;
+    map->nr_entries = 0;
+    map->pmap = pmap;
+    map->lookup_cache = NULL;
+    list_init(&map->entry_list);
+    rbtree_init(&map->entry_tree);
+    vm_map_reset_find_cache(map);
+}
+
+void __init
+vm_map_bootstrap(void)
+{
+ struct vm_map_request request;
+ unsigned long start, end;
+ int error, flags;
+
+ vm_map_init(kernel_map, kernel_pmap, VM_MIN_KERNEL_ADDRESS,
+ VM_MAX_KERNEL_ADDRESS);
+
+ /*
+ * Create the initial kernel mapping. This reserves memory for at least
+ * the kernel image and the physical page table.
+ */
+ vm_kmem_boot_space(&start, &end);
+ flags = VM_MAP_PROT_ALL | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
+ | VM_MAP_ADVISE_NORMAL | VM_MAP_NOMERGE | VM_MAP_FIXED;
+ error = vm_map_prepare(kernel_map, NULL, 0, start, end - start, 0, flags,
+ &request);
+
+ if (error)
+ panic("vm_map: can't map initial kernel mapping");
+
+ /* Statically allocated entry: no allocator is available yet */
+ error = vm_map_insert(kernel_map, &vm_map_kernel_entry, &request);
+ assert(!error);
+
+ /*
+ * Create the kentry mapping.
+ *
+ * NOTE(review): presumably this reserves a dedicated virtual range of
+ * VM_MAP_KENTRY_SIZE from which kernel map entries are allocated (via
+ * vm_map_kentry_pagealloc), avoiding recursion through the regular
+ * mapping path — confirm against vm_map_kentry_pagealloc.
+ */
+ flags = VM_MAP_PROT_ALL | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
+ | VM_MAP_ADVISE_NORMAL | VM_MAP_NOMERGE;
+ error = vm_map_prepare(kernel_map, NULL, 0, 0, VM_MAP_KENTRY_SIZE, 0,
+ flags, &request);
+
+ if (error)
+ panic("vm_map: kentry mapping setup failed");
+
+ error = vm_map_insert(kernel_map, &vm_map_kentry_entry, &request);
+ assert(!error);
+
+ vm_map_kentry_free = vm_map_kentry_entry.start;
+
+ /* The kentry cache uses its own page source and is never reclaimed */
+ flags = KMEM_CACHE_NOCPUPOOL | KMEM_CACHE_NOOFFSLAB | KMEM_CACHE_NORECLAIM;
+ kmem_cache_init(&vm_map_kentry_cache, "vm_map_kentry",
+ sizeof(struct vm_map_entry), 0, NULL,
+ vm_map_kentry_pagealloc, NULL, flags);
+}
+
+void __init
+vm_map_setup(void)
+{
+ /* General-purpose map entry cache, usable once kmem is fully set up */
+ kmem_cache_init(&vm_map_entry_cache, "vm_map_entry",
+ sizeof(struct vm_map_entry), 0, NULL, NULL, NULL, 0);
+}
+
+/*
+ * Display the boundaries and entries of a map on the console.
+ */
+void
+vm_map_info(struct vm_map *map)
+{
+    struct vm_map_entry *entry;
+    const char *type, *name;
+
+    if (map == kernel_map)
+        name = "kernel map";
+    else
+        name = "map";
+
+    printk("vm_map: %s: %08lx-%08lx\n", name, map->start, map->end);
+    printk("vm_map:      start             end          "
+           "size     offset   flags    type\n");
+
+    list_for_each_entry(&map->entry_list, entry, list_node) {
+        if (entry->object == NULL)
+            type = "null";
+        else
+            type = "object";
+
+        printk("vm_map: %08lx %08lx %8luk %08llx %08x %s\n", entry->start,
+               entry->end, (entry->end - entry->start) >> 10, entry->offset,
+               entry->flags, type);
+    }
+
+    /* map->size is a size_t, which may not match what %u expects */
+    printk("vm_map: total: %luk\n", (unsigned long)(map->size >> 10));
+}
diff --git a/vm/vm_map.h b/vm/vm_map.h
new file mode 100644
index 00000000..7231b6a3
--- /dev/null
+++ b/vm/vm_map.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Virtual memory map management.
+ */
+
+#ifndef _VM_VM_MAP_H
+#define _VM_VM_MAP_H
+
+#include <lib/list.h>
+#include <lib/rbtree.h>
+#include <lib/stdint.h>
+#include <machine/pmap.h>
+
+/*
+ * Mapping flags and masks.
+ *
+ * All these flags can be used when creating a mapping. Most of them are
+ * also used as map entry flags.
+ */
+#define VM_MAP_PROT_READ 0x00001
+#define VM_MAP_PROT_WRITE 0x00002
+#define VM_MAP_PROT_EXEC 0x00004
+#define VM_MAP_PROT_ALL (VM_MAP_PROT_READ \
+ | VM_MAP_PROT_WRITE \
+ | VM_MAP_PROT_EXEC)
+#define VM_MAP_PROT_MASK VM_MAP_PROT_ALL
+
+#define VM_MAP_MAX_PROT_READ (VM_MAP_PROT_READ << 4)
+#define VM_MAP_MAX_PROT_WRITE (VM_MAP_PROT_WRITE << 4)
+#define VM_MAP_MAX_PROT_EXEC (VM_MAP_PROT_EXEC << 4)
+#define VM_MAP_MAX_PROT_ALL (VM_MAP_MAX_PROT_READ \
+ | VM_MAP_MAX_PROT_WRITE \
+ | VM_MAP_MAX_PROT_EXEC)
+#define VM_MAP_MAX_PROT_MASK VM_MAP_MAX_PROT_ALL
+
+#define VM_MAP_INHERIT_SHARE 0x00100
+#define VM_MAP_INHERIT_COPY 0x00200
+#define VM_MAP_INHERIT_NONE 0x00400
+#define VM_MAP_INHERIT_MASK (VM_MAP_INHERIT_SHARE \
+ | VM_MAP_INHERIT_COPY \
+ | VM_MAP_INHERIT_NONE)
+
+#define VM_MAP_ADVISE_NORMAL 0x01000
+#define VM_MAP_ADVISE_RAND 0x02000
+#define VM_MAP_ADVISE_SEQ 0x04000
+#define VM_MAP_ADVISE_MASK (VM_MAP_ADVISE_NORMAL \
+ | VM_MAP_ADVISE_RAND \
+ | VM_MAP_ADVISE_SEQ)
+
+#define VM_MAP_NOMERGE 0x10000
+#define VM_MAP_FIXED 0x20000 /* Not an entry flag */
+
+/*
+ * Flags that can be used as map entry flags.
+ */
+#define VM_MAP_ENTRY_MASK (VM_MAP_PROT_MASK \
+ | VM_MAP_MAX_PROT_MASK \
+ | VM_MAP_INHERIT_MASK \
+ | VM_MAP_ADVISE_MASK \
+ | VM_MAP_NOMERGE)
+
+/*
+ * Memory range descriptor.
+ */
+struct vm_map_entry {
+ struct list list_node; /* Node in the map's address-sorted list */
+ struct rbtree_node tree_node; /* Node in the map's lookup tree */
+ unsigned long start; /* Start virtual address (inclusive) */
+ unsigned long end; /* End virtual address (exclusive) */
+ struct vm_object *object; /* Backing object, NULL if none */
+ uint64_t offset; /* Offset into the backing object */
+ int flags; /* Entry flags (VM_MAP_ENTRY_MASK) */
+};
+
+/*
+ * Memory map.
+ */
+struct vm_map {
+ struct list entry_list; /* Entries sorted by address */
+ struct rbtree entry_tree; /* Entries indexed for fast lookup */
+ unsigned int nr_entries; /* Number of entries in the map */
+ unsigned long start; /* Lowest mappable address */
+ unsigned long end; /* Highest mappable address */
+ size_t size; /* Total mapped size in bytes */
+ struct vm_map_entry *lookup_cache; /* Presumably last entry found by
+ lookup — TODO confirm */
+ unsigned long find_cache; /* Free-space search hint (address) */
+ size_t find_cache_threshold; /* Free-space search hint (size) */
+ struct pmap *pmap; /* Underlying physical map */
+};
+
+/*
+ * Create a virtual mapping.
+ */
+int vm_map_enter(struct vm_map *map, struct vm_object *object, uint64_t offset,
+ unsigned long *startp, size_t size, size_t align, int flags);
+
+/*
+ * Remove mappings from start to end.
+ */
+void vm_map_remove(struct vm_map *map, unsigned long start, unsigned long end);
+
+/*
+ * Initialize a VM map.
+ */
+void vm_map_init(struct vm_map *map, struct pmap *pmap, unsigned long start,
+ unsigned long end);
+
+/*
+ * Early initialization of the vm_map module.
+ *
+ * This function creates the kernel map and the kentry cache, making it
+ * possible to map kernel memory.
+ */
+void vm_map_bootstrap(void);
+
+/*
+ * Set up the vm_map module.
+ */
+void vm_map_setup(void);
+
+/*
+ * Display information about a memory map.
+ */
+void vm_map_info(struct vm_map *map);
+
+#endif /* _VM_VM_MAP_H */
diff --git a/vm/vm_page.h b/vm/vm_page.h
new file mode 100644
index 00000000..641cbccf
--- /dev/null
+++ b/vm/vm_page.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2010, 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_PAGE_H
+#define _VM_VM_PAGE_H
+
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <kern/param.h>
+#include <kern/types.h>
+
+/*
+ * Address/page conversion and rounding macros (not inline functions to
+ * be easily usable on both virtual and physical addresses, which may not
+ * have the same type size).
+ */
+#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
+#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
+#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE)
+#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE)
+#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE)
+
+/*
+ * Physical page descriptor.
+ */
+struct vm_page {
+ struct list node; /* Free list / pool linkage */
+ unsigned short seg_index; /* Index of the owning physical segment */
+ unsigned short order; /* Free block order, or the special
+ allocated marker */
+ vm_phys_t phys_addr; /* Physical address of the page */
+};
+
+/*
+ * Return the physical address of a page descriptor.
+ */
+static inline vm_phys_t
+vm_page_to_pa(const struct vm_page *page)
+{
+ return page->phys_addr;
+}
+
+#endif /* _VM_VM_PAGE_H */
diff --git a/vm/vm_phys.c b/vm/vm_phys.c
new file mode 100644
index 00000000..3e8a70f0
--- /dev/null
+++ b/vm/vm_phys.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation uses the binary buddy system to manage its heap.
+ * Descriptions of the buddy system can be found in the following works :
+ * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
+ * - "Dynamic Storage Allocation: A Survey and Critical Review",
+ * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
+ *
+ * In addition, this allocator uses per-cpu pools of pages for order 0
+ * (i.e. single page) allocations. These pools act as caches (but are named
+ * differently to avoid confusion with CPU caches) that reduce contention on
+ * multiprocessor systems. When a pool is empty and cannot provide a page,
+ * it is filled by transferring multiple pages from the backend buddy system.
+ * The symmetric case is handled likewise.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <lib/sprintf.h>
+#include <lib/stddef.h>
+#include <lib/string.h>
+#include <machine/cpu.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Number of free block lists per segment.
+ */
+#define VM_PHYS_NR_FREE_LISTS 11
+
+/*
+ * The size of a CPU pool is computed by dividing the number of pages in its
+ * containing segment by this value.
+ */
+#define VM_PHYS_CPU_POOL_RATIO 1024
+
+/*
+ * Maximum number of pages in a CPU pool.
+ */
+#define VM_PHYS_CPU_POOL_MAX_SIZE 128
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define VM_PHYS_CPU_POOL_TRANSFER_RATIO 2
+
+/*
+ * Per-processor cache of pages.
+ */
+struct vm_phys_cpu_pool {
+ /* struct mutex mutex; */
+ int size;
+ int transfer_size;
+ int nr_pages;
+ struct list pages;
+};
+
+/*
+ * Special order value.
+ *
+ * When a page is free, its order is the index of its free list.
+ */
+#define VM_PHYS_ORDER_ALLOCATED VM_PHYS_NR_FREE_LISTS
+
+/*
+ * Doubly-linked list of free blocks.
+ */
+struct vm_phys_free_list {
+ unsigned long size;
+ struct list blocks;
+};
+
+/*
+ * Segment name buffer size.
+ */
+#define VM_PHYS_NAME_SIZE 16
+
+/*
+ * Segment of contiguous memory.
+ */
+struct vm_phys_seg {
+ struct vm_phys_cpu_pool cpu_pools[MAX_CPUS];
+
+ struct list node;
+ vm_phys_t start;
+ vm_phys_t end;
+ struct vm_page *pages;
+ struct vm_page *pages_end;
+ /* struct mutex mutex; */
+ struct vm_phys_free_list free_lists[VM_PHYS_NR_FREE_LISTS];
+ unsigned long nr_free_pages;
+ char name[VM_PHYS_NAME_SIZE];
+};
+
+/*
+ * Bootstrap information about a segment.
+ */
+struct vm_phys_boot_seg {
+ vm_phys_t avail_start;
+ vm_phys_t avail_end;
+};
+
+int vm_phys_ready;
+
+/*
+ * Segment lists, ordered by priority (higher priority lists have lower
+ * numerical priorities).
+ */
+static struct list vm_phys_seg_lists[VM_NR_PHYS_SEGLIST];
+
+/*
+ * Segment table.
+ */
+static struct vm_phys_seg vm_phys_segs[VM_MAX_PHYS_SEG];
+
+/*
+ * Bootstrap segment table.
+ */
+static struct vm_phys_boot_seg vm_phys_boot_segs[VM_MAX_PHYS_SEG] __initdata;
+
+/*
+ * Number of loaded segments.
+ */
+static unsigned int vm_phys_segs_size;
+
+static int vm_phys_load_initialized __initdata = 0;
+
+/*
+ * Initialize a page descriptor with its owning segment index, block
+ * order and physical address.
+ */
+static void __init
+vm_phys_init_page(struct vm_page *page, unsigned short seg_index,
+                  unsigned short order, vm_phys_t pa)
+{
+    page->phys_addr = pa;
+    page->order = order;
+    page->seg_index = seg_index;
+}
+
+/*
+ * Reset a free list to the empty state.
+ */
+static void __init
+vm_phys_free_list_init(struct vm_phys_free_list *free_list)
+{
+    list_init(&free_list->blocks);
+    free_list->size = 0;
+}
+
+/*
+ * Insert a block at the head of a free list.
+ */
+static inline void
+vm_phys_free_list_insert(struct vm_phys_free_list *free_list,
+ struct vm_page *page)
+{
+ /* Only blocks currently marked allocated may enter a free list */
+ assert(page->order == VM_PHYS_ORDER_ALLOCATED);
+
+ free_list->size++;
+ list_insert(&free_list->blocks, &page->node);
+}
+
+/*
+ * Remove a block from the free list it currently belongs to.
+ */
+static inline void
+vm_phys_free_list_remove(struct vm_phys_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(free_list->size != 0);
+ assert(!list_empty(&free_list->blocks));
+ /* A free block's order is the index of its free list */
+ assert(page->order < VM_PHYS_NR_FREE_LISTS);
+
+ free_list->size--;
+ list_remove(&page->node);
+}
+
+/*
+ * Allocate a block of 2^order pages from the buddy free lists of a
+ * segment, or return NULL if no block of sufficient size is free.
+ */
+static struct vm_page *
+vm_phys_seg_alloc_from_buddy(struct vm_phys_seg *seg, unsigned int order)
+{
+ struct vm_phys_free_list *free_list;
+ struct vm_page *page, *buddy;
+ unsigned int i;
+
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ /* Find the smallest non-empty free list of at least the given order */
+ for (i = order; i < VM_PHYS_NR_FREE_LISTS; i++) {
+ free_list = &seg->free_lists[i];
+
+ if (free_list->size != 0)
+ break;
+ }
+
+ if (i == VM_PHYS_NR_FREE_LISTS)
+ return NULL;
+
+ page = list_first_entry(&free_list->blocks, struct vm_page, node);
+ vm_phys_free_list_remove(free_list, page);
+ page->order = VM_PHYS_ORDER_ALLOCATED;
+
+ /*
+ * Split the block down to the requested order, returning the unused
+ * upper halves (the buddies) to the lower free lists.
+ */
+ while (i > order) {
+ i--;
+ buddy = &page[1 << i];
+ vm_phys_free_list_insert(&seg->free_lists[i], buddy);
+ buddy->order = i;
+ }
+
+ seg->nr_free_pages -= (1 << order);
+ return page;
+}
+
+/*
+ * Release a block of 2^order pages into the buddy free lists of a
+ * segment, coalescing it with its free buddies as far as possible.
+ */
+static void
+vm_phys_seg_free_to_buddy(struct vm_phys_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page *buddy;
+ vm_phys_t pa, buddy_pa;
+ unsigned int nr_pages;
+
+ assert(page >= seg->pages);
+ assert(page < seg->pages_end);
+ assert(page->order == VM_PHYS_ORDER_ALLOCATED);
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ nr_pages = (1 << order);
+ pa = page->phys_addr;
+
+ while (order < (VM_PHYS_NR_FREE_LISTS - 1)) {
+ /* The buddy's address differs only in the bit of the block size */
+ buddy_pa = pa ^ vm_page_ptoa(1 << order);
+
+ /* Stop if the buddy lies outside the segment */
+ if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
+ break;
+
+ buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
+
+ /* Stop unless the buddy is free and exactly the same size */
+ if (buddy->order != order)
+ break;
+
+ /* Merge: pull the buddy out and retry one order higher */
+ vm_phys_free_list_remove(&seg->free_lists[order], buddy);
+ buddy->order = VM_PHYS_ORDER_ALLOCATED;
+ order++;
+ pa &= -vm_page_ptoa(1 << order);
+ page = &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ vm_phys_free_list_insert(&seg->free_lists[order], page);
+ page->order = order;
+ seg->nr_free_pages += nr_pages;
+}
+
+/*
+ * Initialize an empty CPU pool of the given capacity. The transfer size
+ * (pages moved to/from the buddy system in one batch) is the capacity
+ * divided by VM_PHYS_CPU_POOL_TRANSFER_RATIO, rounded up.
+ */
+static void __init
+vm_phys_cpu_pool_init(struct vm_phys_cpu_pool *cpu_pool, int size)
+{
+    list_init(&cpu_pool->pages);
+    cpu_pool->nr_pages = 0;
+    cpu_pool->size = size;
+    cpu_pool->transfer_size = (size + VM_PHYS_CPU_POOL_TRANSFER_RATIO - 1)
+                              / VM_PHYS_CPU_POOL_TRANSFER_RATIO;
+}
+
+/*
+ * Return the pool of the current processor in the given segment.
+ */
+static inline struct vm_phys_cpu_pool *
+vm_phys_cpu_pool_get(struct vm_phys_seg *seg)
+{
+ return &seg->cpu_pools[cpu_id()];
+}
+
+/*
+ * Take one page off a CPU pool, which must not be empty.
+ */
+static inline struct vm_page *
+vm_phys_cpu_pool_pop(struct vm_phys_cpu_pool *cpu_pool)
+{
+    struct vm_page *page;
+
+    assert(cpu_pool->nr_pages != 0);
+    page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
+    list_remove(&page->node);
+    cpu_pool->nr_pages--;
+    return page;
+}
+
+/*
+ * Put one page into a CPU pool, which must not be full.
+ */
+static inline void
+vm_phys_cpu_pool_push(struct vm_phys_cpu_pool *cpu_pool, struct vm_page *page)
+{
+ assert(cpu_pool->nr_pages < cpu_pool->size);
+ cpu_pool->nr_pages++;
+ list_insert(&cpu_pool->pages, &page->node);
+}
+
+/*
+ * Refill an empty CPU pool with up to transfer_size single pages taken
+ * from the segment's buddy system. Return the number of pages obtained
+ * (0 if the segment is out of memory).
+ */
+static int
+vm_phys_cpu_pool_fill(struct vm_phys_cpu_pool *cpu_pool,
+ struct vm_phys_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == 0);
+
+ /* mutex_lock(&seg->mutex); */
+
+ for (i = 0; i < cpu_pool->transfer_size; i++) {
+ page = vm_phys_seg_alloc_from_buddy(seg, 0);
+
+ /* The buddy system may run out before the batch completes */
+ if (page == NULL)
+ break;
+
+ vm_phys_cpu_pool_push(cpu_pool, page);
+ }
+
+ /* mutex_unlock(&seg->mutex); */
+
+ return i;
+}
+
+/*
+ * Return transfer_size pages from a full CPU pool to the segment's
+ * buddy system.
+ */
+static void
+vm_phys_cpu_pool_drain(struct vm_phys_cpu_pool *cpu_pool,
+ struct vm_phys_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == cpu_pool->size);
+
+ /* mutex_lock(&seg->mutex); */
+
+ for (i = cpu_pool->transfer_size; i > 0; i--) {
+ page = vm_phys_cpu_pool_pop(cpu_pool);
+ vm_phys_seg_free_to_buddy(seg, page, 0);
+ }
+
+ /* mutex_unlock(&seg->mutex); */
+}
+
+/*
+ * Return the size of a segment in bytes.
+ */
+static inline vm_phys_t __init
+vm_phys_seg_size(struct vm_phys_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+/*
+ * Compute the capacity of the per-CPU pools of a segment: the number of
+ * pages in the segment divided by VM_PHYS_CPU_POOL_RATIO, clamped to the
+ * range [1, VM_PHYS_CPU_POOL_MAX_SIZE].
+ */
+static int __init
+vm_phys_seg_compute_pool_size(struct vm_phys_seg *seg)
+{
+    vm_phys_t size;
+
+    size = vm_page_atop(vm_phys_seg_size(seg)) / VM_PHYS_CPU_POOL_RATIO;
+
+    if (size < 1)
+        size = 1;
+
+    if (size > VM_PHYS_CPU_POOL_MAX_SIZE)
+        size = VM_PHYS_CPU_POOL_MAX_SIZE;
+
+    return size;
+}
+
+/*
+ * Initialize a segment and associate it with its slice of the global
+ * memory map. All pages start out marked allocated (order 0); they are
+ * released to the free lists later, during vm_phys_setup().
+ */
+static void __init
+vm_phys_seg_init(struct vm_phys_seg *seg, struct vm_page *pages)
+{
+ vm_phys_t pa;
+ int pool_size;
+ unsigned int i;
+
+ pool_size = vm_phys_seg_compute_pool_size(seg);
+
+ for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
+ vm_phys_cpu_pool_init(&seg->cpu_pools[i], pool_size);
+
+ seg->pages = pages;
+ seg->pages_end = pages + vm_page_atop(vm_phys_seg_size(seg));
+ /* mutex_init(&seg->mutex); */
+
+ for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
+ vm_phys_free_list_init(&seg->free_lists[i]);
+
+ seg->nr_free_pages = 0;
+ /* The segment's index in vm_phys_segs, stored in every descriptor */
+ i = seg - vm_phys_segs;
+
+ for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
+ vm_phys_init_page(&pages[vm_page_atop(pa - seg->start)], i,
+ VM_PHYS_ORDER_ALLOCATED, pa);
+}
+
+/*
+ * Allocate a block of 2^order pages from a segment, or return NULL if
+ * the segment is out of memory. Single-page requests are served from
+ * the per-CPU pool, refilled from the buddy system when empty; larger
+ * requests go straight to the buddy system.
+ */
+static struct vm_page *
+vm_phys_seg_alloc(struct vm_phys_seg *seg, unsigned int order)
+{
+ struct vm_phys_cpu_pool *cpu_pool;
+ struct vm_page *page;
+ int filled;
+
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ if (order == 0) {
+ cpu_pool = vm_phys_cpu_pool_get(seg);
+
+ /* mutex_lock(&cpu_pool->mutex); */
+
+ if (cpu_pool->nr_pages == 0) {
+ filled = vm_phys_cpu_pool_fill(cpu_pool, seg);
+
+ /* Refill failed: the buddy system itself is exhausted */
+ if (!filled) {
+ /* mutex_unlock(&cpu_pool->mutex); */
+ return NULL;
+ }
+ }
+
+ page = vm_phys_cpu_pool_pop(cpu_pool);
+ /* mutex_unlock(&cpu_pool->mutex); */
+ } else {
+ /* mutex_lock(&seg->mutex); */
+ page = vm_phys_seg_alloc_from_buddy(seg, order);
+ /* mutex_unlock(&seg->mutex); */
+ }
+
+ return page;
+}
+
+/*
+ * Release a block of 2^order pages to a segment. Single pages go to the
+ * per-CPU pool, drained to the buddy system when full; larger blocks go
+ * straight to the buddy system.
+ */
+static void
+vm_phys_seg_free(struct vm_phys_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_phys_cpu_pool *cpu_pool;
+
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ if (order == 0) {
+ cpu_pool = vm_phys_cpu_pool_get(seg);
+
+ /* mutex_lock(&cpu_pool->mutex); */
+
+ if (cpu_pool->nr_pages == cpu_pool->size)
+ vm_phys_cpu_pool_drain(cpu_pool, seg);
+
+ vm_phys_cpu_pool_push(cpu_pool, page);
+ /* mutex_unlock(&cpu_pool->mutex); */
+ } else {
+ /* mutex_lock(&seg->mutex); */
+ vm_phys_seg_free_to_buddy(seg, page, order);
+ /* mutex_unlock(&seg->mutex); */
+ }
+}
+
+/*
+ * Register a physical memory segment with the allocator at boot time.
+ * The [avail_start, avail_end) range seeds the bootstrap heap used by
+ * vm_phys_bootalloc().
+ */
+void __init
+vm_phys_load(const char *name, vm_phys_t start, vm_phys_t end,
+             vm_phys_t avail_start, vm_phys_t avail_end,
+             unsigned int seglist_prio)
+{
+    struct vm_phys_boot_seg *boot_seg;
+    struct vm_phys_seg *seg;
+    struct list *seg_list;
+    size_t length;
+    unsigned int i;
+
+    assert(name != NULL);
+    assert(start < end);
+    assert(seglist_prio < ARRAY_SIZE(vm_phys_seg_lists));
+
+    /* Lazily initialize the segment lists on the first call */
+    if (!vm_phys_load_initialized) {
+        for (i = 0; i < ARRAY_SIZE(vm_phys_seg_lists); i++)
+            list_init(&vm_phys_seg_lists[i]);
+
+        vm_phys_segs_size = 0;
+        vm_phys_load_initialized = 1;
+    }
+
+    if (vm_phys_segs_size >= ARRAY_SIZE(vm_phys_segs))
+        panic("vm_phys: too many physical segments");
+
+    seg_list = &vm_phys_seg_lists[seglist_prio];
+    seg = &vm_phys_segs[vm_phys_segs_size];
+    boot_seg = &vm_phys_boot_segs[vm_phys_segs_size];
+
+    list_insert_tail(seg_list, &seg->node);
+    seg->start = start;
+    seg->end = end;
+
+    /*
+     * Bounded copy of the segment name: an unbounded strcpy could
+     * overflow the fixed-size name buffer. Long names are truncated.
+     */
+    length = strlen(name);
+
+    if (length >= sizeof(seg->name))
+        length = sizeof(seg->name) - 1;
+
+    memcpy(seg->name, name, length);
+    seg->name[length] = '\0';
+
+    boot_seg->avail_start = avail_start;
+    boot_seg->avail_end = avail_end;
+
+    vm_phys_segs_size++;
+}
+
+/*
+ * Allocate one physical page from the bootstrap heaps of the loaded
+ * segments, scanning segment lists from lowest to highest priority so
+ * that higher priority memory is preserved for later use.
+ */
+vm_phys_t __init
+vm_phys_bootalloc(void)
+{
+    struct vm_phys_boot_seg *boot_seg;
+    struct vm_phys_seg *seg;
+    struct list *seg_list;
+    vm_phys_t pa;
+
+    for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
+         seg_list >= vm_phys_seg_lists;
+         seg_list--)
+        list_for_each_entry(seg_list, seg, node) {
+            boot_seg = &vm_phys_boot_segs[seg - vm_phys_segs];
+
+            /*
+             * Only allocate from a segment with a whole page left. The
+             * previous test (more than one byte available) could hand
+             * out an incomplete page from a sub-page-sized remainder.
+             */
+            if ((boot_seg->avail_end - boot_seg->avail_start) >= PAGE_SIZE) {
+                pa = boot_seg->avail_start;
+                boot_seg->avail_start += PAGE_SIZE;
+                return pa;
+            }
+        }
+
+    panic("vm_phys: no physical memory available");
+}
+
+/*
+ * Allocate the global memory map and hand every available page of every
+ * loaded segment over to the buddy free lists, making the normal
+ * allocation interface usable.
+ */
+void __init
+vm_phys_setup(void)
+{
+    struct vm_phys_boot_seg *boot_seg;
+    struct vm_phys_seg *seg;
+    struct vm_page *map, *start, *end;
+    size_t pages, map_size;
+    unsigned int i;
+
+    /*
+     * Compute the memory map size.
+     */
+    pages = 0;
+
+    for (i = 0; i < vm_phys_segs_size; i++)
+        pages += vm_page_atop(vm_phys_seg_size(&vm_phys_segs[i]));
+
+    map_size = P2ROUND(pages * sizeof(struct vm_page), PAGE_SIZE);
+    /* pages and map_size are size_t values, which %u may not match */
+    printk("vm_phys: page table size: %lu entries (%luk)\n",
+           (unsigned long)pages, (unsigned long)(map_size >> 10));
+    map = (struct vm_page *)vm_kmem_bootalloc(map_size);
+
+    /*
+     * Initialize the segments, associating them to the memory map. When
+     * the segments are initialized, all their pages are set allocated,
+     * with a block size of one (order 0). They are then released, which
+     * populates the free lists.
+     */
+    for (i = 0; i < vm_phys_segs_size; i++) {
+        seg = &vm_phys_segs[i];
+        boot_seg = &vm_phys_boot_segs[i];
+        vm_phys_seg_init(seg, map);
+
+        start = seg->pages + vm_page_atop(boot_seg->avail_start - seg->start);
+        end = seg->pages + vm_page_atop(boot_seg->avail_end - seg->start);
+
+        while (start < end) {
+            vm_phys_seg_free_to_buddy(seg, start, 0);
+            start++;
+        }
+
+        map += vm_page_atop(vm_phys_seg_size(seg));
+    }
+
+    vm_phys_ready = 1;
+}
+
+/*
+ * Hand a single page over to the buddy system of the segment recorded
+ * in its descriptor.
+ */
+void __init
+vm_phys_manage(struct vm_page *page)
+{
+ assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
+
+ vm_phys_seg_free_to_buddy(&vm_phys_segs[page->seg_index], page, 0);
+}
+
+/*
+ * Return the page descriptor for the given physical address, or NULL if
+ * the address doesn't belong to any loaded segment.
+ */
+struct vm_page *
+vm_phys_lookup_page(vm_phys_t pa)
+{
+    struct vm_phys_seg *seg, *seg_end;
+
+    seg_end = vm_phys_segs + vm_phys_segs_size;
+
+    for (seg = vm_phys_segs; seg < seg_end; seg++)
+        if ((pa >= seg->start) && (pa < seg->end))
+            return &seg->pages[vm_page_atop(pa - seg->start)];
+
+    return NULL;
+}
+
+/*
+ * Allocate a block of 2^order pages, scanning segment lists from lowest
+ * to highest priority. Return NULL if no segment can satisfy the
+ * request.
+ */
+struct vm_page *
+vm_phys_alloc(unsigned int order)
+{
+ struct vm_phys_seg *seg;
+ struct list *seg_list;
+ struct vm_page *page;
+
+ for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
+ seg_list >= vm_phys_seg_lists;
+ seg_list--)
+ list_for_each_entry(seg_list, seg, node) {
+ page = vm_phys_seg_alloc(seg, order);
+
+ if (page != NULL)
+ return page;
+ }
+
+ return NULL;
+}
+
+/*
+ * Release a block of 2^order pages to the segment recorded in its page
+ * descriptor.
+ */
+void
+vm_phys_free(struct vm_page *page, unsigned int order)
+{
+    struct vm_phys_seg *seg;
+
+    assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
+    seg = &vm_phys_segs[page->seg_index];
+    vm_phys_seg_free(seg, page, order);
+}
+
+/*
+ * Display the total and free page counts of every loaded segment.
+ */
+void
+vm_phys_info(void)
+{
+ struct vm_phys_seg *seg;
+ unsigned long pages;
+ unsigned int i;
+
+ for (i = 0; i < vm_phys_segs_size; i++) {
+ seg = &vm_phys_segs[i];
+ pages = (unsigned long)(seg->pages_end - seg->pages);
+ /* Shifting by (20 - PAGE_SHIFT) converts a page count to MiB */
+ printk("vm_phys: %s: pages: %lu (%luM), free: %lu (%luM)\n", seg->name,
+ pages, pages >> (20 - PAGE_SHIFT), seg->nr_free_pages,
+ seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ }
+}
diff --git a/vm/vm_phys.h b/vm/vm_phys.h
new file mode 100644
index 00000000..a5a7d32f
--- /dev/null
+++ b/vm/vm_phys.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Physical page allocator.
+ */
+
+#ifndef _VM_VM_PHYS_H
+#define _VM_VM_PHYS_H
+
+#include <kern/types.h>
+#include <vm/vm_page.h>
+
+/*
+ * True if the vm_phys module is completely initialized, false otherwise
+ * (in which case only vm_phys_bootalloc() can be used for allocations).
+ */
+extern int vm_phys_ready;
+
+/*
+ * Load physical memory into the vm_phys module at boot time.
+ *
+ * The avail_start and avail_end parameters are used to maintain a simple
+ * heap for bootstrap allocations.
+ */
+void vm_phys_load(const char *name, vm_phys_t start, vm_phys_t end,
+ vm_phys_t avail_start, vm_phys_t avail_end,
+ unsigned int seglist_prio);
+
+/*
+ * Allocate one physical page.
+ *
+ * This function is used to allocate physical memory at boot time, before the
+ * vm_phys module is ready, but after the physical memory has been loaded.
+ */
+vm_phys_t vm_phys_bootalloc(void);
+
+/*
+ * Set up the vm_phys module.
+ *
+ * Once this function returns, the vm_phys module is ready, and normal
+ * allocation functions can be used.
+ */
+void vm_phys_setup(void);
+
+/*
+ * Make the given page managed by the vm_phys module.
+ *
+ * If additional memory can be made usable after the VM system is initialized,
+ * it should be reported through this function.
+ */
+void vm_phys_manage(struct vm_page *page);
+
+/*
+ * Return the page descriptor for the given physical address.
+ */
+struct vm_page * vm_phys_lookup_page(vm_phys_t pa);
+
+/*
+ * Allocate a block of 2^order physical pages.
+ */
+struct vm_page * vm_phys_alloc(unsigned int order);
+
+/*
+ * Release a block of 2^order physical pages.
+ */
+void vm_phys_free(struct vm_page *page, unsigned int order);
+
+/*
+ * Display internal information about the module.
+ */
+void vm_phys_info(void);
+
+#endif /* _VM_VM_PHYS_H */
diff --git a/vm/vm_prot.h b/vm/vm_prot.h
new file mode 100644
index 00000000..27738c72
--- /dev/null
+++ b/vm/vm_prot.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_PROT_H
+#define _VM_VM_PROT_H
+
+/*
+ * Protection flags.
+ */
+#define VM_PROT_NONE 0
+#define VM_PROT_READ 1
+#define VM_PROT_WRITE 2
+#define VM_PROT_EXECUTE 4
+#define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE)
+#define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
+
+#endif /* _VM_VM_PROT_H */
diff --git a/vm/vm_setup.c b/vm/vm_setup.c
new file mode 100644
index 00000000..33b07257
--- /dev/null
+++ b/vm/vm_setup.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/kmem.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_setup.h>
+
+/*
+ * Set up the VM system and the kmem allocator.
+ *
+ * NOTE(review): the call order appears deliberate — physical memory and
+ * the kernel virtual layout first, then the bootstrap allocators, then
+ * their full setup — confirm against each module before reordering.
+ */
+void
+vm_setup(void)
+{
+ vm_kmem_setup();
+ vm_phys_setup();
+ kmem_bootstrap();
+ vm_map_bootstrap();
+ kmem_setup();
+ vm_map_setup();
+}
diff --git a/vm/vm_setup.h b/vm/vm_setup.h
new file mode 100644
index 00000000..f52ddb24
--- /dev/null
+++ b/vm/vm_setup.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_SETUP_H
+#define _VM_VM_SETUP_H
+
+/*
+ * Set up the VM system.
+ *
+ * This function also initializes the kmem (kernel memory) allocator.
+ */
+void vm_setup(void);
+
+#endif /* _VM_VM_SETUP_H */