summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/Changes15
-rw-r--r--Documentation/SubmittingPatches76
-rw-r--r--Documentation/development-process/5.Posting31
-rw-r--r--Documentation/feature-removal-schedule.txt10
-rw-r--r--Documentation/filesystems/debugfs.txt158
-rw-r--r--Documentation/x86/x86_64/boot-options.txt44
-rw-r--r--Documentation/x86/x86_64/machinecheck8
-rw-r--r--arch/x86/Kconfig45
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c267
-rw-r--r--arch/x86/crypto/fpu.c166
-rw-r--r--arch/x86/include/asm/entry_arch.h11
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h17
-rw-r--r--arch/x86/include/asm/mce.h88
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/kernel/apic/apic.c4
-rw-r--r--arch/x86/kernel/apic/nmi.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile10
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c42
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c127
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c218
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c1964
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.h26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_32.c76
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c1188
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c203
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c74
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c65
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c57
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c86
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c73
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c17
-rw-r--r--arch/x86/kernel/entry_64.S11
-rw-r--r--arch/x86/kernel/irq.c19
-rw-r--r--arch/x86/kernel/irqinit.c3
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/smp.c28
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--crypto/Kconfig10
-rw-r--r--crypto/algboss.c18
-rw-r--r--crypto/api.c14
-rw-r--r--crypto/cryptd.c14
-rw-r--r--crypto/internal.h3
-rw-r--r--crypto/pcompress.c1
-rw-r--r--crypto/tcrypt.c183
-rw-r--r--crypto/testmgr.c470
-rw-r--r--crypto/testmgr.h645
-rw-r--r--crypto/zlib.c24
-rw-r--r--drivers/char/hw_random/Kconfig2
-rw-r--r--drivers/char/hw_random/omap-rng.c2
-rw-r--r--drivers/char/hw_random/timeriomem-rng.c26
-rw-r--r--drivers/char/hw_random/via-rng.c15
-rw-r--r--drivers/crypto/Kconfig2
-rw-r--r--drivers/crypto/hifn_795x.c8
-rw-r--r--drivers/crypto/padlock-aes.c13
-rw-r--r--drivers/crypto/talitos.c713
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile5
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c523
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c25
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c53
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c49
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c479
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h19
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c67
-rw-r--r--fs/xfs/quota/xfs_dquot.c5
-rw-r--r--fs/xfs/quota/xfs_dquot.h1
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c1
-rw-r--r--fs/xfs/quota/xfs_qm.c168
-rw-r--r--fs/xfs/quota/xfs_qm.h21
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c77
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c1
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c113
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c66
-rw-r--r--fs/xfs/xfs_acl.c874
-rw-r--r--fs/xfs/xfs_acl.h97
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_arch.h32
-rw-r--r--fs/xfs/xfs_attr.c13
-rw-r--r--fs/xfs/xfs_bmap.c34
-rw-r--r--fs/xfs/xfs_bmap_btree.c4
-rw-r--r--fs/xfs/xfs_filestream.c6
-rw-r--r--fs/xfs/xfs_fs.h11
-rw-r--r--fs/xfs/xfs_iget.c8
-rw-r--r--fs/xfs/xfs_inode.c1
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_iomap.c13
-rw-r--r--fs/xfs/xfs_log_recover.c38
-rw-r--r--fs/xfs/xfs_mount.c105
-rw-r--r--fs/xfs/xfs_mount.h84
-rw-r--r--fs/xfs/xfs_qmops.c152
-rw-r--r--fs/xfs/xfs_quota.h129
-rw-r--r--fs/xfs/xfs_rename.c3
-rw-r--r--fs/xfs/xfs_rw.c1
-rw-r--r--fs/xfs/xfs_trans.c15
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c114
-rw-r--r--fs/xfs/xfs_vnodeops.h1
-rw-r--r--kernel/timer.c1
107 files changed, 6706 insertions, 4227 deletions
diff --git a/Documentation/Changes b/Documentation/Changes
index b95082be4d5..d21b3b5aa54 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -48,6 +48,7 @@ o procps 3.2.0 # ps --version
o oprofile 0.9 # oprofiled --version
o udev 081 # udevinfo -V
o grub 0.93 # grub --version
+o mcelog 0.6
Kernel compilation
==================
@@ -276,6 +277,16 @@ before running exportfs or mountd. It is recommended that all NFS
services be protected from the internet-at-large by a firewall where
that is possible.
+mcelog
+------
+
+In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
+as a regular cronjob similar to the x86-64 kernel to process and log
+machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
+events are errors reported by the CPU. Processing them is strongly encouraged.
+All x86-64 kernels since 2.6.4 require the mcelog utility to
+process machine checks.
+
Getting updated software
========================
@@ -365,6 +376,10 @@ FUSE
----
o <http://sourceforge.net/projects/fuse>
+mcelog
+------
+o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/>
+
Networking
**********
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index f309d3c6221..6c456835c1f 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -91,6 +91,10 @@ Be as specific as possible. The WORST descriptions possible include
things like "update driver X", "bug fix for driver X", or "this patch
includes updates for subsystem X. Please apply."
+The maintainer will thank you if you write your patch description in a
+form which can be easily pulled into Linux's source code management
+system, git, as a "commit log". See #15, below.
+
If your description starts to get long, that's a sign that you probably
need to split up your patch. See #3, next.
@@ -405,7 +409,14 @@ person it names. This tag documents that potentially interested parties
have been included in the discussion
-14) Using Tested-by: and Reviewed-by:
+14) Using Reported-by:, Tested-by: and Reviewed-by:
+
+If this patch fixes a problem reported by somebody else, consider adding a
+Reported-by: tag to credit the reporter for their contribution. Please
+note that this tag should not be added without the reporter's permission,
+especially if the problem was not reported in a public forum. That said,
+if we diligently credit our bug reporters, they will, hopefully, be
+inspired to help us again in the future.
A Tested-by: tag indicates that the patch has been successfully tested (in
some environment) by the person named. This tag informs maintainers that
@@ -444,7 +455,7 @@ offer a Reviewed-by tag for a patch. This tag serves to give credit to
reviewers and to inform maintainers of the degree of review which has been
done on the patch. Reviewed-by: tags, when supplied by reviewers known to
understand the subject area and to perform thorough reviews, will normally
-increase the liklihood of your patch getting into the kernel.
+increase the likelihood of your patch getting into the kernel.
15) The canonical patch format
@@ -485,12 +496,33 @@ phrase" should not be a filename. Do not use the same "summary
phrase" for every patch in a whole patch series (where a "patch
series" is an ordered sequence of multiple, related patches).
-Bear in mind that the "summary phrase" of your email becomes
-a globally-unique identifier for that patch. It propagates
-all the way into the git changelog. The "summary phrase" may
-later be used in developer discussions which refer to the patch.
-People will want to google for the "summary phrase" to read
-discussion regarding that patch.
+Bear in mind that the "summary phrase" of your email becomes a
+globally-unique identifier for that patch. It propagates all the way
+into the git changelog. The "summary phrase" may later be used in
+developer discussions which refer to the patch. People will want to
+google for the "summary phrase" to read discussion regarding that
+patch. It will also be the only thing that people may quickly see
+when, two or three months later, they are going through perhaps
+thousands of patches using tools such as "gitk" or "git log
+--oneline".
+
+For these reasons, the "summary" must be no more than 70-75
+characters, and it must describe both what the patch changes, as well
+as why the patch might be necessary. It is challenging to be both
+succinct and descriptive, but that is what a well-written summary
+should do.
+
+The "summary phrase" may be prefixed by tags enclosed in square
+brackets: "Subject: [PATCH tag] <summary phrase>". The tags are not
+considered part of the summary phrase, but describe how the patch
+should be treated. Common tags might include a version descriptor if
+the multiple versions of the patch have been sent out in response to
+comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for
+comments. If there are four patches in a patch series the individual
+patches may be numbered like this: 1/4, 2/4, 3/4, 4/4. This assures
+that developers understand the order in which the patches should be
+applied and that they have reviewed or applied all of the patches in
+the patch series.
A couple of example Subjects:
@@ -510,19 +542,31 @@ the patch author in the changelog.
The explanation body will be committed to the permanent source
changelog, so should make sense to a competent reader who has long
since forgotten the immediate details of the discussion that might
-have led to this patch.
+have led to this patch. Including symptoms of the failure which the
+patch addresses (kernel log messages, oops messages, etc.) is
+especially useful for people who might be searching the commit logs
+looking for the applicable patch. If a patch fixes a compile failure,
+it may not be necessary to include _all_ of the compile failures; just
+enough that it is likely that someone searching for the patch can find
+it. As in the "summary phrase", it is important to be both succinct as
+well as descriptive.
The "---" marker line serves the essential purpose of marking for patch
handling tools where the changelog message ends.
One good use for the additional comments after the "---" marker is for
-a diffstat, to show what files have changed, and the number of inserted
-and deleted lines per file. A diffstat is especially useful on bigger
-patches. Other comments relevant only to the moment or the maintainer,
-not suitable for the permanent changelog, should also go here.
-Use diffstat options "-p 1 -w 70" so that filenames are listed from the
-top of the kernel source tree and don't use too much horizontal space
-(easily fit in 80 columns, maybe with some indentation).
+a diffstat, to show what files have changed, and the number of
+inserted and deleted lines per file. A diffstat is especially useful
+on bigger patches. Other comments relevant only to the moment or the
+maintainer, not suitable for the permanent changelog, should also go
+here. A good example of such comments might be "patch changelogs"
+which describe what has changed between the v1 and v2 version of the
+patch.
+
+If you are going to include a diffstat after the "---" marker, please
+use diffstat options "-p 1 -w 70" so that filenames are listed from
+the top of the kernel source tree and don't use too much horizontal
+space (easily fit in 80 columns, maybe with some indentation).
See more details on the proper patch format in the following
references.
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
index dd48132a74d..f622c1e9f0f 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting
@@ -119,7 +119,7 @@ which takes quite a bit of time and thought after the "real work" has been
done. When done properly, though, it is time well spent.
-5.4: PATCH FORMATTING
+5.4: PATCH FORMATTING AND CHANGELOGS
So now you have a perfect series of patches for posting, but the work is
not done quite yet. Each patch needs to be formatted into a message which
@@ -146,8 +146,33 @@ that end, each patch will be composed of the following:
- One or more tag lines, with, at a minimum, one Signed-off-by: line from
the author of the patch. Tags will be described in more detail below.
-The above three items should, normally, be the text used when committing
-the change to a revision control system. They are followed by:
+The items above, together, form the changelog for the patch. Writing good
+changelogs is a crucial but often-neglected art; it's worth spending
+another moment discussing this issue. When writing a changelog, you should
+bear in mind that a number of different people will be reading your words.
+These include subsystem maintainers and reviewers who need to decide
+whether the patch should be included, distributors and other maintainers
+trying to decide whether a patch should be backported to other kernels, bug
+hunters wondering whether the patch is responsible for a problem they are
+chasing, users who want to know how the kernel has changed, and more. A
+good changelog conveys the needed information to all of these people in the
+most direct and concise way possible.
+
+To that end, the summary line should describe the effects of and motivation
+for the change as well as possible given the one-line constraint. The
+detailed description can then amplify on those topics and provide any
+needed additional information. If the patch fixes a bug, cite the commit
+which introduced the bug if possible. If a problem is associated with
+specific log or compiler output, include that output to help others
+searching for a solution to the same problem. If the change is meant to
+support other changes coming in later patch, say so. If internal APIs are
+changed, detail those changes and how other developers should respond. In
+general, the more you can put yourself into the shoes of everybody who will
+be reading your changelog, the better that changelog (and the kernel as a
+whole) will be.
+
+Needless to say, the changelog should be the text used when committing the
+change to a revision control system. It will be followed by:
- The patch itself, in the unified ("-u") patch format. Using the "-p"
option to diff will associate function names with changes, making the
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index de491a3e231..ec9ef5d0d7b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -437,3 +437,13 @@ Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate
driver but this caused driver conflicts.
Who: Jean Delvare <khali@linux-fr.org>
Krzysztof Helt <krzysztof.h1@wp.pl>
+
+----------------------------
+
+What: CONFIG_X86_OLD_MCE
+When: 2.6.32
+Why: Remove the old legacy 32bit machine check code. This has been
+ superseded by the newer machine check code from the 64bit port,
+ but the old version has been kept around for easier testing. Note this
+ doesn't impact the old P5 and WinChip machine check handlers.
+Who: Andi Kleen <andi@firstfloor.org>
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
new file mode 100644
index 00000000000..ed52af60c2d
--- /dev/null
+++ b/Documentation/filesystems/debugfs.txt
@@ -0,0 +1,158 @@
+Copyright 2009 Jonathan Corbet <corbet@lwn.net>
+
+Debugfs exists as a simple way for kernel developers to make information
+available to user space. Unlike /proc, which is only meant for information
+about a process, or sysfs, which has strict one-value-per-file rules,
+debugfs has no rules at all. Developers can put any information they want
+there. The debugfs filesystem is also intended to not serve as a stable
+ABI to user space; in theory, there are no stability constraints placed on
+files exported there. The real world is not always so simple, though [1];
+even debugfs interfaces are best designed with the idea that they will need
+to be maintained forever.
+
+Debugfs is typically mounted with a command like:
+
+ mount -t debugfs none /sys/kernel/debug
+
+(Or an equivalent /etc/fstab line).
+
+Note that the debugfs API is exported GPL-only to modules.
+
+Code using debugfs should include <linux/debugfs.h>. Then, the first order
+of business will be to create at least one directory to hold a set of
+debugfs files:
+
+ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
+
+This call, if successful, will make a directory called name underneath the
+indicated parent directory. If parent is NULL, the directory will be
+created in the debugfs root. On success, the return value is a struct
+dentry pointer which can be used to create files in the directory (and to
+clean it up at the end). A NULL return value indicates that something went
+wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the
+kernel has been built without debugfs support and none of the functions
+described below will work.
+
+The most general way to create a file within a debugfs directory is with:
+
+ struct dentry *debugfs_create_file(const char *name, mode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops);
+
+Here, name is the name of the file to create, mode describes the access
+permissions the file should have, parent indicates the directory which
+should hold the file, data will be stored in the i_private field of the
+resulting inode structure, and fops is a set of file operations which
+implement the file's behavior. At a minimum, the read() and/or write()
+operations should be provided; others can be included as needed. Again,
+the return value will be a dentry pointer to the created file, NULL for
+error, or ERR_PTR(-ENODEV) if debugfs support is missing.
+
+In a number of cases, the creation of a set of file operations is not
+actually necessary; the debugfs code provides a number of helper functions
+for simple situations. Files containing a single integer value can be
+created with any of:
+
+ struct dentry *debugfs_create_u8(const char *name, mode_t mode,
+ struct dentry *parent, u8 *value);
+ struct dentry *debugfs_create_u16(const char *name, mode_t mode,
+ struct dentry *parent, u16 *value);
+ struct dentry *debugfs_create_u32(const char *name, mode_t mode,
+ struct dentry *parent, u32 *value);
+ struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+ struct dentry *parent, u64 *value);
+
+These files support both reading and writing the given value; if a specific
+file should not be written to, simply set the mode bits accordingly. The
+values in these files are in decimal; if hexadecimal is more appropriate,
+the following functions can be used instead:
+
+ struct dentry *debugfs_create_x8(const char *name, mode_t mode,
+ struct dentry *parent, u8 *value);
+ struct dentry *debugfs_create_x16(const char *name, mode_t mode,
+ struct dentry *parent, u16 *value);
+ struct dentry *debugfs_create_x32(const char *name, mode_t mode,
+ struct dentry *parent, u32 *value);
+
+Note that there is no debugfs_create_x64().
+
+These functions are useful as long as the developer knows the size of the
+value to be exported. Some types can have different widths on different
+architectures, though, complicating the situation somewhat. There is a
+function meant to help out in one special case:
+
+ struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+ struct dentry *parent,
+ size_t *value);
+
+As might be expected, this function will create a debugfs file to represent
+a variable of type size_t.
+
+Boolean values can be placed in debugfs with:
+
+ struct dentry *debugfs_create_bool(const char *name, mode_t mode,
+ struct dentry *parent, u32 *value);
+
+A read on the resulting file will yield either Y (for non-zero values) or
+N, followed by a newline. If written to, it will accept either upper- or
+lower-case values, or 1 or 0. Any other input will be silently ignored.
+
+Finally, a block of arbitrary binary data can be exported with:
+
+ struct debugfs_blob_wrapper {
+ void *data;
+ unsigned long size;
+ };
+
+ struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+ struct dentry *parent,
+ struct debugfs_blob_wrapper *blob);
+
+A read of this file will return the data pointed to by the
+debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way
+to return several lines of (static) formatted text output. This function
+can be used to export binary information, but there does not appear to be
+any code which does so in the mainline. Note that all files created with
+debugfs_create_blob() are read-only.
+
+There are a couple of other directory-oriented helper functions:
+
+ struct dentry *debugfs_rename(struct dentry *old_dir,
+ struct dentry *old_dentry,
+ struct dentry *new_dir,
+ const char *new_name);
+
+ struct dentry *debugfs_create_symlink(const char *name,
+ struct dentry *parent,
+ const char *target);
+
+A call to debugfs_rename() will give a new name to an existing debugfs
+file, possibly in a different directory. The new_name must not exist prior
+to the call; the return value is old_dentry with updated information.
+Symbolic links can be created with debugfs_create_symlink().
+
+There is one important thing that all debugfs users must take into account:
+there is no automatic cleanup of any directories created in debugfs. If a
+module is unloaded without explicitly removing debugfs entries, the result
+will be a lot of stale pointers and no end of highly antisocial behavior.
+So all debugfs users - at least those which can be built as modules - must
+be prepared to remove all files and directories they create there. A file
+can be removed with:
+
+ void debugfs_remove(struct dentry *dentry);
+
+The dentry value can be NULL, in which case nothing will be removed.
+
+Once upon a time, debugfs users were required to remember the dentry
+pointer for every debugfs file they created so that all files could be
+cleaned up. We live in more civilized times now, though, and debugfs users
+can call:
+
+ void debugfs_remove_recursive(struct dentry *dentry);
+
+If this function is passed a pointer for the dentry corresponding to the
+top-level directory, the entire hierarchy below that directory will be
+removed.
+
+Notes:
+ [1] http://lwn.net/Articles/309298/
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 2db5893d6c9..29a6ff8bc7d 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -5,21 +5,51 @@ only the AMD64 specific ones are listed here.
Machine check
- mce=off disable machine check
- mce=bootlog Enable logging of machine checks left over from booting.
- Disabled by default on AMD because some BIOS leave bogus ones.
- If your BIOS doesn't do that it's a good idea to enable though
- to make sure you log even machine check events that result
- in a reboot. On Intel systems it is enabled by default.
+ Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
+
+ mce=off
+ Disable machine check
+ mce=no_cmci
+ Disable CMCI(Corrected Machine Check Interrupt) that
+ Intel processor supports. Usually this disablement is
+ not recommended, but it might be handy if your hardware
+ is misbehaving.
+ Note that you'll get more problems without CMCI than with
+ due to the shared banks, i.e. you might get duplicated
+ error logs.
+ mce=dont_log_ce
+ Don't make logs for corrected errors. All events reported
+ as corrected are silently cleared by OS.
+ This option will be useful if you have no interest in any
+ of corrected errors.
+ mce=ignore_ce
+ Disable features for corrected errors, e.g. polling timer
+ and CMCI. All events reported as corrected are not cleared
+ by OS and remained in its error banks.
+ Usually this disablement is not recommended, however if
+ there is an agent checking/clearing corrected errors
+ (e.g. BIOS or hardware monitoring applications), conflicting
+ with OS's error handling, and you cannot deactivate the agent,
+ then this option will be a help.
+ mce=bootlog
+ Enable logging of machine checks left over from booting.
+ Disabled by default on AMD because some BIOS leave bogus ones.
+ If your BIOS doesn't do that it's a good idea to enable though
+ to make sure you log even machine check events that result
+ in a reboot. On Intel systems it is enabled by default.
mce=nobootlog
Disable boot machine check logging.
- mce=tolerancelevel (number)
+ mce=tolerancelevel[,monarchtimeout] (number,number)
+ tolerance levels:
0: always panic on uncorrected errors, log corrected errors
1: panic or SIGBUS on uncorrected errors, log corrected errors
2: SIGBUS or log uncorrected errors, log corrected errors
3: never panic or SIGBUS, log all errors (for testing only)
Default is 1
Can be also set using sysfs which is preferable.
+ monarchtimeout:
+ Sets the time in us to wait for other CPUs on machine checks. 0
+ to disable.
nomce (for compatibility with i386): same as mce=off
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index a05e58e7b15..b1fb3027328 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@@ -41,7 +41,9 @@ check_interval
the polling interval. When the poller stops finding MCEs, it
triggers an exponential backoff (poll less often) on the polling
interval. The check_interval variable is both the initial and
- maximum polling interval.
+ maximum polling interval. 0 means no polling for corrected machine
+ check errors (but some corrected errors might be still reported
+ in other ways)
tolerant
Tolerance level. When a machine check exception occurs for a non
@@ -67,6 +69,10 @@ trigger
Program to run when a machine check event is detected.
This is an alternative to running mcelog regularly from cron
and allows to detect events faster.
+monarch_timeout
+ How long to wait for the other CPUs to machine check too on a
+ exception. 0 to disable waiting for other CPUs.
+ Unit: us
TBD document entries for AMD threshold interrupt configuration
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68f5578fe38..356d2ec8e2f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -789,10 +789,26 @@ config X86_MCE
to disable it. MCE support simply ignores non-MCE processors like
the 386 and 486, so nearly everyone can say Y here.
+config X86_OLD_MCE
+ depends on X86_32 && X86_MCE
+ bool "Use legacy machine check code (will go away)"
+ default n
+ select X86_ANCIENT_MCE
+ ---help---
+ Use the old i386 machine check code. This is merely intended for
+ testing in a transition period. Try this if you run into any machine
+ check related software problems, but report the problem to
+ linux-kernel. When in doubt say no.
+
+config X86_NEW_MCE
+ depends on X86_MCE
+ bool
+ default y if (!X86_OLD_MCE && X86_32) || X86_64
+
config X86_MCE_INTEL
def_bool y
prompt "Intel MCE features"
- depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+ depends on X86_NEW_MCE && X86_LOCAL_APIC
---help---
Additional support for intel specific MCE features such as
the thermal monitor.
@@ -800,19 +816,36 @@ config X86_MCE_INTEL
config X86_MCE_AMD
def_bool y
prompt "AMD MCE features"
- depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+ depends on X86_NEW_MCE && X86_LOCAL_APIC
---help---
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
+config X86_ANCIENT_MCE
+ def_bool n
+ depends on X86_32
+ prompt "Support for old Pentium 5 / WinChip machine checks"
+ ---help---
+ Include support for machine check handling on old Pentium 5 or WinChip
+ systems. These typically need to be enabled explicitely on the command
+ line.
+
config X86_MCE_THRESHOLD
depends on X86_MCE_AMD || X86_MCE_INTEL
bool
default y
+config X86_MCE_INJECT
+ depends on X86_NEW_MCE
+ tristate "Machine check injector support"
+ ---help---
+ Provide support for injecting machine checks for testing purposes.
+ If you don't know what a machine check is and you don't do kernel
+ QA it is safe to say n.
+
config X86_MCE_NONFATAL
tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
- depends on X86_32 && X86_MCE
+ depends on X86_OLD_MCE
---help---
Enabling this feature starts a timer that triggers every 5 seconds which
will look at the machine check registers to see if anything happened.
@@ -825,11 +858,15 @@ config X86_MCE_NONFATAL
config X86_MCE_P4THERMAL
bool "check for P4 thermal throttling interrupt."
- depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
+ depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
---help---
Enabling this feature will cause a message to be printed when the P4
enters thermal throttling.
+config X86_THERMAL_VECTOR
+ def_bool y
+ depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+
config VM86
bool "Enable VM86 support" if EMBEDDED
default y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index ebe7deedd5b..cfb0010fa94 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,8 @@
# Arch-specific CryptoAPI modules.
#
+obj-$(CONFIG_CRYPTO_FPU) += fpu.o
+
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 02af0af6549..4e663398f77 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -21,6 +21,22 @@
#include <asm/i387.h>
#include <asm/aes.h>
+#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
+#define HAS_CTR
+#endif
+
+#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
+#define HAS_LRW
+#endif
+
+#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
+#define HAS_PCBC
+#endif
+
+#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
+#define HAS_XTS
+#endif
+
struct async_aes_ctx {
struct cryptd_ablkcipher *cryptd_tfm;
};
@@ -137,6 +153,41 @@ static struct crypto_alg aesni_alg = {
}
};
+static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+ aesni_enc(ctx, dst, src);
+}
+
+static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
+
+ aesni_dec(ctx, dst, src);
+}
+
+static struct crypto_alg __aesni_alg = {
+ .cra_name = "__aes-aesni",
+ .cra_driver_name = "__driver-aes-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = aes_set_key,
+ .cia_encrypt = __aes_encrypt,
+ .cia_decrypt = __aes_decrypt
+ }
+ }
+};
+
static int ecb_encrypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
@@ -277,8 +328,16 @@ static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
{
struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+ struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+ int err;
- return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len);
+ crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+ & CRYPTO_TFM_REQ_MASK);
+ err = crypto_ablkcipher_setkey(child, key, key_len);
+ crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+ & CRYPTO_TFM_RES_MASK);
+ return err;
}
static int ablk_encrypt(struct ablkcipher_request *req)
@@ -411,6 +470,163 @@ static struct crypto_alg ablk_cbc_alg = {
},
};
+#ifdef HAS_CTR
+static int ablk_ctr_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))",
+ 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_ctr_alg = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
+ .cra_init = ablk_ctr_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ .geniv = "chainiv",
+ },
+ },
+};
+#endif
+
+#ifdef HAS_LRW
+static int ablk_lrw_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))",
+ 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_lrw_alg = {
+ .cra_name = "lrw(aes)",
+ .cra_driver_name = "lrw-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
+ .cra_init = ablk_lrw_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+};
+#endif
+
+#ifdef HAS_PCBC
+static int ablk_pcbc_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))",
+ 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_pcbc_alg = {
+ .cra_name = "pcbc(aes)",
+ .cra_driver_name = "pcbc-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list),
+ .cra_init = ablk_pcbc_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+};
+#endif
+
+#ifdef HAS_XTS
+static int ablk_xts_init(struct crypto_tfm *tfm)
+{
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))",
+ 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ablk_init_common(tfm, cryptd_tfm);
+ return 0;
+}
+
+static struct crypto_alg ablk_xts_alg = {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list),
+ .cra_init = ablk_xts_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+};
+#endif
+
static int __init aesni_init(void)
{
int err;
@@ -421,6 +637,8 @@ static int __init aesni_init(void)
}
if ((err = crypto_register_alg(&aesni_alg)))
goto aes_err;
+ if ((err = crypto_register_alg(&__aesni_alg)))
+ goto __aes_err;
if ((err = crypto_register_alg(&blk_ecb_alg)))
goto blk_ecb_err;
if ((err = crypto_register_alg(&blk_cbc_alg)))
@@ -429,9 +647,41 @@ static int __init aesni_init(void)
goto ablk_ecb_err;
if ((err = crypto_register_alg(&ablk_cbc_alg)))
goto ablk_cbc_err;
+#ifdef HAS_CTR
+ if ((err = crypto_register_alg(&ablk_ctr_alg)))
+ goto ablk_ctr_err;
+#endif
+#ifdef HAS_LRW
+ if ((err = crypto_register_alg(&ablk_lrw_alg)))
+ goto ablk_lrw_err;
+#endif
+#ifdef HAS_PCBC
+ if ((err = crypto_register_alg(&ablk_pcbc_alg)))
+ goto ablk_pcbc_err;
+#endif
+#ifdef HAS_XTS
+ if ((err = crypto_register_alg(&ablk_xts_alg)))
+ goto ablk_xts_err;
+#endif
return err;
+#ifdef HAS_XTS
+ablk_xts_err:
+#endif
+#ifdef HAS_PCBC
+ crypto_unregister_alg(&ablk_pcbc_alg);
+ablk_pcbc_err:
+#endif
+#ifdef HAS_LRW
+ crypto_unregister_alg(&ablk_lrw_alg);
+ablk_lrw_err:
+#endif
+#ifdef HAS_CTR
+ crypto_unregister_alg(&ablk_ctr_alg);
+ablk_ctr_err:
+#endif
+ crypto_unregister_alg(&ablk_cbc_alg);
ablk_cbc_err:
crypto_unregister_alg(&ablk_ecb_alg);
ablk_ecb_err:
@@ -439,6 +689,8 @@ ablk_ecb_err:
blk_cbc_err:
crypto_unregister_alg(&blk_ecb_alg);
blk_ecb_err:
+ crypto_unregister_alg(&__aesni_alg);
+__aes_err:
crypto_unregister_alg(&aesni_alg);
aes_err:
return err;
@@ -446,10 +698,23 @@ aes_err:
static void __exit aesni_exit(void)
{
+#ifdef HAS_XTS
+ crypto_unregister_alg(&ablk_xts_alg);
+#endif
+#ifdef HAS_PCBC
+ crypto_unregister_alg(&ablk_pcbc_alg);
+#endif
+#ifdef HAS_LRW
+ crypto_unregister_alg(&ablk_lrw_alg);
+#endif
+#ifdef HAS_CTR
+ crypto_unregister_alg(&ablk_ctr_alg);
+#endif
crypto_unregister_alg(&ablk_cbc_alg);
crypto_unregister_alg(&ablk_ecb_alg);
crypto_unregister_alg(&blk_cbc_alg);
crypto_unregister_alg(&blk_ecb_alg);
+ crypto_unregister_alg(&__aesni_alg);
crypto_unregister_alg(&aesni_alg);
}
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
new file mode 100644
index 00000000000..5f9781a3815
--- /dev/null
+++ b/arch/x86/crypto/fpu.c
@@ -0,0 +1,166 @@
+/*
+ * FPU: Wrapper for blkcipher touching fpu
+ *
+ * Copyright (c) Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/i387.h>
+
+struct crypto_fpu_ctx {
+ struct crypto_blkcipher *child;
+};
+
+static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
+ unsigned int keylen)
+{
+ struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
+ struct crypto_blkcipher *child = ctx->child;
+ int err;
+
+ crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_blkcipher_setkey(child, key, keylen);
+ crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
+ CRYPTO_TFM_RES_MASK);
+ return err;
+}
+
+static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ int err;
+ struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
+ struct crypto_blkcipher *child = ctx->child;
+ struct blkcipher_desc desc = {
+ .tfm = child,
+ .info = desc_in->info,
+ .flags = desc_in->flags,
+ };
+
+ kernel_fpu_begin();
+ err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
+ kernel_fpu_end();
+ return err;
+}
+
+static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
+ struct scatterlist *dst, struct scatterlist *src,
+ unsigned int nbytes)
+{
+ int err;
+ struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
+ struct crypto_blkcipher *child = ctx->child;
+ struct blkcipher_desc desc = {
+ .tfm = child,
+ .info = desc_in->info,
+ .flags = desc_in->flags,
+ };
+
+ kernel_fpu_begin();
+ err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
+ kernel_fpu_end();
+ return err;
+}
+
+static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
+{
+ struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+ struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+ struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_blkcipher *cipher;
+
+ cipher = crypto_spawn_blkcipher(spawn);
+ if (IS_ERR(cipher))
+ return PTR_ERR(cipher);
+
+ ctx->child = cipher;
+ return 0;
+}
+
+static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
+ crypto_free_blkcipher(ctx->child);
+}
+
+static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
+{
+ struct crypto_instance *inst;
+ struct crypto_alg *alg;
+ int err;
+
+ err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+ if (err)
+ return ERR_PTR(err);
+
+ alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
+ CRYPTO_ALG_TYPE_MASK);
+ if (IS_ERR(alg))
+ return ERR_CAST(alg);
+
+ inst = crypto_alloc_instance("fpu", alg);
+ if (IS_ERR(inst))
+ goto out_put_alg;
+
+ inst->alg.cra_flags = alg->cra_flags;
+ inst->alg.cra_priority = alg->cra_priority;
+ inst->alg.cra_blocksize = alg->cra_blocksize;
+ inst->alg.cra_alignmask = alg->cra_alignmask;
+ inst->alg.cra_type = alg->cra_type;
+ inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
+ inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
+ inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
+ inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
+ inst->alg.cra_init = crypto_fpu_init_tfm;
+ inst->alg.cra_exit = crypto_fpu_exit_tfm;
+ inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
+ inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
+ inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
+
+out_put_alg:
+ crypto_mod_put(alg);
+ return inst;
+}
+
+static void crypto_fpu_free(struct crypto_instance *inst)
+{
+ crypto_drop_spawn(crypto_instance_ctx(inst));
+ kfree(inst);
+}
+
+static struct crypto_template crypto_fpu_tmpl = {
+ .name = "fpu",
+ .alloc = crypto_fpu_alloc,
+ .free = crypto_fpu_free,
+ .module = THIS_MODULE,
+};
+
+static int __init crypto_fpu_module_init(void)
+{
+ return crypto_register_template(&crypto_fpu_tmpl);
+}
+
+static void __exit crypto_fpu_module_exit(void)
+{
+ crypto_unregister_template(&crypto_fpu_tmpl);
+}
+
+module_init(crypto_fpu_module_init);
+module_exit(crypto_fpu_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FPU block cipher wrapper");
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index d750a10ccad..ff8cbfa0785 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
smp_invalidate_interrupt)
@@ -52,8 +53,16 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#endif
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_THERMAL_VECTOR
BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
+BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
+#endif
+
+#ifdef CONFIG_X86_NEW_MCE
+BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
+#endif
+
#endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 9ebc5c25503..82e3e8f0104 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,7 +22,7 @@ typedef struct {
#endif
#ifdef CONFIG_X86_MCE
unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
unsigned int irq_threshold_count;
# endif
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 6df45f63966..ba180d93b08 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -34,6 +34,7 @@ extern void perf_pending_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
extern void reschedule_interrupt(void);
+extern void mce_self_interrupt(void);
extern void invalidate_interrupt(void);
extern void invalidate_interrupt0(void);
@@ -46,6 +47,7 @@ extern void invalidate_interrupt6(void);
extern void invalidate_interrupt7(void);
extern void irq_move_cleanup_interrupt(void);
+extern void reboot_interrupt(void);
extern void threshold_interrupt(void);
extern void call_function_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e997be98c9b..5b21f0ec3df 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -25,6 +25,7 @@
*/
#define NMI_VECTOR 0x02
+#define MCE_VECTOR 0x12
/*
* IDT vectors usable for external interrupt sources start
@@ -87,13 +88,8 @@
#define CALL_FUNCTION_VECTOR 0xfc
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
#define THERMAL_APIC_VECTOR 0xfa
-
-#ifdef CONFIG_X86_32
-/* 0xf8 - 0xf9 : free */
-#else
-# define THRESHOLD_APIC_VECTOR 0xf9
-# define UV_BAU_MESSAGE 0xf8
-#endif
+#define THRESHOLD_APIC_VECTOR 0xf9
+#define REBOOT_VECTOR 0xf8
/* f0-f7 used for spreading out TLB flushes: */
#define INVALIDATE_TLB_VECTOR_END 0xf7
@@ -117,6 +113,13 @@
*/
#define LOCAL_PENDING_VECTOR 0xec
+#define UV_BAU_MESSAGE 0xec
+
+/*
+ * Self IPI vector for machine checks
+ */
+#define MCE_SELF_VECTOR 0xeb
+
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
* start at 0x31(0x41) to spread out vectors evenly between priority
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4f8c199584e..540a466e50f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -1,8 +1,6 @@
#ifndef _ASM_X86_MCE_H
#define _ASM_X86_MCE_H
-#ifdef __x86_64__
-
#include <linux/types.h>
#include <asm/ioctls.h>
@@ -10,21 +8,35 @@
* Machine Check support for x86
*/
-#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
-#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
-#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
-
-#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */
-#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */
-#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */
-
-#define MCI_STATUS_VAL (1UL<<63) /* valid error */
-#define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */
-#define MCI_STATUS_UC (1UL<<61) /* uncorrected error */
-#define MCI_STATUS_EN (1UL<<60) /* error enabled */
-#define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */
-#define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */
+#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
+#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
+#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
+#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
+#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT 16
+#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
+
+#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
+#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
+#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+
+#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
+#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
+#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
+#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
+#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
+#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF 0 /* segment offset */
+#define MCM_ADDR_LINEAR 1 /* linear address */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCM_ADDR_MEM 3 /* memory address */
+#define MCM_ADDR_GENERIC 7 /* generic */
/* Fields are zero when not available */
struct mce {
@@ -34,13 +46,19 @@ struct mce {
__u64 mcgstatus;
__u64 ip;
__u64 tsc; /* cpu time stamp counter */
- __u64 res1; /* for future extension */
- __u64 res2; /* dito. */
+ __u64 time; /* wall time_t when error was detected */
+ __u8 cpuvendor; /* cpu vendor as encoded in system.h */
+ __u8 pad1;
+ __u16 pad2;
+ __u32 cpuid; /* CPUID 1 EAX */
__u8 cs; /* code segment */
__u8 bank; /* machine check bank */
- __u8 cpu; /* cpu that raised the error */
+ __u8 cpu; /* cpu number; obsolete; use extcpu now */
__u8 finished; /* entry is valid */
- __u32 pad;
+ __u32 extcpu; /* linux cpu number that detected the error */
+ __u32 socketid; /* CPU socket ID */
+ __u32 apicid; /* CPU initial apic ID */
+ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
};
/*
@@ -57,7 +75,7 @@ struct mce_log {
unsigned len; /* = MCE_LOG_LEN */
unsigned next;
unsigned flags;
- unsigned pad0;
+ unsigned recordlen; /* length of struct mce */
struct mce entry[MCE_LOG_LEN];
};
@@ -82,19 +100,16 @@ struct mce_log {
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
-#endif /* __x86_64__ */
-
#ifdef __KERNEL__
-#ifdef CONFIG_X86_32
extern int mce_disabled;
-#else /* CONFIG_X86_32 */
#include <asm/atomic.h>
+#include <linux/percpu.h>
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, device_mce);
+DECLARE_PER_CPU(struct sys_device, mce_dev);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
/*
@@ -104,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
#ifdef CONFIG_X86_MCE_INTEL
+extern int mce_cmci_disabled;
+extern int mce_ignore_ce;
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
@@ -123,13 +140,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif
-extern int mce_available(struct cpuinfo_x86 *c);
+int mce_available(struct cpuinfo_x86 *c);
+
+DECLARE_PER_CPU(unsigned, mce_exception_count);
+DECLARE_PER_CPU(unsigned, mce_poll_count);
void mce_log_therm_throt_event(__u64 status);
extern atomic_t mce_entry;
-extern void do_machine_check(struct pt_regs *, long);
+void do_machine_check(struct pt_regs *, long);
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
@@ -139,14 +159,16 @@ enum mcp_flags {
MCP_UC = (1 << 1), /* log uncorrected errors */
MCP_DONTLOG = (1 << 2), /* only clear, don't log */
};
-extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
-extern int mce_notify_user(void);
+int mce_notify_irq(void);
+void mce_notify_process(void);
-#endif /* !CONFIG_X86_32 */
+DECLARE_PER_CPU(struct mce, injectm);
+extern struct file_operations mce_chrdev_ops;
#ifdef CONFIG_X86_MCE
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
#else
#define mcheck_init(c) do { } while (0)
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d58d04fca8..1692fb5050e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -207,7 +207,14 @@
#define MSR_IA32_THERM_CONTROL 0x0000019a
#define MSR_IA32_THERM_INTERRUPT 0x0000019b
+
+#define THERM_INT_LOW_ENABLE (1 << 0)
+#define THERM_INT_HIGH_ENABLE (1 << 1)
+
#define MSR_IA32_THERM_STATUS 0x0000019c
+
+#define THERM_STATUS_PROCHOT (1 << 0)
+
#define MSR_IA32_MISC_ENABLE 0x000001a0
/* MISC_ENABLE bits: architectural */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 076d3881f3d..8c7c042ecad 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -899,7 +899,7 @@ void clear_local_APIC(void)
}
/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
if (maxlvt >= 5) {
v = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -2017,7 +2017,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
if (maxlvt >= 5)
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index a691302dc3f..b3025b43b63 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
static inline int mce_in_progress(void)
{
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#if defined(CONFIG_X86_NEW_MCE)
return atomic_read(&mce_entry) > 0;
#endif
return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index b2f89829bbe..45004faf67e 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,7 +1,11 @@
-obj-y = mce_$(BITS).o therm_throt.o
+obj-y = mce.o therm_throt.o
-obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
+obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
+obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
+obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
+obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o
+obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
+obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index dd3af6e7b39..89e51042415 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -2,11 +2,10 @@
* Athlon specific Machine Check Exception Reporting
* (C) Copyright 2002 Dave Jones <davej@redhat.com>
*/
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
#include <linux/smp.h>
#include <asm/processor.h>
@@ -15,12 +14,12 @@
#include "mce.h"
-/* Machine Check Handler For AMD Athlon/Duron */
+/* Machine Check Handler For AMD Athlon/Duron: */
static void k7_machine_check(struct pt_regs *regs, long error_code)
{
- int recover = 1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
+ int recover = 1;
int i;
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -32,15 +31,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
for (i = 1; i < nr_mce_banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
- if (high&(1<<31)) {
+ if (high & (1<<31)) {
char misc[20];
char addr[24];
- misc[0] = addr[0] = '\0';
+
+ misc[0] = '\0';
+ addr[0] = '\0';
+
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
high &= ~(1<<31);
+
if (high & (1<<27)) {
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,27 +52,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
+
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
smp_processor_id(), i, high, low, misc, addr);
- /* Clear it */
+
+ /* Clear it: */
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
- /* Serialize */
+ /* Serialize: */
wmb();
add_taint(TAINT_MACHINE_CHECK);
}
}
- if (recover&2)
+ if (recover & 2)
panic("CPU context corrupt");
- if (recover&1)
+ if (recover & 1)
panic("Unable to continue");
+
printk(KERN_EMERG "Attempting to continue.\n");
+
mcgstl &= ~(1<<2);
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
-/* AMD K7 machine check is Intel like */
+/* AMD K7 machine check is Intel like: */
void amd_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -79,21 +86,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
return;
machine_check_vector = k7_machine_check;
+ /* Make sure the vector pointer is visible before we enable MCEs: */
wmb();
printk(KERN_INFO "Intel machine check architecture supported.\n");
+
rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
- /* Clear status for MC index 0 separately, we don't touch CTL,
- * as some K7 Athlons cause spurious MCEs when its enabled. */
+ /*
+ * Clear status for MC index 0 separately, we don't touch CTL,
+ * as some K7 Athlons cause spurious MCEs when its enabled:
+ */
if (boot_cpu_data.x86 == 6) {
wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
i = 1;
} else
i = 0;
+
for (; i < nr_mce_banks; i++) {
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
new file mode 100644
index 00000000000..a3a235a53f0
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -0,0 +1,127 @@
+/*
+ * Machine check injection support.
+ * Copyright 2008 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Authors:
+ * Andi Kleen
+ * Ying Huang
+ */
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <asm/mce.h>
+
+/* Update fake mce registers on current CPU. */
+static void inject_mce(struct mce *m)
+{
+ struct mce *i = &per_cpu(injectm, m->extcpu);
+
+ /* Make sure noone reads partially written injectm */
+ i->finished = 0;
+ mb();
+ m->finished = 0;
+ /* First set the fields after finished */
+ i->extcpu = m->extcpu;
+ mb();
+ /* Now write record in order, finished last (except above) */
+ memcpy(i, m, sizeof(struct mce));
+ /* Finally activate it */
+ mb();
+ i->finished = 1;
+}
+
+struct delayed_mce {
+ struct timer_list timer;
+ struct mce m;
+};
+
+/* Inject mce on current CPU */
+static void raise_mce(unsigned long data)
+{
+ struct delayed_mce *dm = (struct delayed_mce *)data;
+ struct mce *m = &dm->m;
+ int cpu = m->extcpu;
+
+ inject_mce(m);
+ if (m->status & MCI_STATUS_UC) {
+ struct pt_regs regs;
+ memset(&regs, 0, sizeof(struct pt_regs));
+ regs.ip = m->ip;
+ regs.cs = m->cs;
+ printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+ do_machine_check(&regs, 0);
+ printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+ } else {
+ mce_banks_t b;
+ memset(&b, 0xff, sizeof(mce_banks_t));
+ printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+ machine_check_poll(0, &b);
+ mce_notify_irq();
+ printk(KERN_INFO "Finished machine check poll on CPU %d\n",
+ cpu);
+ }
+ kfree(dm);
+}
+
+/* Error injection interface */
+static ssize_t mce_write(struct file *filp, const char __user *ubuf,
+ size_t usize, loff_t *off)
+{
+ struct delayed_mce *dm;
+ struct mce m;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /*
+ * There are some cases where real MSR reads could slip
+ * through.
+ */
+ if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
+ return -EIO;
+
+ if ((unsigned long)usize > sizeof(struct mce))
+ usize = sizeof(struct mce);
+ if (copy_from_user(&m, ubuf, usize))
+ return -EFAULT;
+
+ if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
+ return -EINVAL;
+
+ dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
+ if (!dm)
+ return -ENOMEM;
+
+ /*
+ * Need to give user space some time to set everything up,
+ * so do it a jiffie or two later everywhere.
+ * Should we use a hrtimer here for better synchronization?
+ */
+ memcpy(&dm->m, &m, sizeof(struct mce));
+ setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
+ dm->timer.expires = jiffies + 2;
+ add_timer_on(&dm->timer, m.extcpu);
+ return usize;
+}
+
+static int inject_init(void)
+{
+ printk(KERN_INFO "Machine check injector initialized\n");
+ mce_chrdev_ops.write = mce_write;
+ return 0;
+}
+
+module_init(inject_init);
+/*
+ * Cannot tolerate unloading currently because we cannot
+ * guarantee all openers of mce_chrdev will get a reference to us.
+ */
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
new file mode 100644
index 00000000000..54dcb8ff12e
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -0,0 +1,15 @@
+#include <asm/mce.h>
+
+enum severity_level {
+ MCE_NO_SEVERITY,
+ MCE_KEEP_SEVERITY,
+ MCE_SOME_SEVERITY,
+ MCE_AO_SEVERITY,
+ MCE_UC_SEVERITY,
+ MCE_AR_SEVERITY,
+ MCE_PANIC_SEVERITY,
+};
+
+int mce_severity(struct mce *a, int tolerant, char **msg);
+
+extern int mce_ser;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
new file mode 100644
index 00000000000..ff0807f9705
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -0,0 +1,218 @@
+/*
+ * MCE grading rules.
+ * Copyright 2008, 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Author: Andi Kleen
+ */
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+/*
+ * Grade an mce by severity. In general the most severe ones are processed
+ * first. Since there are quite a lot of combinations test the bits in a
+ * table-driven way. The rules are simply processed in order, first
+ * match wins.
+ *
+ * Note this is only used for machine check exceptions, the corrected
+ * errors use much simpler rules. The exceptions still check for the corrected
+ * errors, but only to leave them alone for the CMCI handler (except for
+ * panic situations)
+ */
+
+enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+
+static struct severity {
+ u64 mask;
+ u64 result;
+ unsigned char sev;
+ unsigned char mcgmask;
+ unsigned char mcgres;
+ unsigned char ser;
+ unsigned char context;
+ unsigned char covered;
+ char *msg;
+} severities[] = {
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
+#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
+#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
+#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
+#define MCGMASK(x, res, s, m, r...) \
+ { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
+#define MASK(x, y, s, m, r...) \
+ { .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCACOD 0xffff
+
+ BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
+ BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
+ BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
+ /* When MCIP is not set something is very confused */
+ MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+ /* Neither return not error IP -- no chance to recover -> PANIC */
+ MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
+ "Neither restart nor error IP"),
+ MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
+ KERNEL),
+ BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+ MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
+ "Spurious not enabled", SER),
+
+ /* ignore OVER for UCNA */
+ MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
+ "Uncorrected no action required", SER),
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
+ "Illegal combination (UCNA with AR=1)", SER),
+ MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+
+ /* AR add known MCACODs here */
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
+ "Action required with lost events", SER),
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+ "Action required; unknown MCACOD", SER),
+
+ /* known AO MCACODs: */
+ MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
+ "Action optional: memory scrubbing error", SER),
+ MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
+ "Action optional: last level cache writeback error", SER),
+
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
+ "Action optional unknown MCACOD", SER),
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
+ "Action optional with lost events", SER),
+ BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
+ BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
+ BITSET(0, SOME, "No match") /* always matches. keep at end */
+};
+
+/*
+ * If the EIPV bit is set, it means the saved IP is the
+ * instruction which caused the MCE.
+ */
+static int error_context(struct mce *m)
+{
+ if (m->mcgstatus & MCG_STATUS_EIPV)
+ return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+ /* Unknown, assume kernel */
+ return IN_KERNEL;
+}
+
+int mce_severity(struct mce *a, int tolerant, char **msg)
+{
+ enum context ctx = error_context(a);
+ struct severity *s;
+
+ for (s = severities;; s++) {
+ if ((a->status & s->mask) != s->result)
+ continue;
+ if ((a->mcgstatus & s->mcgmask) != s->mcgres)
+ continue;
+ if (s->ser == SER_REQUIRED && !mce_ser)
+ continue;
+ if (s->ser == NO_SER && mce_ser)
+ continue;
+ if (s->context && ctx != s->context)
+ continue;
+ if (msg)
+ *msg = s->msg;
+ s->covered = 1;
+ if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
+ if (panic_on_oops || tolerant < 1)
+ return MCE_PANIC_SEVERITY;
+ }
+ return s->sev;
+ }
+}
+
+static void *s_start(struct seq_file *f, loff_t *pos)
+{
+ if (*pos >= ARRAY_SIZE(severities))
+ return NULL;
+ return &severities[*pos];
+}
+
+static void *s_next(struct seq_file *f, void *data, loff_t *pos)
+{
+ if (++(*pos) >= ARRAY_SIZE(severities))
+ return NULL;
+ return &severities[*pos];
+}
+
+static void s_stop(struct seq_file *f, void *data)
+{
+}
+
+static int s_show(struct seq_file *f, void *data)
+{
+ struct severity *ser = data;
+ seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
+ return 0;
+}
+
+static const struct seq_operations severities_seq_ops = {
+ .start = s_start,
+ .next = s_next,
+ .stop = s_stop,
+ .show = s_show,
+};
+
+static int severities_coverage_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &severities_seq_ops);
+}
+
+static ssize_t severities_coverage_write(struct file *file,
+ const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(severities); i++)
+ severities[i].covered = 0;
+ return count;
+}
+
+static const struct file_operations severities_coverage_fops = {
+ .open = severities_coverage_open,
+ .release = seq_release,
+ .read = seq_read,
+ .write = severities_coverage_write,
+};
+
+static int __init severities_debugfs_init(void)
+{
+ struct dentry *dmce = NULL, *fseverities_coverage = NULL;
+
+ dmce = debugfs_create_dir("mce", NULL);
+ if (dmce == NULL)
+ goto err_out;
+ fseverities_coverage = debugfs_create_file("severities-coverage",
+ 0444, dmce, NULL,
+ &severities_coverage_fops);
+ if (fseverities_coverage == NULL)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ if (fseverities_coverage)
+ debugfs_remove(fseverities_coverage);
+ if (dmce)
+ debugfs_remove(dmce);
+ return -ENOMEM;
+}
+late_initcall(severities_debugfs_init);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
new file mode 100644
index 00000000000..fabba15e455
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,1964 @@
+/*
+ * Machine check handler.
+ *
+ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Rest from unknown author(s).
+ * 2004 Andi Kleen. Rewrote most of it.
+ * Copyright 2008 Intel Corporation
+ * Author: Andi Kleen
+ */
+#include <linux/thread_info.h>
+#include <linux/capability.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/ratelimit.h>
+#include <linux/kallsyms.h>
+#include <linux/rcupdate.h>
+#include <linux/kobject.h>
+#include <linux/uaccess.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/string.h>
+#include <linux/sysdev.h>
+#include <linux/delay.h>
+#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/poll.h>
+#include <linux/nmi.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/processor.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/ipi.h>
+#include <asm/mce.h>
+#include <asm/msr.h>
+
+#include "mce-internal.h"
+#include "mce.h"
+
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+ printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+ smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+ unexpected_machine_check;
+
+int mce_disabled;
+
+#ifdef CONFIG_X86_NEW_MCE
+
+#define MISC_MCELOG_MINOR 227
+
+#define SPINUNIT 100 /* 100ns */
+
+atomic_t mce_entry;
+
+DEFINE_PER_CPU(unsigned, mce_exception_count);
+
+/*
+ * Tolerant levels:
+ * 0: always panic on uncorrected errors, log corrected errors
+ * 1: panic or SIGBUS on uncorrected errors, log corrected errors
+ * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
+ * 3: never panic or SIGBUS, log all errors (for testing only)
+ */
+static int tolerant = 1;
+static int banks;
+static u64 *bank;
+static unsigned long notify_user;
+static int rip_msr;
+static int mce_bootlog = -1;
+static int monarch_timeout = -1;
+static int mce_panic_timeout;
+static int mce_dont_log_ce;
+int mce_cmci_disabled;
+int mce_ignore_ce;
+int mce_ser;
+
+static char trigger[128];
+static char *trigger_argv[2] = { trigger, NULL };
+
+static unsigned long dont_init_banks;
+
+static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+static DEFINE_PER_CPU(struct mce, mces_seen);
+static int cpu_missing;
+
+
+/* MCA banks polled by the period polling timer for corrected events */
+DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
+ [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
+};
+
+static inline int skip_bank_init(int i)
+{
+ return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
+}
+
+static DEFINE_PER_CPU(struct work_struct, mce_work);
+
+/* Do initial initialization of a struct mce */
+void mce_setup(struct mce *m)
+{
+ memset(m, 0, sizeof(struct mce));
+ m->cpu = m->extcpu = smp_processor_id();
+ rdtscll(m->tsc);
+ /* We hope get_seconds stays lockless */
+ m->time = get_seconds();
+ m->cpuvendor = boot_cpu_data.x86_vendor;
+ m->cpuid = cpuid_eax(1);
+#ifdef CONFIG_SMP
+ m->socketid = cpu_data(m->extcpu).phys_proc_id;
+#endif
+ m->apicid = cpu_data(m->extcpu).initial_apicid;
+ rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
+}
+
+DEFINE_PER_CPU(struct mce, injectm);
+EXPORT_PER_CPU_SYMBOL_GPL(injectm);
+
+/*
+ * Lockless MCE logging infrastructure.
+ * This avoids deadlocks on printk locks without having to break locks. Also
+ * separate MCEs from kernel messages to avoid bogus bug reports.
+ */
+
+static struct mce_log mcelog = {
+ .signature = MCE_LOG_SIGNATURE,
+ .len = MCE_LOG_LEN,
+ .recordlen = sizeof(struct mce),
+};
+
+void mce_log(struct mce *mce)
+{
+ unsigned next, entry;
+
+ mce->finished = 0;
+ wmb();
+ for (;;) {
+ entry = rcu_dereference(mcelog.next);
+ for (;;) {
+ /*
+ * When the buffer fills up discard new entries.
+ * Assume that the earlier errors are the more
+ * interesting ones:
+ */
+ if (entry >= MCE_LOG_LEN) {
+ set_bit(MCE_OVERFLOW,
+ (unsigned long *)&mcelog.flags);
+ return;
+ }
+ /* Old left over entry. Skip: */
+ if (mcelog.entry[entry].finished) {
+ entry++;
+ continue;
+ }
+ break;
+ }
+ smp_rmb();
+ next = entry + 1;
+ if (cmpxchg(&mcelog.next, entry, next) == entry)
+ break;
+ }
+ memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
+ wmb();
+ mcelog.entry[entry].finished = 1;
+ wmb();
+
+ mce->finished = 1;
+ set_bit(0, &notify_user);
+}
+
+static void print_mce(struct mce *m)
+{
+ printk(KERN_EMERG
+ "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+ m->extcpu, m->mcgstatus, m->bank, m->status);
+ if (m->ip) {
+ printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
+ !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+ m->cs, m->ip);
+ if (m->cs == __KERNEL_CS)
+ print_symbol("{%s}", m->ip);
+ printk("\n");
+ }
+ printk(KERN_EMERG "TSC %llx ", m->tsc);
+ if (m->addr)
+ printk("ADDR %llx ", m->addr);
+ if (m->misc)
+ printk("MISC %llx ", m->misc);
+ printk("\n");
+ printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+ m->cpuvendor, m->cpuid, m->time, m->socketid,
+ m->apicid);
+}
+
+static void print_mce_head(void)
+{
+ printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n");
+}
+
+static void print_mce_tail(void)
+{
+ printk(KERN_EMERG "This is not a software problem!\n"
+ KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n");
+}
+
+#define PANIC_TIMEOUT 5 /* 5 seconds */
+
+static atomic_t mce_paniced;
+
+/* Panic in progress. Enable interrupts and wait for final IPI */
+static void wait_for_panic(void)
+{
+ long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+ preempt_disable();
+ local_irq_enable();
+ while (timeout-- > 0)
+ udelay(1);
+ if (panic_timeout == 0)
+ panic_timeout = mce_panic_timeout;
+ panic("Panicing machine check CPU died");
+}
+
+static void mce_panic(char *msg, struct mce *final, char *exp)
+{
+ int i;
+
+ /*
+ * Make sure only one CPU runs in machine check panic
+ */
+ if (atomic_add_return(1, &mce_paniced) > 1)
+ wait_for_panic();
+ barrier();
+
+ bust_spinlocks(1);
+ console_verbose();
+ print_mce_head();
+ /* First print corrected ones that are still unlogged */
+ for (i = 0; i < MCE_LOG_LEN; i++) {
+ struct mce *m = &mcelog.entry[i];
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+ if (!(m->status & MCI_STATUS_UC))
+ print_mce(m);
+ }
+ /* Now print uncorrected but with the final one last */
+ for (i = 0; i < MCE_LOG_LEN; i++) {
+ struct mce *m = &mcelog.entry[i];
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+ if (!(m->status & MCI_STATUS_UC))
+ continue;
+ if (!final || memcmp(m, final, sizeof(struct mce)))
+ print_mce(m);
+ }
+ if (final)
+ print_mce(final);
+ if (cpu_missing)
+ printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
+ print_mce_tail();
+ if (exp)
+ printk(KERN_EMERG "Machine check: %s\n", exp);
+ if (panic_timeout == 0)
+ panic_timeout = mce_panic_timeout;
+ panic(msg);
+}
+
+/* Support code for software error injection */
+
+static int msr_to_offset(u32 msr)
+{
+ unsigned bank = __get_cpu_var(injectm.bank);
+ if (msr == rip_msr)
+ return offsetof(struct mce, ip);
+ if (msr == MSR_IA32_MC0_STATUS + bank*4)
+ return offsetof(struct mce, status);
+ if (msr == MSR_IA32_MC0_ADDR + bank*4)
+ return offsetof(struct mce, addr);
+ if (msr == MSR_IA32_MC0_MISC + bank*4)
+ return offsetof(struct mce, misc);
+ if (msr == MSR_IA32_MCG_STATUS)
+ return offsetof(struct mce, mcgstatus);
+ return -1;
+}
+
+/* MSR access wrappers used for error injection */
+static u64 mce_rdmsrl(u32 msr)
+{
+ u64 v;
+ if (__get_cpu_var(injectm).finished) {
+ int offset = msr_to_offset(msr);
+ if (offset < 0)
+ return 0;
+ return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
+ }
+ rdmsrl(msr, v);
+ return v;
+}
+
+static void mce_wrmsrl(u32 msr, u64 v)
+{
+ if (__get_cpu_var(injectm).finished) {
+ int offset = msr_to_offset(msr);
+ if (offset >= 0)
+ *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
+ return;
+ }
+ wrmsrl(msr, v);
+}
+
+/*
+ * Simple lockless ring to communicate PFNs from the exception handler with the
+ * process context work function. This is vastly simplified because there's
+ * only a single reader and a single writer.
+ */
+#define MCE_RING_SIZE 16 /* we use one entry less */
+
+struct mce_ring {
+ unsigned short start;
+ unsigned short end;
+ unsigned long ring[MCE_RING_SIZE];
+};
+static DEFINE_PER_CPU(struct mce_ring, mce_ring);
+
+/* Runs with CPU affinity in workqueue */
+static int mce_ring_empty(void)
+{
+ struct mce_ring *r = &__get_cpu_var(mce_ring);
+
+ return r->start == r->end;
+}
+
+static int mce_ring_get(unsigned long *pfn)
+{
+ struct mce_ring *r;
+ int ret = 0;
+
+ *pfn = 0;
+ get_cpu();
+ r = &__get_cpu_var(mce_ring);
+ if (r->start == r->end)
+ goto out;
+ *pfn = r->ring[r->start];
+ r->start = (r->start + 1) % MCE_RING_SIZE;
+ ret = 1;
+out:
+ put_cpu();
+ return ret;
+}
+
+/* Always runs in MCE context with preempt off */
+static int mce_ring_add(unsigned long pfn)
+{
+ struct mce_ring *r = &__get_cpu_var(mce_ring);
+ unsigned next;
+
+ next = (r->end + 1) % MCE_RING_SIZE;
+ if (next == r->start)
+ return -1;
+ r->ring[r->end] = pfn;
+ wmb();
+ r->end = next;
+ return 0;
+}
+
+int mce_available(struct cpuinfo_x86 *c)
+{
+ if (mce_disabled)
+ return 0;
+ return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
+
+static void mce_schedule_work(void)
+{
+ if (!mce_ring_empty()) {
+ struct work_struct *work = &__get_cpu_var(mce_work);
+ if (!work_pending(work))
+ schedule_work(work);
+ }
+}
+
+/*
+ * Get the address of the instruction at the time of the machine check
+ * error.
+ */
+static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
+{
+
+ if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
+ m->ip = regs->ip;
+ m->cs = regs->cs;
+ } else {
+ m->ip = 0;
+ m->cs = 0;
+ }
+ if (rip_msr)
+ m->ip = mce_rdmsrl(rip_msr);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Called after interrupts have been reenabled again
+ * when a MCE happened during an interrupts off region
+ * in the kernel.
+ */
+asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
+{
+ ack_APIC_irq();
+ exit_idle();
+ irq_enter();
+ mce_notify_irq();
+ mce_schedule_work();
+ irq_exit();
+}
+#endif
+
+static void mce_report_event(struct pt_regs *regs)
+{
+ if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
+ mce_notify_irq();
+ /*
+ * Triggering the work queue here is just an insurance
+ * policy in case the syscall exit notify handler
+ * doesn't run soon enough or ends up running on the
+ * wrong CPU (can happen when audit sleeps)
+ */
+ mce_schedule_work();
+ return;
+ }
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Without APIC do not notify. The event will be picked
+ * up eventually.
+ */
+ if (!cpu_has_apic)
+ return;
+
+ /*
+ * When interrupts are disabled we cannot use
+ * kernel services safely. Trigger an self interrupt
+ * through the APIC to instead do the notification
+ * after interrupts are reenabled again.
+ */
+ apic->send_IPI_self(MCE_SELF_VECTOR);
+
+ /*
+ * Wait for idle afterwards again so that we don't leave the
+ * APIC in a non idle state because the normal APIC writes
+ * cannot exclude us.
+ */
+ apic_wait_icr_idle();
+#endif
+}
+
+DEFINE_PER_CPU(unsigned, mce_poll_count);
+
+/*
+ * Poll for corrected events or events that happened before reset.
+ * Those are just logged through /dev/mcelog.
+ *
+ * This is executed in standard interrupt context.
+ *
+ * Note: spec recommends to panic for fatal unsignalled
+ * errors here. However this would be quite problematic --
+ * we would need to reimplement the Monarch handling and
+ * it would mess up the exclusion between exception handler
+ * and poll hander -- * so we skip this for now.
+ * These cases should not happen anyways, or only when the CPU
+ * is already totally * confused. In this case it's likely it will
+ * not fully execute the machine check handler either.
+ */
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+{
+ struct mce m;
+ int i;
+
+ __get_cpu_var(mce_poll_count)++;
+
+ mce_setup(&m);
+
+ m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ for (i = 0; i < banks; i++) {
+ if (!bank[i] || !test_bit(i, *b))
+ continue;
+
+ m.misc = 0;
+ m.addr = 0;
+ m.bank = i;
+ m.tsc = 0;
+
+ barrier();
+ m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+ if (!(m.status & MCI_STATUS_VAL))
+ continue;
+
+ /*
+ * Uncorrected or signalled events are handled by the exception
+ * handler when it is enabled, so don't process those here.
+ *
+ * TBD do the same check for MCI_STATUS_EN here?
+ */
+ if (!(flags & MCP_UC) &&
+ (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
+ continue;
+
+ if (m.status & MCI_STATUS_MISCV)
+ m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+ if (m.status & MCI_STATUS_ADDRV)
+ m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+
+ if (!(flags & MCP_TIMESTAMP))
+ m.tsc = 0;
+ /*
+ * Don't get the IP here because it's unlikely to
+ * have anything to do with the actual error location.
+ */
+ if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
+ mce_log(&m);
+ add_taint(TAINT_MACHINE_CHECK);
+ }
+
+ /*
+ * Clear state for this bank.
+ */
+ mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+ }
+
+ /*
+ * Don't clear MCG_STATUS here because it's only defined for
+ * exceptions.
+ */
+
+ sync_core();
+}
+EXPORT_SYMBOL_GPL(machine_check_poll);
+
+/*
+ * Do a quick check if any of the events requires a panic.
+ * This decides if we keep the events around or clear them.
+ */
+static int mce_no_way_out(struct mce *m, char **msg)
+{
+ int i;
+
+ for (i = 0; i < banks; i++) {
+ m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+ if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Variable to establish order between CPUs while scanning.
+ * Each CPU spins initially until executing is equal its number.
+ */
+static atomic_t mce_executing;
+
+/*
+ * Defines order of CPUs on entry. First CPU becomes Monarch.
+ */
+static atomic_t mce_callin;
+
+/*
+ * Check if a timeout waiting for other CPUs happened.
+ */
+static int mce_timed_out(u64 *t)
+{
+ /*
+ * The others already did panic for some reason.
+ * Bail out like in a timeout.
+ * rmb() to tell the compiler that system_state
+ * might have been modified by someone else.
+ */
+ rmb();
+ if (atomic_read(&mce_paniced))
+ wait_for_panic();
+ if (!monarch_timeout)
+ goto out;
+ if ((s64)*t < SPINUNIT) {
+ /* CHECKME: Make panic default for 1 too? */
+ if (tolerant < 1)
+ mce_panic("Timeout synchronizing machine check over CPUs",
+ NULL, NULL);
+ cpu_missing = 1;
+ return 1;
+ }
+ *t -= SPINUNIT;
+out:
+ touch_nmi_watchdog();
+ return 0;
+}
+
+/*
+ * The Monarch's reign. The Monarch is the CPU who entered
+ * the machine check handler first. It waits for the others to
+ * raise the exception too and then grades them. When any
+ * error is fatal panic. Only then let the others continue.
+ *
+ * The other CPUs entering the MCE handler will be controlled by the
+ * Monarch. They are called Subjects.
+ *
+ * This way we prevent any potential data corruption in a unrecoverable case
+ * and also makes sure always all CPU's errors are examined.
+ *
+ * Also this detects the case of an machine check event coming from outer
+ * space (not detected by any CPUs) In this case some external agent wants
+ * us to shut down, so panic too.
+ *
+ * The other CPUs might still decide to panic if the handler happens
+ * in a unrecoverable place, but in this case the system is in a semi-stable
+ * state and won't corrupt anything by itself. It's ok to let the others
+ * continue for a bit first.
+ *
+ * All the spin loops have timeouts; when a timeout happens a CPU
+ * typically elects itself to be Monarch.
+ */
+static void mce_reign(void)
+{
+ int cpu;
+ struct mce *m = NULL;
+ int global_worst = 0;
+ char *msg = NULL;
+ char *nmsg = NULL;
+
+ /*
+ * This CPU is the Monarch and the other CPUs have run
+ * through their handlers.
+ * Grade the severity of the errors of all the CPUs.
+ */
+ for_each_possible_cpu(cpu) {
+ int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
+ &nmsg);
+ if (severity > global_worst) {
+ msg = nmsg;
+ global_worst = severity;
+ m = &per_cpu(mces_seen, cpu);
+ }
+ }
+
+ /*
+ * Cannot recover? Panic here then.
+ * This dumps all the mces in the log buffer and stops the
+ * other CPUs.
+ */
+ if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
+ mce_panic("Fatal Machine check", m, msg);
+
+ /*
+ * For UC somewhere we let the CPU who detects it handle it.
+ * Also must let continue the others, otherwise the handling
+ * CPU could deadlock on a lock.
+ */
+
+ /*
+ * No machine check event found. Must be some external
+ * source or one CPU is hung. Panic.
+ */
+ if (!m && tolerant < 3)
+ mce_panic("Machine check from unknown source", NULL, NULL);
+
+ /*
+ * Now clear all the mces_seen so that they don't reappear on
+ * the next mce.
+ */
+ for_each_possible_cpu(cpu)
+ memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
+}
+
+static atomic_t global_nwo;
+
+/*
+ * Start of Monarch synchronization. This waits until all CPUs have
+ * entered the exception handler and then determines if any of them
+ * saw a fatal event that requires panic. Then it executes them
+ * in the entry order.
+ * TBD double check parallel CPU hotunplug
+ */
+static int mce_start(int no_way_out, int *order)
+{
+ int nwo;
+ int cpus = num_online_cpus();
+ u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+
+ if (!timeout) {
+ *order = -1;
+ return no_way_out;
+ }
+
+ atomic_add(no_way_out, &global_nwo);
+
+ /*
+ * Wait for everyone.
+ */
+ while (atomic_read(&mce_callin) != cpus) {
+ if (mce_timed_out(&timeout)) {
+ atomic_set(&global_nwo, 0);
+ *order = -1;
+ return no_way_out;
+ }
+ ndelay(SPINUNIT);
+ }
+
+ /*
+ * Cache the global no_way_out state.
+ */
+ nwo = atomic_read(&global_nwo);
+
+ /*
+ * Monarch starts executing now, the others wait.
+ */
+ if (*order == 1) {
+ atomic_set(&mce_executing, 1);
+ return nwo;
+ }
+
+ /*
+ * Now start the scanning loop one by one
+ * in the original callin order.
+ * This way when there are any shared banks it will
+ * be only seen by one CPU before cleared, avoiding duplicates.
+ */
+ while (atomic_read(&mce_executing) < *order) {
+ if (mce_timed_out(&timeout)) {
+ atomic_set(&global_nwo, 0);
+ *order = -1;
+ return no_way_out;
+ }
+ ndelay(SPINUNIT);
+ }
+ return nwo;
+}
+
+/*
+ * Synchronize between CPUs after main scanning loop.
+ * This invokes the bulk of the Monarch processing.
+ */
+static int mce_end(int order)
+{
+ int ret = -1;
+ u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+
+ if (!timeout)
+ goto reset;
+ if (order < 0)
+ goto reset;
+
+ /*
+ * Allow others to run.
+ */
+ atomic_inc(&mce_executing);
+
+ if (order == 1) {
+ /* CHECKME: Can this race with a parallel hotplug? */
+ int cpus = num_online_cpus();
+
+ /*
+ * Monarch: Wait for everyone to go through their scanning
+ * loops.
+ */
+ while (atomic_read(&mce_executing) <= cpus) {
+ if (mce_timed_out(&timeout))
+ goto reset;
+ ndelay(SPINUNIT);
+ }
+
+ mce_reign();
+ barrier();
+ ret = 0;
+ } else {
+ /*
+ * Subject: Wait for Monarch to finish.
+ */
+ while (atomic_read(&mce_executing) != 0) {
+ if (mce_timed_out(&timeout))
+ goto reset;
+ ndelay(SPINUNIT);
+ }
+
+ /*
+ * Don't reset anything. That's done by the Monarch.
+ */
+ return 0;
+ }
+
+ /*
+ * Reset all global state.
+ */
+reset:
+ atomic_set(&global_nwo, 0);
+ atomic_set(&mce_callin, 0);
+ barrier();
+
+ /*
+ * Let others run again.
+ */
+ atomic_set(&mce_executing, 0);
+ return ret;
+}
+
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses upto page granuality for now.
+ */
+static int mce_usable_address(struct mce *m)
+{
+ if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
+ return 0;
+ if ((m->misc & 0x3f) > PAGE_SHIFT)
+ return 0;
+ if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
+ return 0;
+ return 1;
+}
+
+static void mce_clear_state(unsigned long *toclear)
+{
+ int i;
+
+ for (i = 0; i < banks; i++) {
+ if (test_bit(i, toclear))
+ mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+ }
+}
+
+/*
+ * The actual machine check handler. This only handles real
+ * exceptions when something got corrupted coming in through int 18.
+ *
+ * This is executed in NMI context not subject to normal locking rules. This
+ * implies that most kernel services cannot be safely used. Don't even
+ * think about putting a printk in there!
+ *
+ * On Intel systems this is entered on all CPUs in parallel through
+ * MCE broadcast. However some CPUs might be broken beyond repair,
+ * so be always careful when synchronizing with others.
+ */
+void do_machine_check(struct pt_regs *regs, long error_code)
+{
+ struct mce m, *final;
+ int i;
+ int worst = 0;
+ int severity;
+ /*
+ * Establish sequential order between the CPUs entering the machine
+ * check handler.
+ */
+ int order;
+
+ /*
+ * If no_way_out gets set, there is no safe way to recover from this
+ * MCE. If tolerant is cranked up, we'll try anyway.
+ */
+ int no_way_out = 0;
+ /*
+ * If kill_it gets set, there might be a way to recover from this
+ * error.
+ */
+ int kill_it = 0;
+ DECLARE_BITMAP(toclear, MAX_NR_BANKS);
+ char *msg = "Unknown";
+
+ atomic_inc(&mce_entry);
+
+ __get_cpu_var(mce_exception_count)++;
+
+ if (notify_die(DIE_NMI, "machine check", regs, error_code,
+ 18, SIGKILL) == NOTIFY_STOP)
+ goto out;
+ if (!banks)
+ goto out;
+
+ order = atomic_add_return(1, &mce_callin);
+ mce_setup(&m);
+
+ m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ no_way_out = mce_no_way_out(&m, &msg);
+
+ final = &__get_cpu_var(mces_seen);
+ *final = m;
+
+ barrier();
+
+ /*
+ * When no restart IP must always kill or panic.
+ */
+ if (!(m.mcgstatus & MCG_STATUS_RIPV))
+ kill_it = 1;
+
+ /*
+ * Go through all the banks in exclusion of the other CPUs.
+ * This way we don't report duplicated events on shared banks
+ * because the first one to see it will clear it.
+ */
+ no_way_out = mce_start(no_way_out, &order);
+ for (i = 0; i < banks; i++) {
+ __clear_bit(i, toclear);
+ if (!bank[i])
+ continue;
+
+ m.misc = 0;
+ m.addr = 0;
+ m.bank = i;
+
+ m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+ if ((m.status & MCI_STATUS_VAL) == 0)
+ continue;
+
+ /*
+ * Non uncorrected or non signaled errors are handled by
+ * machine_check_poll. Leave them alone, unless this panics.
+ */
+ if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+ !no_way_out)
+ continue;
+
+ /*
+ * Set taint even when machine check was not enabled.
+ */
+ add_taint(TAINT_MACHINE_CHECK);
+
+ severity = mce_severity(&m, tolerant, NULL);
+
+ /*
+ * When machine check was for corrected handler don't touch,
+ * unless we're panicing.
+ */
+ if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+ continue;
+ __set_bit(i, toclear);
+ if (severity == MCE_NO_SEVERITY) {
+ /*
+ * Machine check event was not enabled. Clear, but
+ * ignore.
+ */
+ continue;
+ }
+
+ /*
+ * Kill on action required.
+ */
+ if (severity == MCE_AR_SEVERITY)
+ kill_it = 1;
+
+ if (m.status & MCI_STATUS_MISCV)
+ m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+ if (m.status & MCI_STATUS_ADDRV)
+ m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+
+ /*
+ * Action optional error. Queue address for later processing.
+ * When the ring overflows we just ignore the AO error.
+ * RED-PEN add some logging mechanism when
+ * usable_address or mce_add_ring fails.
+ * RED-PEN don't ignore overflow for tolerant == 0
+ */
+ if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
+ mce_ring_add(m.addr >> PAGE_SHIFT);
+
+ mce_get_rip(&m, regs);
+ mce_log(&m);
+
+ if (severity > worst) {
+ *final = m;
+ worst = severity;
+ }
+ }
+
+ if (!no_way_out)
+ mce_clear_state(toclear);
+
+ /*
+ * Do most of the synchronization with other CPUs.
+ * When there's any problem use only local no_way_out state.
+ */
+ if (mce_end(order) < 0)
+ no_way_out = worst >= MCE_PANIC_SEVERITY;
+
+ /*
+ * If we have decided that we just CAN'T continue, and the user
+ * has not set tolerant to an insane level, give up and die.
+ *
+ * This is mainly used in the case when the system doesn't
+ * support MCE broadcasting or it has been disabled.
+ */
+ if (no_way_out && tolerant < 3)
+ mce_panic("Fatal machine check on current CPU", final, msg);
+
+ /*
+ * If the error seems to be unrecoverable, something should be
+ * done. Try to kill as little as possible. If we can kill just
+ * one task, do that. If the user has set the tolerance very
+ * high, don't try to do anything at all.
+ */
+
+ if (kill_it && tolerant < 3)
+ force_sig(SIGBUS, current);
+
+ /* notify userspace ASAP */
+ set_thread_flag(TIF_MCE_NOTIFY);
+
+ if (worst > 0)
+ mce_report_event(regs);
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+out:
+ atomic_dec(&mce_entry);
+ sync_core();
+}
+EXPORT_SYMBOL_GPL(do_machine_check);
+
+/* dummy to break dependency. actual code is in mm/memory-failure.c */
+void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+{
+ printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+}
+
+/*
+ * Called after mce notification in process context. This code
+ * is allowed to sleep. Call the high level VM handler to process
+ * any corrupted pages.
+ * Assume that the work queue code only calls this one at a time
+ * per CPU.
+ * Note we don't disable preemption, so this code might run on the wrong
+ * CPU. In this case the event is picked up by the scheduled work queue.
+ * This is merely a fast path to expedite processing in some common
+ * cases.
+ */
+void mce_notify_process(void)
+{
+ unsigned long pfn;
+ mce_notify_irq();
+ while (mce_ring_get(&pfn))
+ memory_failure(pfn, MCE_VECTOR);
+}
+
+static void mce_process_work(struct work_struct *dummy)
+{
+ mce_notify_process();
+}
+
+#ifdef CONFIG_X86_MCE_INTEL
+/***
+ * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
+ * @cpu: The CPU on which the event occurred.
+ * @status: Event status information
+ *
+ * This function should be called by the thermal interrupt after the
+ * event has been processed and the decision was made to log the event
+ * further.
+ *
+ * The status parameter will be saved to the 'status' field of 'struct mce'
+ * and historically has been the register value of the
+ * MSR_IA32_THERMAL_STATUS (Intel) msr.
+ */
+void mce_log_therm_throt_event(__u64 status)
+{
+ struct mce m;
+
+ mce_setup(&m);
+ m.bank = MCE_THERMAL_BANK;
+ m.status = status;
+ mce_log(&m);
+}
+#endif /* CONFIG_X86_MCE_INTEL */
+
+/*
+ * Periodic polling timer for "silent" machine check errors. If the
+ * poller finds an MCE, poll 2x faster. When the poller finds no more
+ * errors, poll 2x slower (up to check_interval seconds).
+ */
+static int check_interval = 5 * 60; /* 5 minutes */
+
+static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
+static DEFINE_PER_CPU(struct timer_list, mce_timer);
+
+static void mcheck_timer(unsigned long data)
+{
+ struct timer_list *t = &per_cpu(mce_timer, data);
+ int *n;
+
+ WARN_ON(smp_processor_id() != data);
+
+ if (mce_available(&current_cpu_data)) {
+ machine_check_poll(MCP_TIMESTAMP,
+ &__get_cpu_var(mce_poll_banks));
+ }
+
+ /*
+ * Alert userspace if needed. If we logged an MCE, reduce the
+ * polling interval, otherwise increase the polling interval.
+ */
+ n = &__get_cpu_var(next_interval);
+ if (mce_notify_irq())
+ *n = max(*n/2, HZ/100);
+ else
+ *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+
+ t->expires = jiffies + *n;
+ add_timer(t);
+}
+
+static void mce_do_trigger(struct work_struct *work)
+{
+ call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+}
+
+static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+
+/*
+ * Notify the user(s) about new machine check events.
+ * Can be called from interrupt context, but not from machine check/NMI
+ * context.
+ */
+int mce_notify_irq(void)
+{
+ /* Not more than two messages every minute */
+ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
+
+ clear_thread_flag(TIF_MCE_NOTIFY);
+
+ if (test_and_clear_bit(0, &notify_user)) {
+ wake_up_interruptible(&mce_wait);
+
+ /*
+ * There is no risk of missing notifications because
+ * work_pending is always cleared before the function is
+ * executed.
+ */
+ if (trigger[0] && !work_pending(&mce_trigger_work))
+ schedule_work(&mce_trigger_work);
+
+ if (__ratelimit(&ratelimit))
+ printk(KERN_INFO "Machine check events logged\n");
+
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mce_notify_irq);
+
+/*
+ * Initialize Machine Checks for a CPU.
+ */
+static int mce_cap_init(void)
+{
+ unsigned b;
+ u64 cap;
+
+ rdmsrl(MSR_IA32_MCG_CAP, cap);
+
+ b = cap & MCG_BANKCNT_MASK;
+ printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+
+ if (b > MAX_NR_BANKS) {
+ printk(KERN_WARNING
+ "MCE: Using only %u machine check banks out of %u\n",
+ MAX_NR_BANKS, b);
+ b = MAX_NR_BANKS;
+ }
+
+ /* Don't support asymmetric configurations today */
+ WARN_ON(banks != 0 && b != banks);
+ banks = b;
+ if (!bank) {
+ bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
+ if (!bank)
+ return -ENOMEM;
+ memset(bank, 0xff, banks * sizeof(u64));
+ }
+
+ /* Use accurate RIP reporting if available. */
+ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
+ rip_msr = MSR_IA32_MCG_EIP;
+
+ if (cap & MCG_SER_P)
+ mce_ser = 1;
+
+ return 0;
+}
+
+static void mce_init(void)
+{
+ mce_banks_t all_banks;
+ u64 cap;
+ int i;
+
+ /*
+ * Log the machine checks left over from the previous reset.
+ */
+ bitmap_fill(all_banks, MAX_NR_BANKS);
+ machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
+
+ set_in_cr4(X86_CR4_MCE);
+
+ rdmsrl(MSR_IA32_MCG_CAP, cap);
+ if (cap & MCG_CTL_P)
+ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+ for (i = 0; i < banks; i++) {
+ if (skip_bank_init(i))
+ continue;
+ wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+ wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+ }
+}
+
+/* Add per CPU specific workarounds here */
+static void mce_cpu_quirks(struct cpuinfo_x86 *c)
+{
+ /* This should be disabled by the BIOS, but isn't always */
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (c->x86 == 15 && banks > 4) {
+ /*
+ * disable GART TBL walk error reporting, which
+ * trips off incorrectly with the IOMMU & 3ware
+ * & Cerberus:
+ */
+ clear_bit(10, (unsigned long *)&bank[4]);
+ }
+ if (c->x86 <= 17 && mce_bootlog < 0) {
+ /*
+ * Lots of broken BIOS around that don't clear them
+ * by default and leave crap in there. Don't log:
+ */
+ mce_bootlog = 0;
+ }
+ /*
+ * Various K7s with broken bank 0 around. Always disable
+ * by default.
+ */
+ if (c->x86 == 6)
+ bank[0] = 0;
+ }
+
+ if (c->x86_vendor == X86_VENDOR_INTEL) {
+ /*
+ * SDM documents that on family 6 bank 0 should not be written
+ * because it aliases to another special BIOS controlled
+ * register.
+ * But it's not aliased anymore on model 0x1a+
+ * Don't ignore bank 0 completely because there could be a
+ * valid event later, merely don't write CTL0.
+ */
+
+ if (c->x86 == 6 && c->x86_model < 0x1A)
+ __set_bit(0, &dont_init_banks);
+
+ /*
+ * All newer Intel systems support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
+ monarch_timeout < 0)
+ monarch_timeout = USEC_PER_SEC;
+ }
+ if (monarch_timeout < 0)
+ monarch_timeout = 0;
+ if (mce_bootlog != 0)
+ mce_panic_timeout = 30;
+}
+
+static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+{
+ if (c->x86 != 5)
+ return;
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (mce_p5_enabled())
+ intel_p5_mcheck_init(c);
+ break;
+ case X86_VENDOR_CENTAUR:
+ winchip_mcheck_init(c);
+ break;
+ }
+}
+
+static void mce_cpu_features(struct cpuinfo_x86 *c)
+{
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ mce_intel_feature_init(c);
+ break;
+ case X86_VENDOR_AMD:
+ mce_amd_feature_init(c);
+ break;
+ default:
+ break;
+ }
+}
+
+static void mce_init_timer(void)
+{
+ struct timer_list *t = &__get_cpu_var(mce_timer);
+ int *n = &__get_cpu_var(next_interval);
+
+ if (mce_ignore_ce)
+ return;
+
+ *n = check_interval * HZ;
+ if (!*n)
+ return;
+ setup_timer(t, mcheck_timer, smp_processor_id());
+ t->expires = round_jiffies(jiffies + *n);
+ add_timer(t);
+}
+
+/*
+ * Called for each booted CPU to set up machine checks.
+ * Must be called with preempt off:
+ */
+void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+{
+ if (mce_disabled)
+ return;
+
+ mce_ancient_init(c);
+
+ if (!mce_available(c))
+ return;
+
+ if (mce_cap_init() < 0) {
+ mce_disabled = 1;
+ return;
+ }
+ mce_cpu_quirks(c);
+
+ machine_check_vector = do_machine_check;
+
+ mce_init();
+ mce_cpu_features(c);
+ mce_init_timer();
+ INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+}
+
+/*
+ * Character device to read and clear the MCE log.
+ */
+
+static DEFINE_SPINLOCK(mce_state_lock);
+static int open_count; /* #times opened */
+static int open_exclu; /* already open exclusive? */
+
+static int mce_open(struct inode *inode, struct file *file)
+{
+ spin_lock(&mce_state_lock);
+
+ if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
+ spin_unlock(&mce_state_lock);
+
+ return -EBUSY;
+ }
+
+ if (file->f_flags & O_EXCL)
+ open_exclu = 1;
+ open_count++;
+
+ spin_unlock(&mce_state_lock);
+
+ return nonseekable_open(inode, file);
+}
+
+static int mce_release(struct inode *inode, struct file *file)
+{
+ spin_lock(&mce_state_lock);
+
+ open_count--;
+ open_exclu = 0;
+
+ spin_unlock(&mce_state_lock);
+
+ return 0;
+}
+
+static void collect_tscs(void *data)
+{
+ unsigned long *cpu_tsc = (unsigned long *)data;
+
+ rdtscll(cpu_tsc[smp_processor_id()]);
+}
+
+static DEFINE_MUTEX(mce_read_mutex);
+
+static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
+ loff_t *off)
+{
+ char __user *buf = ubuf;
+ unsigned long *cpu_tsc;
+ unsigned prev, next;
+ int i, err;
+
+ cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
+ if (!cpu_tsc)
+ return -ENOMEM;
+
+ mutex_lock(&mce_read_mutex);
+ next = rcu_dereference(mcelog.next);
+
+ /* Only supports full reads right now */
+ if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
+ mutex_unlock(&mce_read_mutex);
+ kfree(cpu_tsc);
+
+ return -EINVAL;
+ }
+
+ err = 0;
+ prev = 0;
+ do {
+ for (i = prev; i < next; i++) {
+ unsigned long start = jiffies;
+
+ while (!mcelog.entry[i].finished) {
+ if (time_after_eq(jiffies, start + 2)) {
+ memset(mcelog.entry + i, 0,
+ sizeof(struct mce));
+ goto timeout;
+ }
+ cpu_relax();
+ }
+ smp_rmb();
+ err |= copy_to_user(buf, mcelog.entry + i,
+ sizeof(struct mce));
+ buf += sizeof(struct mce);
+timeout:
+ ;
+ }
+
+ memset(mcelog.entry + prev, 0,
+ (next - prev) * sizeof(struct mce));
+ prev = next;
+ next = cmpxchg(&mcelog.next, prev, 0);
+ } while (next != prev);
+
+ synchronize_sched();
+
+ /*
+ * Collect entries that were still getting written before the
+ * synchronize.
+ */
+ on_each_cpu(collect_tscs, cpu_tsc, 1);
+
+ for (i = next; i < MCE_LOG_LEN; i++) {
+ if (mcelog.entry[i].finished &&
+ mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
+ err |= copy_to_user(buf, mcelog.entry+i,
+ sizeof(struct mce));
+ smp_rmb();
+ buf += sizeof(struct mce);
+ memset(&mcelog.entry[i], 0, sizeof(struct mce));
+ }
+ }
+ mutex_unlock(&mce_read_mutex);
+ kfree(cpu_tsc);
+
+ return err ? -EFAULT : buf - ubuf;
+}
+
+static unsigned int mce_poll(struct file *file, poll_table *wait)
+{
+ poll_wait(file, &mce_wait, wait);
+ if (rcu_dereference(mcelog.next))
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
+
+static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ int __user *p = (int __user *)arg;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case MCE_GET_RECORD_LEN:
+ return put_user(sizeof(struct mce), p);
+ case MCE_GET_LOG_LEN:
+ return put_user(MCE_LOG_LEN, p);
+ case MCE_GETCLEAR_FLAGS: {
+ unsigned flags;
+
+ do {
+ flags = mcelog.flags;
+ } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
+
+ return put_user(flags, p);
+ }
+ default:
+ return -ENOTTY;
+ }
+}
+
+/* Modified in mce-inject.c, so not static or const */
+struct file_operations mce_chrdev_ops = {
+ .open = mce_open,
+ .release = mce_release,
+ .read = mce_read,
+ .poll = mce_poll,
+ .unlocked_ioctl = mce_ioctl,
+};
+EXPORT_SYMBOL_GPL(mce_chrdev_ops);
+
+static struct miscdevice mce_log_device = {
+ MISC_MCELOG_MINOR,
+ "mcelog",
+ &mce_chrdev_ops,
+};
+
+/*
+ * mce=off Disables machine check
+ * mce=no_cmci Disables CMCI
+ * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
+ * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
+ * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
+ * monarchtimeout is how long to wait for other CPUs on machine
+ * check, or 0 to not wait
+ * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+ * mce=nobootlog Don't log MCEs from before booting.
+ */
+static int __init mcheck_enable(char *str)
+{
+ if (*str == 0)
+ enable_p5_mce();
+ if (*str == '=')
+ str++;
+ if (!strcmp(str, "off"))
+ mce_disabled = 1;
+ else if (!strcmp(str, "no_cmci"))
+ mce_cmci_disabled = 1;
+ else if (!strcmp(str, "dont_log_ce"))
+ mce_dont_log_ce = 1;
+ else if (!strcmp(str, "ignore_ce"))
+ mce_ignore_ce = 1;
+ else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
+ mce_bootlog = (str[0] == 'b');
+ else if (isdigit(str[0])) {
+ get_option(&str, &tolerant);
+ if (*str == ',') {
+ ++str;
+ get_option(&str, &monarch_timeout);
+ }
+ } else {
+ printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
+ str);
+ return 0;
+ }
+ return 1;
+}
+__setup("mce", mcheck_enable);
+
+/*
+ * Sysfs support
+ */
+
+/*
+ * Disable machine checks on suspend and shutdown. We can't really handle
+ * them later.
+ */
+static int mce_disable(void)
+{
+ int i;
+
+ for (i = 0; i < banks; i++) {
+ if (!skip_bank_init(i))
+ wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+ }
+ return 0;
+}
+
+static int mce_suspend(struct sys_device *dev, pm_message_t state)
+{
+ return mce_disable();
+}
+
+static int mce_shutdown(struct sys_device *dev)
+{
+ return mce_disable();
+}
+
+/*
+ * On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+ * Only one CPU is active at this time, the others get re-added later using
+ * CPU hotplug:
+ */
+static int mce_resume(struct sys_device *dev)
+{
+ mce_init();
+ mce_cpu_features(&current_cpu_data);
+
+ return 0;
+}
+
+static void mce_cpu_restart(void *data)
+{
+ del_timer_sync(&__get_cpu_var(mce_timer));
+ if (mce_available(&current_cpu_data))
+ mce_init();
+ mce_init_timer();
+}
+
+/* Reinit MCEs after user configuration changes */
+static void mce_restart(void)
+{
+ on_each_cpu(mce_cpu_restart, NULL, 1);
+}
+
+static struct sysdev_class mce_sysclass = {
+ .suspend = mce_suspend,
+ .shutdown = mce_shutdown,
+ .resume = mce_resume,
+ .name = "machinecheck",
+};
+
+DEFINE_PER_CPU(struct sys_device, mce_dev);
+
+__cpuinitdata
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
+
+static struct sysdev_attribute *bank_attrs;
+
+static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
+ char *buf)
+{
+ u64 b = bank[attr - bank_attrs];
+
+ return sprintf(buf, "%llx\n", b);
+}
+
+static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
+ const char *buf, size_t size)
+{
+ u64 new;
+
+ if (strict_strtoull(buf, 0, &new) < 0)
+ return -EINVAL;
+
+ bank[attr - bank_attrs] = new;
+ mce_restart();
+
+ return size;
+}
+
+static ssize_t
+show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
+{
+ strcpy(buf, trigger);
+ strcat(buf, "\n");
+ return strlen(trigger) + 1;
+}
+
+static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+ const char *buf, size_t siz)
+{
+ char *p;
+ int len;
+
+ strncpy(trigger, buf, sizeof(trigger));
+ trigger[sizeof(trigger)-1] = 0;
+ len = strlen(trigger);
+ p = strchr(trigger, '\n');
+
+ if (*p)
+ *p = 0;
+
+ return len;
+}
+
+static ssize_t store_int_with_restart(struct sys_device *s,
+ struct sysdev_attribute *attr,
+ const char *buf, size_t size)
+{
+ ssize_t ret = sysdev_store_int(s, attr, buf, size);
+ mce_restart();
+ return ret;
+}
+
+static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
+static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
+static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
+
+static struct sysdev_ext_attribute attr_check_interval = {
+ _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
+ store_int_with_restart),
+ &check_interval
+};
+
+static struct sysdev_attribute *mce_attrs[] = {
+ &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
+ &attr_monarch_timeout.attr,
+ NULL
+};
+
+static cpumask_var_t mce_dev_initialized;
+
+/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
+static __cpuinit int mce_create_device(unsigned int cpu)
+{
+ int err;
+ int i;
+
+ if (!mce_available(&boot_cpu_data))
+ return -EIO;
+
+ memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
+ per_cpu(mce_dev, cpu).id = cpu;
+ per_cpu(mce_dev, cpu).cls = &mce_sysclass;
+
+ err = sysdev_register(&per_cpu(mce_dev, cpu));
+ if (err)
+ return err;
+
+ for (i = 0; mce_attrs[i]; i++) {
+ err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ if (err)
+ goto error;
+ }
+ for (i = 0; i < banks; i++) {
+ err = sysdev_create_file(&per_cpu(mce_dev, cpu),
+ &bank_attrs[i]);
+ if (err)
+ goto error2;
+ }
+ cpumask_set_cpu(cpu, mce_dev_initialized);
+
+ return 0;
+error2:
+ while (--i >= 0)
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+error:
+ while (--i >= 0)
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+
+ sysdev_unregister(&per_cpu(mce_dev, cpu));
+
+ return err;
+}
+
+static __cpuinit void mce_remove_device(unsigned int cpu)
+{
+ int i;
+
+ if (!cpumask_test_cpu(cpu, mce_dev_initialized))
+ return;
+
+ for (i = 0; mce_attrs[i]; i++)
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+
+ for (i = 0; i < banks; i++)
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+
+ sysdev_unregister(&per_cpu(mce_dev, cpu));
+ cpumask_clear_cpu(cpu, mce_dev_initialized);
+}
+
+/* Make sure there are no machine checks on offlined CPUs. */
+static void mce_disable_cpu(void *h)
+{
+ unsigned long action = *(unsigned long *)h;
+ int i;
+
+ if (!mce_available(&current_cpu_data))
+ return;
+ if (!(action & CPU_TASKS_FROZEN))
+ cmci_clear();
+ for (i = 0; i < banks; i++) {
+ if (!skip_bank_init(i))
+ wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+ }
+}
+
+static void mce_reenable_cpu(void *h)
+{
+ unsigned long action = *(unsigned long *)h;
+ int i;
+
+ if (!mce_available(&current_cpu_data))
+ return;
+
+ if (!(action & CPU_TASKS_FROZEN))
+ cmci_reenable();
+ for (i = 0; i < banks; i++) {
+ if (!skip_bank_init(i))
+ wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+ }
+}
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static int __cpuinit
+mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ struct timer_list *t = &per_cpu(mce_timer, cpu);
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ mce_create_device(cpu);
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ if (threshold_cpu_callback)
+ threshold_cpu_callback(action, cpu);
+ mce_remove_device(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ del_timer_sync(t);
+ smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+ break;
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
+ t->expires = round_jiffies(jiffies +
+ __get_cpu_var(next_interval));
+ add_timer_on(t, cpu);
+ smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+ break;
+ case CPU_POST_DEAD:
+ /* intentionally ignoring frozen here */
+ cmci_rediscover(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier __cpuinitdata = {
+ .notifier_call = mce_cpu_callback,
+};
+
+static __init int mce_init_banks(void)
+{
+ int i;
+
+ bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
+ GFP_KERNEL);
+ if (!bank_attrs)
+ return -ENOMEM;
+
+ for (i = 0; i < banks; i++) {
+ struct sysdev_attribute *a = &bank_attrs[i];
+
+ a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
+ if (!a->attr.name)
+ goto nomem;
+
+ a->attr.mode = 0644;
+ a->show = show_bank;
+ a->store = set_bank;
+ }
+ return 0;
+
+nomem:
+ while (--i >= 0)
+ kfree(bank_attrs[i].attr.name);
+ kfree(bank_attrs);
+ bank_attrs = NULL;
+
+ return -ENOMEM;
+}
+
+static __init int mce_init_device(void)
+{
+ int err;
+ int i = 0;
+
+ if (!mce_available(&boot_cpu_data))
+ return -EIO;
+
+ alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
+
+ err = mce_init_banks();
+ if (err)
+ return err;
+
+ err = sysdev_class_register(&mce_sysclass);
+ if (err)
+ return err;
+
+ for_each_online_cpu(i) {
+ err = mce_create_device(i);
+ if (err)
+ return err;
+ }
+
+ register_hotcpu_notifier(&mce_cpu_notifier);
+ misc_register(&mce_log_device);
+
+ return err;
+}
+
+device_initcall(mce_init_device);
+
+#else /* CONFIG_X86_OLD_MCE: */
+
+int nr_mce_banks;
+EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+ if (mce_disabled == 1)
+ return;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ amd_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_INTEL:
+ if (c->x86 == 5)
+ intel_p5_mcheck_init(c);
+ if (c->x86 == 6)
+ intel_p6_mcheck_init(c);
+ if (c->x86 == 15)
+ intel_p4_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_CENTAUR:
+ if (c->x86 == 5)
+ winchip_mcheck_init(c);
+ break;
+
+ default:
+ break;
+ }
+ printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
+}
+
+static int __init mcheck_enable(char *str)
+{
+ mce_disabled = -1;
+ return 1;
+}
+
+__setup("mce", mcheck_enable);
+
+#endif /* CONFIG_X86_OLD_MCE */
+
+/*
+ * Old style boot options parsing. Only for compatibility.
+ */
+static int __init mcheck_disable(char *str)
+{
+ mce_disabled = 1;
+ return 1;
+}
+__setup("nomce", mcheck_disable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
index ae9f628838f..84a552b458c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -1,14 +1,38 @@
#include <linux/init.h>
#include <asm/mce.h>
+#ifdef CONFIG_X86_OLD_MCE
void amd_mcheck_init(struct cpuinfo_x86 *c);
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+#endif
+
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);
+extern int mce_p5_enable;
+static inline int mce_p5_enabled(void) { return mce_p5_enable; }
+static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline int mce_p5_enabled(void) { return 0; }
+static inline void enable_p5_mce(void) { }
+#endif
/* Call the installed machine check handler for this CPU setup. */
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
+#ifdef CONFIG_X86_OLD_MCE
+
extern int nr_mce_banks;
+void intel_set_thermal_handler(void);
+
+#else
+
+static inline void intel_set_thermal_handler(void) { }
+
+#endif
+
+void intel_init_thermal(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
deleted file mode 100644
index 3552119b091..00000000000
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-
-#include "mce.h"
-
-int mce_disabled;
-int nr_mce_banks;
-
-EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
-
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
- printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
-
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
- if (mce_disabled == 1)
- return;
-
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- amd_mcheck_init(c);
- break;
-
- case X86_VENDOR_INTEL:
- if (c->x86 == 5)
- intel_p5_mcheck_init(c);
- if (c->x86 == 6)
- intel_p6_mcheck_init(c);
- if (c->x86 == 15)
- intel_p4_mcheck_init(c);
- break;
-
- case X86_VENDOR_CENTAUR:
- if (c->x86 == 5)
- winchip_mcheck_init(c);
- break;
-
- default:
- break;
- }
-}
-
-static int __init mcheck_disable(char *str)
-{
- mce_disabled = 1;
- return 1;
-}
-
-static int __init mcheck_enable(char *str)
-{
- mce_disabled = -1;
- return 1;
-}
-
-__setup("nomce", mcheck_disable);
-__setup("mce", mcheck_enable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
deleted file mode 100644
index 289cc481502..00000000000
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ /dev/null
@@ -1,1188 +0,0 @@
-/*
- * Machine check handler.
- * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
- * Rest from unknown author(s).
- * 2004 Andi Kleen. Rewrote most of it.
- * Copyright 2008 Intel Corporation
- * Author: Andi Kleen
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/smp_lock.h>
-#include <linux/string.h>
-#include <linux/rcupdate.h>
-#include <linux/kallsyms.h>
-#include <linux/sysdev.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/capability.h>
-#include <linux/cpu.h>
-#include <linux/percpu.h>
-#include <linux/poll.h>
-#include <linux/thread_info.h>
-#include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/kdebug.h>
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-#include <linux/ratelimit.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/mce.h>
-#include <asm/uaccess.h>
-#include <asm/smp.h>
-#include <asm/idle.h>
-
-#define MISC_MCELOG_MINOR 227
-
-atomic_t mce_entry;
-
-static int mce_dont_init;
-
-/*
- * Tolerant levels:
- * 0: always panic on uncorrected errors, log corrected errors
- * 1: panic or SIGBUS on uncorrected errors, log corrected errors
- * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
- * 3: never panic or SIGBUS, log all errors (for testing only)
- */
-static int tolerant = 1;
-static int banks;
-static u64 *bank;
-static unsigned long notify_user;
-static int rip_msr;
-static int mce_bootlog = -1;
-static atomic_t mce_events;
-
-static char trigger[128];
-static char *trigger_argv[2] = { trigger, NULL };
-
-static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
-
-/* MCA banks polled by the period polling timer for corrected events */
-DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
- [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
-};
-
-/* Do initial initialization of a struct mce */
-void mce_setup(struct mce *m)
-{
- memset(m, 0, sizeof(struct mce));
- m->cpu = smp_processor_id();
- rdtscll(m->tsc);
-}
-
-/*
- * Lockless MCE logging infrastructure.
- * This avoids deadlocks on printk locks without having to break locks. Also
- * separate MCEs from kernel messages to avoid bogus bug reports.
- */
-
-static struct mce_log mcelog = {
- MCE_LOG_SIGNATURE,
- MCE_LOG_LEN,
-};
-
-void mce_log(struct mce *mce)
-{
- unsigned next, entry;
- atomic_inc(&mce_events);
- mce->finished = 0;
- wmb();
- for (;;) {
- entry = rcu_dereference(mcelog.next);
- for (;;) {
- /* When the buffer fills up discard new entries. Assume
- that the earlier errors are the more interesting. */
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
- return;
- }
- /* Old left over entry. Skip. */
- if (mcelog.entry[entry].finished) {
- entry++;
- continue;
- }
- break;
- }
- smp_rmb();
- next = entry + 1;
- if (cmpxchg(&mcelog.next, entry, next) == entry)
- break;
- }
- memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
- wmb();
- mcelog.entry[entry].finished = 1;
- wmb();
-
- set_bit(0, &notify_user);
-}
-
-static void print_mce(struct mce *m)
-{
- printk(KERN_EMERG "\n"
- KERN_EMERG "HARDWARE ERROR\n"
- KERN_EMERG
- "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
- m->cpu, m->mcgstatus, m->bank, m->status);
- if (m->ip) {
- printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
- !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->ip);
- if (m->cs == __KERNEL_CS)
- print_symbol("{%s}", m->ip);
- printk("\n");
- }
- printk(KERN_EMERG "TSC %llx ", m->tsc);
- if (m->addr)
- printk("ADDR %llx ", m->addr);
- if (m->misc)
- printk("MISC %llx ", m->misc);
- printk("\n");
- printk(KERN_EMERG "This is not a software problem!\n");
- printk(KERN_EMERG "Run through mcelog --ascii to decode "
- "and contact your hardware vendor\n");
-}
-
-static void mce_panic(char *msg, struct mce *backup, unsigned long start)
-{
- int i;
-
- oops_begin();
- for (i = 0; i < MCE_LOG_LEN; i++) {
- unsigned long tsc = mcelog.entry[i].tsc;
-
- if (time_before(tsc, start))
- continue;
- print_mce(&mcelog.entry[i]);
- if (backup && mcelog.entry[i].tsc == backup->tsc)
- backup = NULL;
- }
- if (backup)
- print_mce(backup);
- panic(msg);
-}
-
-int mce_available(struct cpuinfo_x86 *c)
-{
- if (mce_dont_init)
- return 0;
- return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
-}
-
-static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
-{
- if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
- m->ip = regs->ip;
- m->cs = regs->cs;
- } else {
- m->ip = 0;
- m->cs = 0;
- }
- if (rip_msr) {
- /* Assume the RIP in the MSR is exact. Is this true? */
- m->mcgstatus |= MCG_STATUS_EIPV;
- rdmsrl(rip_msr, m->ip);
- m->cs = 0;
- }
-}
-
-/*
- * Poll for corrected events or events that happened before reset.
- * Those are just logged through /dev/mcelog.
- *
- * This is executed in standard interrupt context.
- */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
-{
- struct mce m;
- int i;
-
- mce_setup(&m);
-
- rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
- for (i = 0; i < banks; i++) {
- if (!bank[i] || !test_bit(i, *b))
- continue;
-
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
- m.tsc = 0;
-
- barrier();
- rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
- if (!(m.status & MCI_STATUS_VAL))
- continue;
-
- /*
- * Uncorrected events are handled by the exception handler
- * when it is enabled. But when the exception is disabled log
- * everything.
- *
- * TBD do the same check for MCI_STATUS_EN here?
- */
- if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
- continue;
-
- if (m.status & MCI_STATUS_MISCV)
- rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
- if (m.status & MCI_STATUS_ADDRV)
- rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
-
- if (!(flags & MCP_TIMESTAMP))
- m.tsc = 0;
- /*
- * Don't get the IP here because it's unlikely to
- * have anything to do with the actual error location.
- */
- if (!(flags & MCP_DONTLOG)) {
- mce_log(&m);
- add_taint(TAINT_MACHINE_CHECK);
- }
-
- /*
- * Clear state for this bank.
- */
- wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
- }
-
- /*
- * Don't clear MCG_STATUS here because it's only defined for
- * exceptions.
- */
-}
-
-/*
- * The actual machine check handler. This only handles real
- * exceptions when something got corrupted coming in through int 18.
- *
- * This is executed in NMI context not subject to normal locking rules. This
- * implies that most kernel services cannot be safely used. Don't even
- * think about putting a printk in there!
- */
-void do_machine_check(struct pt_regs * regs, long error_code)
-{
- struct mce m, panicm;
- u64 mcestart = 0;
- int i;
- int panicm_found = 0;
- /*
- * If no_way_out gets set, there is no safe way to recover from this
- * MCE. If tolerant is cranked up, we'll try anyway.
- */
- int no_way_out = 0;
- /*
- * If kill_it gets set, there might be a way to recover from this
- * error.
- */
- int kill_it = 0;
- DECLARE_BITMAP(toclear, MAX_NR_BANKS);
-
- atomic_inc(&mce_entry);
-
- if (notify_die(DIE_NMI, "machine check", regs, error_code,
- 18, SIGKILL) == NOTIFY_STOP)
- goto out2;
- if (!banks)
- goto out2;
-
- mce_setup(&m);
-
- rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
- /* if the restart IP is not valid, we're done for */
- if (!(m.mcgstatus & MCG_STATUS_RIPV))
- no_way_out = 1;
-
- rdtscll(mcestart);
- barrier();
-
- for (i = 0; i < banks; i++) {
- __clear_bit(i, toclear);
- if (!bank[i])
- continue;
-
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
-
- rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
- if ((m.status & MCI_STATUS_VAL) == 0)
- continue;
-
- /*
- * Non uncorrected errors are handled by machine_check_poll
- * Leave them alone.
- */
- if ((m.status & MCI_STATUS_UC) == 0)
- continue;
-
- /*
- * Set taint even when machine check was not enabled.
- */
- add_taint(TAINT_MACHINE_CHECK);
-
- __set_bit(i, toclear);
-
- if (m.status & MCI_STATUS_EN) {
- /* if PCC was set, there's no way out */
- no_way_out |= !!(m.status & MCI_STATUS_PCC);
- /*
- * If this error was uncorrectable and there was
- * an overflow, we're in trouble. If no overflow,
- * we might get away with just killing a task.
- */
- if (m.status & MCI_STATUS_UC) {
- if (tolerant < 1 || m.status & MCI_STATUS_OVER)
- no_way_out = 1;
- kill_it = 1;
- }
- } else {
- /*
- * Machine check event was not enabled. Clear, but
- * ignore.
- */
- continue;
- }
-
- if (m.status & MCI_STATUS_MISCV)
- rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
- if (m.status & MCI_STATUS_ADDRV)
- rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
-
- mce_get_rip(&m, regs);
- mce_log(&m);
-
- /* Did this bank cause the exception? */
- /* Assume that the bank with uncorrectable errors did it,
- and that there is only a single one. */
- if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
- panicm = m;
- panicm_found = 1;
- }
- }
-
- /* If we didn't find an uncorrectable error, pick
- the last one (shouldn't happen, just being safe). */
- if (!panicm_found)
- panicm = m;
-
- /*
- * If we have decided that we just CAN'T continue, and the user
- * has not set tolerant to an insane level, give up and die.
- */
- if (no_way_out && tolerant < 3)
- mce_panic("Machine check", &panicm, mcestart);
-
- /*
- * If the error seems to be unrecoverable, something should be
- * done. Try to kill as little as possible. If we can kill just
- * one task, do that. If the user has set the tolerance very
- * high, don't try to do anything at all.
- */
- if (kill_it && tolerant < 3) {
- int user_space = 0;
-
- /*
- * If the EIPV bit is set, it means the saved IP is the
- * instruction which caused the MCE.
- */
- if (m.mcgstatus & MCG_STATUS_EIPV)
- user_space = panicm.ip && (panicm.cs & 3);
-
- /*
- * If we know that the error was in user space, send a
- * SIGBUS. Otherwise, panic if tolerance is low.
- *
- * force_sig() takes an awful lot of locks and has a slight
- * risk of deadlocking.
- */
- if (user_space) {
- force_sig(SIGBUS, current);
- } else if (panic_on_oops || tolerant < 2) {
- mce_panic("Uncorrected machine check",
- &panicm, mcestart);
- }
- }
-
- /* notify userspace ASAP */
- set_thread_flag(TIF_MCE_NOTIFY);
-
- /* the last thing we do is clear state */
- for (i = 0; i < banks; i++) {
- if (test_bit(i, toclear))
- wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
- }
- wrmsrl(MSR_IA32_MCG_STATUS, 0);
- out2:
- atomic_dec(&mce_entry);
-}
-EXPORT_SYMBOL_GPL(do_machine_check);
-
-#ifdef CONFIG_X86_MCE_INTEL
-/***
- * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
- * @cpu: The CPU on which the event occurred.
- * @status: Event status information
- *
- * This function should be called by the thermal interrupt after the
- * event has been processed and the decision was made to log the event
- * further.
- *
- * The status parameter will be saved to the 'status' field of 'struct mce'
- * and historically has been the register value of the
- * MSR_IA32_THERMAL_STATUS (Intel) msr.
- */
-void mce_log_therm_throt_event(__u64 status)
-{
- struct mce m;
-
- mce_setup(&m);
- m.bank = MCE_THERMAL_BANK;
- m.status = status;
- mce_log(&m);
-}
-#endif /* CONFIG_X86_MCE_INTEL */
-
-/*
- * Periodic polling timer for "silent" machine check errors. If the
- * poller finds an MCE, poll 2x faster. When the poller finds no more
- * errors, poll 2x slower (up to check_interval seconds).
- */
-
-static int check_interval = 5 * 60; /* 5 minutes */
-static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
-static void mcheck_timer(unsigned long);
-static DEFINE_PER_CPU(struct timer_list, mce_timer);
-
-static void mcheck_timer(unsigned long data)
-{
- struct timer_list *t = &per_cpu(mce_timer, data);
- int *n;
-
- WARN_ON(smp_processor_id() != data);
-
- if (mce_available(&current_cpu_data))
- machine_check_poll(MCP_TIMESTAMP,
- &__get_cpu_var(mce_poll_banks));
-
- /*
- * Alert userspace if needed. If we logged an MCE, reduce the
- * polling interval, otherwise increase the polling interval.
- */
- n = &__get_cpu_var(next_interval);
- if (mce_notify_user()) {
- *n = max(*n/2, HZ/100);
- } else {
- *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
- }
-
- t->expires = jiffies + *n;
- add_timer(t);
-}
-
-static void mce_do_trigger(struct work_struct *work)
-{
- call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
-}
-
-static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
-
-/*
- * Notify the user(s) about new machine check events.
- * Can be called from interrupt context, but not from machine check/NMI
- * context.
- */
-int mce_notify_user(void)
-{
- /* Not more than two messages every minute */
- static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
-
- clear_thread_flag(TIF_MCE_NOTIFY);
- if (test_and_clear_bit(0, &notify_user)) {
- wake_up_interruptible(&mce_wait);
-
- /*
- * There is no risk of missing notifications because
- * work_pending is always cleared before the function is
- * executed.
- */
- if (trigger[0] && !work_pending(&mce_trigger_work))
- schedule_work(&mce_trigger_work);
-
- if (__ratelimit(&ratelimit))
- printk(KERN_INFO "Machine check events logged\n");
-
- return 1;
- }
- return 0;
-}
-
-/* see if the idle task needs to notify userspace */
-static int
-mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
-{
- /* IDLE_END should be safe - interrupts are back on */
- if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
- mce_notify_user();
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block mce_idle_notifier = {
- .notifier_call = mce_idle_callback,
-};
-
-static __init int periodic_mcheck_init(void)
-{
- idle_notifier_register(&mce_idle_notifier);
- return 0;
-}
-__initcall(periodic_mcheck_init);
-
-/*
- * Initialize Machine Checks for a CPU.
- */
-static int mce_cap_init(void)
-{
- u64 cap;
- unsigned b;
-
- rdmsrl(MSR_IA32_MCG_CAP, cap);
- b = cap & 0xff;
- if (b > MAX_NR_BANKS) {
- printk(KERN_WARNING
- "MCE: Using only %u machine check banks out of %u\n",
- MAX_NR_BANKS, b);
- b = MAX_NR_BANKS;
- }
-
- /* Don't support asymmetric configurations today */
- WARN_ON(banks != 0 && b != banks);
- banks = b;
- if (!bank) {
- bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
- if (!bank)
- return -ENOMEM;
- memset(bank, 0xff, banks * sizeof(u64));
- }
-
- /* Use accurate RIP reporting if available. */
- if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
- rip_msr = MSR_IA32_MCG_EIP;
-
- return 0;
-}
-
-static void mce_init(void *dummy)
-{
- u64 cap;
- int i;
- mce_banks_t all_banks;
-
- /*
- * Log the machine checks left over from the previous reset.
- */
- bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
-
- set_in_cr4(X86_CR4_MCE);
-
- rdmsrl(MSR_IA32_MCG_CAP, cap);
- if (cap & MCG_CTL_P)
- wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-
- for (i = 0; i < banks; i++) {
- wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
- wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
- }
-}
-
-/* Add per CPU specific workarounds here */
-static void mce_cpu_quirks(struct cpuinfo_x86 *c)
-{
- /* This should be disabled by the BIOS, but isn't always */
- if (c->x86_vendor == X86_VENDOR_AMD) {
- if (c->x86 == 15 && banks > 4)
- /* disable GART TBL walk error reporting, which trips off
- incorrectly with the IOMMU & 3ware & Cerberus. */
- clear_bit(10, (unsigned long *)&bank[4]);
- if(c->x86 <= 17 && mce_bootlog < 0)
- /* Lots of broken BIOS around that don't clear them
- by default and leave crap in there. Don't log. */
- mce_bootlog = 0;
- }
-
-}
-
-static void mce_cpu_features(struct cpuinfo_x86 *c)
-{
- switch (c->x86_vendor) {
- case X86_VENDOR_INTEL:
- mce_intel_feature_init(c);
- break;
- case X86_VENDOR_AMD:
- mce_amd_feature_init(c);
- break;
- default:
- break;
- }
-}
-
-static void mce_init_timer(void)
-{
- struct timer_list *t = &__get_cpu_var(mce_timer);
- int *n = &__get_cpu_var(next_interval);
-
- *n = check_interval * HZ;
- if (!*n)
- return;
- setup_timer(t, mcheck_timer, smp_processor_id());
- t->expires = round_jiffies(jiffies + *n);
- add_timer(t);
-}
-
-/*
- * Called for each booted CPU to set up machine checks.
- * Must be called with preempt off.
- */
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
-{
- if (!mce_available(c))
- return;
-
- if (mce_cap_init() < 0) {
- mce_dont_init = 1;
- return;
- }
- mce_cpu_quirks(c);
-
- mce_init(NULL);
- mce_cpu_features(c);
- mce_init_timer();
-}
-
-/*
- * Character device to read and clear the MCE log.
- */
-
-static DEFINE_SPINLOCK(mce_state_lock);
-static int open_count; /* #times opened */
-static int open_exclu; /* already open exclusive? */
-
-static int mce_open(struct inode *inode, struct file *file)
-{
- lock_kernel();
- spin_lock(&mce_state_lock);
-
- if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
- spin_unlock(&mce_state_lock);
- unlock_kernel();
- return -EBUSY;
- }
-
- if (file->f_flags & O_EXCL)
- open_exclu = 1;
- open_count++;
-
- spin_unlock(&mce_state_lock);
- unlock_kernel();
-
- return nonseekable_open(inode, file);
-}
-
-static int mce_release(struct inode *inode, struct file *file)
-{
- spin_lock(&mce_state_lock);
-
- open_count--;
- open_exclu = 0;
-
- spin_unlock(&mce_state_lock);
-
- return 0;
-}
-
-static void collect_tscs(void *data)
-{
- unsigned long *cpu_tsc = (unsigned long *)data;
-
- rdtscll(cpu_tsc[smp_processor_id()]);
-}
-
-static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
- loff_t *off)
-{
- unsigned long *cpu_tsc;
- static DEFINE_MUTEX(mce_read_mutex);
- unsigned prev, next;
- char __user *buf = ubuf;
- int i, err;
-
- cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
- if (!cpu_tsc)
- return -ENOMEM;
-
- mutex_lock(&mce_read_mutex);
- next = rcu_dereference(mcelog.next);
-
- /* Only supports full reads right now */
- if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
- mutex_unlock(&mce_read_mutex);
- kfree(cpu_tsc);
- return -EINVAL;
- }
-
- err = 0;
- prev = 0;
- do {
- for (i = prev; i < next; i++) {
- unsigned long start = jiffies;
-
- while (!mcelog.entry[i].finished) {
- if (time_after_eq(jiffies, start + 2)) {
- memset(mcelog.entry + i, 0,
- sizeof(struct mce));
- goto timeout;
- }
- cpu_relax();
- }
- smp_rmb();
- err |= copy_to_user(buf, mcelog.entry + i,
- sizeof(struct mce));
- buf += sizeof(struct mce);
-timeout:
- ;
- }
-
- memset(mcelog.entry + prev, 0,
- (next - prev) * sizeof(struct mce));
- prev = next;
- next = cmpxchg(&mcelog.next, prev, 0);
- } while (next != prev);
-
- synchronize_sched();
-
- /*
- * Collect entries that were still getting written before the
- * synchronize.
- */
- on_each_cpu(collect_tscs, cpu_tsc, 1);
- for (i = next; i < MCE_LOG_LEN; i++) {
- if (mcelog.entry[i].finished &&
- mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
- err |= copy_to_user(buf, mcelog.entry+i,
- sizeof(struct mce));
- smp_rmb();
- buf += sizeof(struct mce);
- memset(&mcelog.entry[i], 0, sizeof(struct mce));
- }
- }
- mutex_unlock(&mce_read_mutex);
- kfree(cpu_tsc);
- return err ? -EFAULT : buf - ubuf;
-}
-
-static unsigned int mce_poll(struct file *file, poll_table *wait)
-{
- poll_wait(file, &mce_wait, wait);
- if (rcu_dereference(mcelog.next))
- return POLLIN | POLLRDNORM;
- return 0;
-}
-
-static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
- int __user *p = (int __user *)arg;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- switch (cmd) {
- case MCE_GET_RECORD_LEN:
- return put_user(sizeof(struct mce), p);
- case MCE_GET_LOG_LEN:
- return put_user(MCE_LOG_LEN, p);
- case MCE_GETCLEAR_FLAGS: {
- unsigned flags;
-
- do {
- flags = mcelog.flags;
- } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
- return put_user(flags, p);
- }
- default:
- return -ENOTTY;
- }
-}
-
-static const struct file_operations mce_chrdev_ops = {
- .open = mce_open,
- .release = mce_release,
- .read = mce_read,
- .poll = mce_poll,
- .unlocked_ioctl = mce_ioctl,
-};
-
-static struct miscdevice mce_log_device = {
- MISC_MCELOG_MINOR,
- "mcelog",
- &mce_chrdev_ops,
-};
-
-/*
- * Old style boot options parsing. Only for compatibility.
- */
-static int __init mcheck_disable(char *str)
-{
- mce_dont_init = 1;
- return 1;
-}
-
-/* mce=off disables machine check.
- mce=TOLERANCELEVEL (number, see above)
- mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
- mce=nobootlog Don't log MCEs from before booting. */
-static int __init mcheck_enable(char *str)
-{
- if (!strcmp(str, "off"))
- mce_dont_init = 1;
- else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
- mce_bootlog = str[0] == 'b';
- else if (isdigit(str[0]))
- get_option(&str, &tolerant);
- else
- printk("mce= argument %s ignored. Please use /sys", str);
- return 1;
-}
-
-__setup("nomce", mcheck_disable);
-__setup("mce=", mcheck_enable);
-
-/*
- * Sysfs support
- */
-
-/*
- * Disable machine checks on suspend and shutdown. We can't really handle
- * them later.
- */
-static int mce_disable(void)
-{
- int i;
-
- for (i = 0; i < banks; i++)
- wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
- return 0;
-}
-
-static int mce_suspend(struct sys_device *dev, pm_message_t state)
-{
- return mce_disable();
-}
-
-static int mce_shutdown(struct sys_device *dev)
-{
- return mce_disable();
-}
-
-/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
- Only one CPU is active at this time, the others get readded later using
- CPU hotplug. */
-static int mce_resume(struct sys_device *dev)
-{
- mce_init(NULL);
- mce_cpu_features(&current_cpu_data);
- return 0;
-}
-
-static void mce_cpu_restart(void *data)
-{
- del_timer_sync(&__get_cpu_var(mce_timer));
- if (mce_available(&current_cpu_data))
- mce_init(NULL);
- mce_init_timer();
-}
-
-/* Reinit MCEs after user configuration changes */
-static void mce_restart(void)
-{
- on_each_cpu(mce_cpu_restart, NULL, 1);
-}
-
-static struct sysdev_class mce_sysclass = {
- .suspend = mce_suspend,
- .shutdown = mce_shutdown,
- .resume = mce_resume,
- .name = "machinecheck",
-};
-
-DEFINE_PER_CPU(struct sys_device, device_mce);
-void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
-
-/* Why are there no generic functions for this? */
-#define ACCESSOR(name, var, start) \
- static ssize_t show_ ## name(struct sys_device *s, \
- struct sysdev_attribute *attr, \
- char *buf) { \
- return sprintf(buf, "%lx\n", (unsigned long)var); \
- } \
- static ssize_t set_ ## name(struct sys_device *s, \
- struct sysdev_attribute *attr, \
- const char *buf, size_t siz) { \
- char *end; \
- unsigned long new = simple_strtoul(buf, &end, 0); \
- if (end == buf) return -EINVAL; \
- var = new; \
- start; \
- return end-buf; \
- } \
- static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-
-static struct sysdev_attribute *bank_attrs;
-
-static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
- char *buf)
-{
- u64 b = bank[attr - bank_attrs];
- return sprintf(buf, "%llx\n", b);
-}
-
-static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
- const char *buf, size_t siz)
-{
- char *end;
- u64 new = simple_strtoull(buf, &end, 0);
- if (end == buf)
- return -EINVAL;
- bank[attr - bank_attrs] = new;
- mce_restart();
- return end-buf;
-}
-
-static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
- char *buf)
-{
- strcpy(buf, trigger);
- strcat(buf, "\n");
- return strlen(trigger) + 1;
-}
-
-static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
- const char *buf,size_t siz)
-{
- char *p;
- int len;
- strncpy(trigger, buf, sizeof(trigger));
- trigger[sizeof(trigger)-1] = 0;
- len = strlen(trigger);
- p = strchr(trigger, '\n');
- if (*p) *p = 0;
- return len;
-}
-
-static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
-static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
-ACCESSOR(check_interval,check_interval,mce_restart())
-static struct sysdev_attribute *mce_attributes[] = {
- &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
- NULL
-};
-
-static cpumask_var_t mce_device_initialized;
-
-/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
-static __cpuinit int mce_create_device(unsigned int cpu)
-{
- int err;
- int i;
-
- if (!mce_available(&boot_cpu_data))
- return -EIO;
-
- memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
- per_cpu(device_mce,cpu).id = cpu;
- per_cpu(device_mce,cpu).cls = &mce_sysclass;
-
- err = sysdev_register(&per_cpu(device_mce,cpu));
- if (err)
- return err;
-
- for (i = 0; mce_attributes[i]; i++) {
- err = sysdev_create_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
- if (err)
- goto error;
- }
- for (i = 0; i < banks; i++) {
- err = sysdev_create_file(&per_cpu(device_mce, cpu),
- &bank_attrs[i]);
- if (err)
- goto error2;
- }
- cpumask_set_cpu(cpu, mce_device_initialized);
-
- return 0;
-error2:
- while (--i >= 0) {
- sysdev_remove_file(&per_cpu(device_mce, cpu),
- &bank_attrs[i]);
- }
-error:
- while (--i >= 0) {
- sysdev_remove_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
- }
- sysdev_unregister(&per_cpu(device_mce,cpu));
-
- return err;
-}
-
-static __cpuinit void mce_remove_device(unsigned int cpu)
-{
- int i;
-
- if (!cpumask_test_cpu(cpu, mce_device_initialized))
- return;
-
- for (i = 0; mce_attributes[i]; i++)
- sysdev_remove_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
- for (i = 0; i < banks; i++)
- sysdev_remove_file(&per_cpu(device_mce, cpu),
- &bank_attrs[i]);
- sysdev_unregister(&per_cpu(device_mce,cpu));
- cpumask_clear_cpu(cpu, mce_device_initialized);
-}
-
-/* Make sure there are no machine checks on offlined CPUs. */
-static void mce_disable_cpu(void *h)
-{
- int i;
- unsigned long action = *(unsigned long *)h;
-
- if (!mce_available(&current_cpu_data))
- return;
- if (!(action & CPU_TASKS_FROZEN))
- cmci_clear();
- for (i = 0; i < banks; i++)
- wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
-}
-
-static void mce_reenable_cpu(void *h)
-{
- int i;
- unsigned long action = *(unsigned long *)h;
-
- if (!mce_available(&current_cpu_data))
- return;
- if (!(action & CPU_TASKS_FROZEN))
- cmci_reenable();
- for (i = 0; i < banks; i++)
- wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
-}
-
-/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
- struct timer_list *t = &per_cpu(mce_timer, cpu);
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- mce_create_device(cpu);
- if (threshold_cpu_callback)
- threshold_cpu_callback(action, cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- if (threshold_cpu_callback)
- threshold_cpu_callback(action, cpu);
- mce_remove_device(cpu);
- break;
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- del_timer_sync(t);
- smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
- break;
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies(jiffies +
- __get_cpu_var(next_interval));
- add_timer_on(t, cpu);
- smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
- break;
- case CPU_POST_DEAD:
- /* intentionally ignoring frozen here */
- cmci_rediscover(cpu);
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block mce_cpu_notifier __cpuinitdata = {
- .notifier_call = mce_cpu_callback,
-};
-
-static __init int mce_init_banks(void)
-{
- int i;
-
- bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
- GFP_KERNEL);
- if (!bank_attrs)
- return -ENOMEM;
-
- for (i = 0; i < banks; i++) {
- struct sysdev_attribute *a = &bank_attrs[i];
- a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
- if (!a->attr.name)
- goto nomem;
- a->attr.mode = 0644;
- a->show = show_bank;
- a->store = set_bank;
- }
- return 0;
-
-nomem:
- while (--i >= 0)
- kfree(bank_attrs[i].attr.name);
- kfree(bank_attrs);
- bank_attrs = NULL;
- return -ENOMEM;
-}
-
-static __init int mce_init_device(void)
-{
- int err;
- int i = 0;
-
- if (!mce_available(&boot_cpu_data))
- return -EIO;
-
- zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
-
- err = mce_init_banks();
- if (err)
- return err;
-
- err = sysdev_class_register(&mce_sysclass);
- if (err)
- return err;
-
- for_each_online_cpu(i) {
- err = mce_create_device(i);
- if (err)
- return err;
- }
-
- register_hotcpu_notifier(&mce_cpu_notifier);
- misc_register(&mce_log_device);
- return err;
-}
-
-device_initcall(mce_init_device);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 56dde9c4bc9..ddae21620bd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -13,22 +13,22 @@
*
* All MC4_MISCi registers are shared between multi-cores
*/
-
-#include <linux/cpu.h>
-#include <linux/errno.h>
-#include <linux/init.h>
#include <linux/interrupt.h>
-#include <linux/kobject.h>
#include <linux/notifier.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
+#include <linux/kobject.h>
+#include <linux/percpu.h>
#include <linux/sysdev.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
#include <linux/sysfs.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+
#include <asm/apic.h>
+#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
-#include <asm/percpu.h>
-#include <asm/idle.h>
#define PFX "mce_threshold: "
#define VERSION "version 1.1.1"
@@ -48,26 +48,26 @@
#define MCG_XBLK_ADDR 0xC0000400
struct threshold_block {
- unsigned int block;
- unsigned int bank;
- unsigned int cpu;
- u32 address;
- u16 interrupt_enable;
- u16 threshold_limit;
- struct kobject kobj;
- struct list_head miscj;
+ unsigned int block;
+ unsigned int bank;
+ unsigned int cpu;
+ u32 address;
+ u16 interrupt_enable;
+ u16 threshold_limit;
+ struct kobject kobj;
+ struct list_head miscj;
};
/* defaults used early on boot */
static struct threshold_block threshold_defaults = {
- .interrupt_enable = 0,
- .threshold_limit = THRESHOLD_MAX,
+ .interrupt_enable = 0,
+ .threshold_limit = THRESHOLD_MAX,
};
struct threshold_bank {
- struct kobject *kobj;
- struct threshold_block *blocks;
- cpumask_var_t cpus;
+ struct kobject *kobj;
+ struct threshold_block *blocks;
+ cpumask_var_t cpus;
};
static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
@@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void);
*/
struct thresh_restart {
- struct threshold_block *b;
- int reset;
- u16 old_limit;
+ struct threshold_block *b;
+ int reset;
+ u16 old_limit;
};
/* must be called with correct cpu affinity */
@@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr)
} else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (mci_misc_hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
+
mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
(new_count & THRESHOLD_MAX);
}
@@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr)
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
- unsigned int bank, block;
unsigned int cpu = smp_processor_id();
- u8 lvt_off;
u32 low = 0, high = 0, address = 0;
+ unsigned int bank, block;
struct thresh_restart tr;
+ u8 lvt_off;
for (bank = 0; bank < NR_BANKS; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (!address)
break;
address += MCG_XBLK_ADDR;
- }
- else
+ } else
++address;
if (rdmsr_safe(address, &low, &high))
@@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
*/
static void amd_threshold_interrupt(void)
{
+ u32 low = 0, high = 0, address = 0;
unsigned int bank, block;
struct mce m;
- u32 low = 0, high = 0, address = 0;
mce_setup(&m);
@@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void)
if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
continue;
for (block = 0; block < NR_BLOCKS; ++block) {
- if (block == 0)
+ if (block == 0) {
address = MSR_IA32_MC0_MISC + bank * 4;
- else if (block == 1) {
+ } else if (block == 1) {
address = (low & MASK_BLKPTR_LO) >> 21;
if (!address)
break;
address += MCG_XBLK_ADDR;
- }
- else
+ } else {
++address;
+ }
if (rdmsr_safe(address, &low, &high))
break;
@@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void)
(high & MASK_LOCKED_HI))
continue;
- /* Log the machine check that caused the threshold
- event. */
+ /*
+ * Log the machine check that caused the threshold
+ * event.
+ */
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
@@ -254,48 +256,52 @@ static void amd_threshold_interrupt(void)
struct threshold_attr {
struct attribute attr;
- ssize_t(*show) (struct threshold_block *, char *);
- ssize_t(*store) (struct threshold_block *, const char *, size_t count);
+ ssize_t (*show) (struct threshold_block *, char *);
+ ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};
-#define SHOW_FIELDS(name) \
-static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
-{ \
- return sprintf(buf, "%lx\n", (unsigned long) b->name); \
+#define SHOW_FIELDS(name) \
+static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
+{ \
+ return sprintf(buf, "%lx\n", (unsigned long) b->name); \
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)
-static ssize_t store_interrupt_enable(struct threshold_block *b,
- const char *buf, size_t count)
+static ssize_t
+store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
- char *end;
struct thresh_restart tr;
- unsigned long new = simple_strtoul(buf, &end, 0);
- if (end == buf)
+ unsigned long new;
+
+ if (strict_strtoul(buf, 0, &new) < 0)
return -EINVAL;
+
b->interrupt_enable = !!new;
- tr.b = b;
- tr.reset = 0;
- tr.old_limit = 0;
+ tr.b = b;
+ tr.reset = 0;
+ tr.old_limit = 0;
+
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
- return end - buf;
+ return size;
}
-static ssize_t store_threshold_limit(struct threshold_block *b,
- const char *buf, size_t count)
+static ssize_t
+store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
- char *end;
struct thresh_restart tr;
- unsigned long new = simple_strtoul(buf, &end, 0);
- if (end == buf)
+ unsigned long new;
+
+ if (strict_strtoul(buf, 0, &new) < 0)
return -EINVAL;
+
if (new > THRESHOLD_MAX)
new = THRESHOLD_MAX;
if (new < 1)
new = 1;
+
tr.old_limit = b->threshold_limit;
b->threshold_limit = new;
tr.b = b;
@@ -303,12 +309,12 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
- return end - buf;
+ return size;
}
struct threshold_block_cross_cpu {
- struct threshold_block *tb;
- long retval;
+ struct threshold_block *tb;
+ long retval;
};
static void local_error_count_handler(void *_tbcc)
@@ -338,16 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b,
return 1;
}
-#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .show = _show, \
- .store = _store, \
+#define RW_ATTR(val) \
+static struct threshold_attr val = { \
+ .attr = {.name = __stringify(val), .mode = 0644 }, \
+ .show = show_## val, \
+ .store = store_## val, \
};
-#define RW_ATTR(name) \
-static struct threshold_attr name = \
- THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
-
RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);
RW_ATTR(error_count);
@@ -359,15 +362,17 @@ static struct attribute *default_attrs[] = {
NULL
};
-#define to_block(k) container_of(k, struct threshold_block, kobj)
-#define to_attr(a) container_of(a, struct threshold_attr, attr)
+#define to_block(k) container_of(k, struct threshold_block, kobj)
+#define to_attr(a) container_of(a, struct threshold_attr, attr)
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct threshold_block *b = to_block(kobj);
struct threshold_attr *a = to_attr(attr);
ssize_t ret;
+
ret = a->show ? a->show(b, buf) : -EIO;
+
return ret;
}
@@ -377,18 +382,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
struct threshold_block *b = to_block(kobj);
struct threshold_attr *a = to_attr(attr);
ssize_t ret;
+
ret = a->store ? a->store(b, buf, count) : -EIO;
+
return ret;
}
static struct sysfs_ops threshold_ops = {
- .show = show,
- .store = store,
+ .show = show,
+ .store = store,
};
static struct kobj_type threshold_ktype = {
- .sysfs_ops = &threshold_ops,
- .default_attrs = default_attrs,
+ .sysfs_ops = &threshold_ops,
+ .default_attrs = default_attrs,
};
static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
@@ -396,9 +403,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
unsigned int block,
u32 address)
{
- int err;
- u32 low, high;
struct threshold_block *b = NULL;
+ u32 low, high;
+ int err;
if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
return 0;
@@ -421,20 +428,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
if (!b)
return -ENOMEM;
- b->block = block;
- b->bank = bank;
- b->cpu = cpu;
- b->address = address;
- b->interrupt_enable = 0;
- b->threshold_limit = THRESHOLD_MAX;
+ b->block = block;
+ b->bank = bank;
+ b->cpu = cpu;
+ b->address = address;
+ b->interrupt_enable = 0;
+ b->threshold_limit = THRESHOLD_MAX;
INIT_LIST_HEAD(&b->miscj);
- if (per_cpu(threshold_banks, cpu)[bank]->blocks)
+ if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
list_add(&b->miscj,
&per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
- else
+ } else {
per_cpu(threshold_banks, cpu)[bank]->blocks = b;
+ }
err = kobject_init_and_add(&b->kobj, &threshold_ktype,
per_cpu(threshold_banks, cpu)[bank]->kobj,
@@ -447,8 +455,9 @@ recurse:
if (!address)
return 0;
address += MCG_XBLK_ADDR;
- } else
+ } else {
++address;
+ }
err = allocate_threshold_blocks(cpu, bank, ++block, address);
if (err)
@@ -500,13 +509,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (!b)
goto out;
- err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
+ err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
b->kobj, name);
if (err)
goto out;
cpumask_copy(b->cpus, cpu_core_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
+
goto out;
}
#endif
@@ -522,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}
- b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
+ b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
if (!b->kobj)
goto out_free;
@@ -542,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (i == cpu)
continue;
- err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
+ err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
b->kobj, name);
if (err)
goto out;
@@ -605,15 +615,13 @@ static void deallocate_threshold_block(unsigned int cpu,
static void threshold_remove_bank(unsigned int cpu, int bank)
{
- int i = 0;
struct threshold_bank *b;
char name[32];
+ int i = 0;
b = per_cpu(threshold_banks, cpu)[bank];
-
if (!b)
return;
-
if (!b->blocks)
goto free_out;
@@ -622,8 +630,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
+ sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;
+
return;
}
#endif
@@ -633,7 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (i == cpu)
continue;
- sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
+ sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
per_cpu(threshold_banks, i)[bank] = NULL;
}
@@ -659,12 +668,9 @@ static void threshold_remove_device(unsigned int cpu)
}
/* get notified when a cpu comes on/off */
-static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
- unsigned int cpu)
+static void __cpuinit
+amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
- if (cpu >= NR_CPUS)
- return;
-
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
@@ -686,11 +692,12 @@ static __init int threshold_init_device(void)
/* to hit CPUs online before the notifier is up */
for_each_online_cpu(lcpu) {
int err = threshold_create_device(lcpu);
+
if (err)
return err;
}
threshold_cpu_callback = amd_64_threshold_cpu_callback;
+
return 0;
}
-
device_initcall(threshold_init_device);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
new file mode 100644
index 00000000000..2b011d2d857
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -0,0 +1,74 @@
+/*
+ * Common code for Intel machine checks
+ */
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/therm_throt.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/apic.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+ unsigned int cpu = smp_processor_id();
+ int tm2 = 0;
+ u32 l, h;
+
+ /* Thermal monitoring depends on ACPI and clock modulation*/
+ if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+ return;
+
+ /*
+ * First check if its enabled already, in which case there might
+ * be some SMM goo which handles it, so we can't even put a handler
+ * since it might be delivered via SMI already:
+ */
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ h = apic_read(APIC_LVTTHMR);
+ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+ printk(KERN_DEBUG
+ "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+ return;
+ }
+
+ if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
+ tm2 = 1;
+
+ /* Check whether a vector already exists */
+ if (h & APIC_VECTOR_MASK) {
+ printk(KERN_DEBUG
+ "CPU%d: Thermal LVT vector (%#x) already installed\n",
+ cpu, (h & APIC_VECTOR_MASK));
+ return;
+ }
+
+ /* We'll mask the thermal vector in the lapic till we're ready: */
+ h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+ apic_write(APIC_LVTTHMR, h);
+
+ rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+ wrmsr(MSR_IA32_THERM_INTERRUPT,
+ l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+
+ intel_set_thermal_handler();
+
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+ /* Unmask the thermal vector: */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+ printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+ cpu, tm2 ? "TM2" : "TM1");
+
+ /* enable thermal throttle processing */
+ atomic_set(&therm_throt_en, 1);
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 65a0fceedcd..f2ef6952c40 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -16,6 +16,8 @@
#include <asm/idle.h>
#include <asm/therm_throt.h>
+#include "mce.h"
+
asmlinkage void smp_thermal_interrupt(void)
{
__u64 msr_val;
@@ -26,67 +28,13 @@ asmlinkage void smp_thermal_interrupt(void)
irq_enter();
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- if (therm_throt_process(msr_val & 1))
+ if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
mce_log_therm_throt_event(msr_val);
inc_irq_stat(irq_thermal_count);
irq_exit();
}
-static void intel_init_thermal(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int tm2 = 0;
- unsigned int cpu = smp_processor_id();
-
- if (!cpu_has(c, X86_FEATURE_ACPI))
- return;
-
- if (!cpu_has(c, X86_FEATURE_ACC))
- return;
-
- /* first check if TM1 is already enabled by the BIOS, in which
- * case there might be some SMM goo which handles it, so we can't even
- * put a handler since it might be delivered via SMI already.
- */
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
- if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
- printk(KERN_DEBUG
- "CPU%d: Thermal monitoring handled by SMI\n", cpu);
- return;
- }
-
- if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
- tm2 = 1;
-
- if (h & APIC_VECTOR_MASK) {
- printk(KERN_DEBUG
- "CPU%d: Thermal LVT vector (%#x) already "
- "installed\n", cpu, (h & APIC_VECTOR_MASK));
- return;
- }
-
- h = THERMAL_APIC_VECTOR;
- h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
- apic_write(APIC_LVTTHMR, h);
-
- rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
- wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
-
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
- l = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
- cpu, tm2 ? "TM2" : "TM1");
-
- /* enable thermal throttle processing */
- atomic_set(&therm_throt_en, 1);
- return;
-}
-
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
@@ -108,6 +56,9 @@ static int cmci_supported(int *banks)
{
u64 cap;
+ if (mce_cmci_disabled || mce_ignore_ce)
+ return 0;
+
/*
* Vendor check is not strictly needed, but the initial
* initialization is vendor keyed and this
@@ -131,7 +82,7 @@ static int cmci_supported(int *banks)
static void intel_threshold_interrupt(void)
{
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
- mce_notify_user();
+ mce_notify_irq();
}
static void print_update(char *type, int *hdr, int num)
@@ -247,7 +198,7 @@ void cmci_rediscover(int dying)
return;
cpumask_copy(old, &current->cpus_allowed);
- for_each_online_cpu (cpu) {
+ for_each_online_cpu(cpu) {
if (cpu == dying)
continue;
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index a74af128efc..70b710420f7 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -6,15 +6,14 @@
* This file contains routines to check for non-fatal MCEs every 15s
*
*/
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/workqueue.h>
#include <linux/interrupt.h>
-#include <linux/smp.h>
+#include <linux/workqueue.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -22,9 +21,9 @@
#include "mce.h"
-static int firstbank;
+static int firstbank;
-#define MCE_RATE 15*HZ /* timer rate is 15s */
+#define MCE_RATE (15*HZ) /* timer rate is 15s */
static void mce_checkregs(void *info)
{
@@ -34,23 +33,24 @@ static void mce_checkregs(void *info)
for (i = firstbank; i < nr_mce_banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
- if (high & (1<<31)) {
- printk(KERN_INFO "MCE: The hardware reports a non "
- "fatal, correctable incident occurred on "
- "CPU %d.\n",
+ if (!(high & (1<<31)))
+ continue;
+
+ printk(KERN_INFO "MCE: The hardware reports a non fatal, "
+ "correctable incident occurred on CPU %d.\n",
smp_processor_id());
- printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
- /*
- * Scrub the error so we don't pick it up in MCE_RATE
- * seconds time.
- */
- wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
+
+ printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+
+ /*
+ * Scrub the error so we don't pick it up in MCE_RATE
+ * seconds time:
+ */
+ wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+
+ /* Serialize: */
+ wmb();
+ add_taint(TAINT_MACHINE_CHECK);
}
}
@@ -77,16 +77,17 @@ static int __init init_nonfatal_mce_checker(void)
/* Some Athlons misbehave when we frob bank 0 */
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 6)
- firstbank = 1;
+ boot_cpu_data.x86 == 6)
+ firstbank = 1;
else
- firstbank = 0;
+ firstbank = 0;
/*
* Check for non-fatal errors every MCE_RATE s
*/
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
printk(KERN_INFO "Machine check exception polling timer started.\n");
+
return 0;
}
module_init(init_nonfatal_mce_checker);
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index f53bdcbaf38..82cee108a2d 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -2,18 +2,17 @@
* P4 specific Machine Check Exception Reporting
*/
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
#include <linux/smp.h>
+#include <asm/therm_throt.h>
#include <asm/processor.h>
#include <asm/system.h>
-#include <asm/msr.h>
#include <asm/apic.h>
-
-#include <asm/therm_throt.h>
+#include <asm/msr.h>
#include "mce.h"
@@ -36,6 +35,7 @@ static int mce_num_extended_msrs;
#ifdef CONFIG_X86_MCE_P4THERMAL
+
static void unexpected_thermal_interrupt(struct pt_regs *regs)
{
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
@@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
add_taint(TAINT_MACHINE_CHECK);
}
-/* P4/Xeon Thermal transition interrupt handler */
+/* P4/Xeon Thermal transition interrupt handler: */
static void intel_thermal_interrupt(struct pt_regs *regs)
{
__u64 msr_val;
@@ -51,11 +51,12 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
ack_APIC_irq();
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- therm_throt_process(msr_val & 0x1);
+ therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
}
-/* Thermal interrupt handler for this CPU setup */
-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+/* Thermal interrupt handler for this CPU setup: */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
+ unexpected_thermal_interrupt;
void smp_thermal_interrupt(struct pt_regs *regs)
{
@@ -65,67 +66,15 @@ void smp_thermal_interrupt(struct pt_regs *regs)
irq_exit();
}
-/* P4/Xeon Thermal regulation detect and init */
-static void intel_init_thermal(struct cpuinfo_x86 *c)
+void intel_set_thermal_handler(void)
{
- u32 l, h;
- unsigned int cpu = smp_processor_id();
-
- /* Thermal monitoring */
- if (!cpu_has(c, X86_FEATURE_ACPI))
- return; /* -ENODEV */
-
- /* Clock modulation */
- if (!cpu_has(c, X86_FEATURE_ACC))
- return; /* -ENODEV */
-
- /* first check if its enabled already, in which case there might
- * be some SMM goo which handles it, so we can't even put a handler
- * since it might be delivered via SMI already -zwanem.
- */
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- h = apic_read(APIC_LVTTHMR);
- if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
- printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
- cpu);
- return; /* -EBUSY */
- }
-
- /* check whether a vector already exists, temporarily masked? */
- if (h & APIC_VECTOR_MASK) {
- printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
- "installed\n",
- cpu, (h & APIC_VECTOR_MASK));
- return; /* -EBUSY */
- }
-
- /* The temperature transition interrupt handler setup */
- h = THERMAL_APIC_VECTOR; /* our delivery vector */
- h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
- apic_write(APIC_LVTTHMR, h);
-
- rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
- wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
-
- /* ok we're good to go... */
vendor_thermal_interrupt = intel_thermal_interrupt;
-
- rdmsr(MSR_IA32_MISC_ENABLE, l, h);
- wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
-
- l = apic_read(APIC_LVTTHMR);
- apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
-
- /* enable thermal throttle processing */
- atomic_set(&therm_throt_en, 1);
- return;
}
-#endif /* CONFIG_X86_MCE_P4THERMAL */
+#endif /* CONFIG_X86_MCE_P4THERMAL */
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
{
u32 h;
@@ -143,9 +92,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
static void intel_machine_check(struct pt_regs *regs, long error_code)
{
- int recover = 1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
+ int recover = 1;
int i;
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -157,7 +106,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
if (mce_num_extended_msrs > 0) {
struct intel_mce_extended_msrs dbg;
+
intel_get_extended_msrs(&dbg);
+
printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
@@ -171,6 +122,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
if (high & (1<<31)) {
char misc[20];
char addr[24];
+
misc[0] = addr[0] = '\0';
if (high & (1<<29))
recover |= 1;
@@ -196,6 +148,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
panic("Unable to continue");
printk(KERN_EMERG "Attempting to continue.\n");
+
/*
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
* recoverable/continuable.This will allow BIOS to look at the MSRs
@@ -217,7 +170,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
-
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index c9f77ea69ed..015f481ab1b 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -2,11 +2,10 @@
* P5 specific Machine Check Exception Reporting
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
*/
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
#include <linux/smp.h>
#include <asm/processor.h>
@@ -15,39 +14,58 @@
#include "mce.h"
-/* Machine check handler for Pentium class Intel */
+/* By default disabled */
+int mce_p5_enable;
+
+/* Machine check handler for Pentium class Intel CPUs: */
static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
u32 loaddr, hi, lotype;
+
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
- printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
- if (lotype&(1<<5))
- printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
+
+ printk(KERN_EMERG
+ "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
+ smp_processor_id(), loaddr, lotype);
+
+ if (lotype & (1<<5)) {
+ printk(KERN_EMERG
+ "CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+ smp_processor_id());
+ }
+
add_taint(TAINT_MACHINE_CHECK);
}
-/* Set up machine check reporting for processors with Intel style MCE */
+/* Set up machine check reporting for processors with Intel style MCE: */
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
- /*Check for MCE support */
+ /* Check for MCE support: */
if (!cpu_has(c, X86_FEATURE_MCE))
return;
- /* Default P5 to off as its often misconnected */
+#ifdef CONFIG_X86_OLD_MCE
+ /* Default P5 to off as its often misconnected: */
if (mce_disabled != -1)
return;
+#endif
+
machine_check_vector = pentium_machine_check;
+ /* Make sure the vector pointer is visible before we enable MCEs: */
wmb();
- /* Read registers before enabling */
+ /* Read registers before enabling: */
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
- printk(KERN_INFO "Intel old style machine check architecture supported.\n");
+ printk(KERN_INFO
+ "Intel old style machine check architecture supported.\n");
- /* Enable MCE */
+ /* Enable MCE: */
set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+ printk(KERN_INFO
+ "Intel old style machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 2ac52d7b434..43c24e66745 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -2,11 +2,10 @@
* P6 specific Machine Check Exception Reporting
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
*/
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
#include <linux/smp.h>
#include <asm/processor.h>
@@ -18,9 +17,9 @@
/* Machine Check Handler For PII/PIII */
static void intel_machine_check(struct pt_regs *regs, long error_code)
{
- int recover = 1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
+ int recover = 1;
int i;
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -35,12 +34,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
if (high & (1<<31)) {
char misc[20];
char addr[24];
- misc[0] = addr[0] = '\0';
+
+ misc[0] = '\0';
+ addr[0] = '\0';
+
if (high & (1<<29))
recover |= 1;
if (high & (1<<25))
recover |= 2;
high &= ~(1<<31);
+
if (high & (1<<27)) {
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,6 +52,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
}
+
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
smp_processor_id(), i, high, low, misc, addr);
}
@@ -63,16 +67,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
/*
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
* recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error.
+ * for errors if the OS could not log the error:
*/
for (i = 0; i < nr_mce_banks; i++) {
unsigned int msr;
+
msr = MSR_IA32_MC0_STATUS+i*4;
rdmsr(msr, low, high);
if (high & (1<<31)) {
- /* Clear it */
+ /* Clear it: */
wrmsr(msr, 0UL, 0UL);
- /* Serialize */
+ /* Serialize: */
wmb();
add_taint(TAINT_MACHINE_CHECK);
}
@@ -81,7 +86,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
-/* Set up machine check reporting for processors with Intel style MCE */
+/* Set up machine check reporting for processors with Intel style MCE: */
void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -97,6 +102,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
/* Ok machine check is available */
machine_check_vector = intel_machine_check;
+ /* Make sure the vector pointer is visible before we enable MCEs: */
wmb();
printk(KERN_INFO "Intel machine check architecture supported.\n");
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d5ae2243f0b..7b1ae2e20ba 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -1,7 +1,7 @@
/*
- *
* Thermal throttle event support code (such as syslog messaging and rate
* limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ *
* This allows consistent reporting of CPU thermal throttle events.
*
* Maintains a counter in /sys that keeps track of the number of thermal
@@ -13,43 +13,43 @@
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
* Inspired by Ross Biro's and Al Borchers' counter code.
*/
-
+#include <linux/notifier.h>
+#include <linux/jiffies.h>
#include <linux/percpu.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>
-#include <asm/cpu.h>
-#include <linux/notifier.h>
-#include <linux/jiffies.h>
+
#include <asm/therm_throt.h>
/* How long to wait between reporting thermal events */
-#define CHECK_INTERVAL (300 * HZ)
+#define CHECK_INTERVAL (300 * HZ)
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-atomic_t therm_throt_en = ATOMIC_INIT(0);
+
+atomic_t therm_throt_en = ATOMIC_INIT(0);
#ifdef CONFIG_SYSFS
-#define define_therm_throt_sysdev_one_ro(_name) \
- static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
-
-#define define_therm_throt_sysdev_show_func(name) \
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
- char *buf) \
-{ \
- unsigned int cpu = dev->id; \
- ssize_t ret; \
- \
- preempt_disable(); /* CPU hotplug */ \
- if (cpu_online(cpu)) \
- ret = sprintf(buf, "%lu\n", \
- per_cpu(thermal_throttle_##name, cpu)); \
- else \
- ret = 0; \
- preempt_enable(); \
- \
- return ret; \
+#define define_therm_throt_sysdev_one_ro(_name) \
+ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+
+#define define_therm_throt_sysdev_show_func(name) \
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
+ char *buf) \
+{ \
+ unsigned int cpu = dev->id; \
+ ssize_t ret; \
+ \
+ preempt_disable(); /* CPU hotplug */ \
+ if (cpu_online(cpu)) \
+ ret = sprintf(buf, "%lu\n", \
+ per_cpu(thermal_throttle_##name, cpu)); \
+ else \
+ ret = 0; \
+ preempt_enable(); \
+ \
+ return ret; \
}
define_therm_throt_sysdev_show_func(count);
@@ -61,8 +61,8 @@ static struct attribute *thermal_throttle_attrs[] = {
};
static struct attribute_group thermal_throttle_attr_group = {
- .attrs = thermal_throttle_attrs,
- .name = "thermal_throttle"
+ .attrs = thermal_throttle_attrs,
+ .name = "thermal_throttle"
};
#endif /* CONFIG_SYSFS */
@@ -110,10 +110,11 @@ int therm_throt_process(int curr)
}
#ifdef CONFIG_SYSFS
-/* Add/Remove thermal_throttle interface for CPU device */
+/* Add/Remove thermal_throttle interface for CPU device: */
static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
{
- return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+ return sysfs_create_group(&sys_dev->kobj,
+ &thermal_throttle_attr_group);
}
static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
@@ -121,19 +122,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
}
-/* Mutex protecting device creation against CPU hotplug */
+/* Mutex protecting device creation against CPU hotplug: */
static DEFINE_MUTEX(therm_cpu_lock);
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static __cpuinit int
+thermal_throttle_cpu_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
int err = 0;
sys_dev = get_cpu_sysdev(cpu);
+
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 23ee9e730f7..d746df2909c 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -17,7 +17,7 @@ static void default_threshold_interrupt(void)
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-asmlinkage void mce_threshold_interrupt(void)
+asmlinkage void smp_threshold_interrupt(void)
{
exit_idle();
irq_enter();
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2a043d89811..81b02487090 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -2,11 +2,10 @@
* IDT Winchip specific Machine Check Exception Reporting
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
*/
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -14,7 +13,7 @@
#include "mce.h"
-/* Machine check handler for WinChip C6 */
+/* Machine check handler for WinChip C6: */
static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
@@ -25,12 +24,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
+
machine_check_vector = winchip_machine_check;
+ /* Make sure the vector pointer is visible before we enable MCEs: */
wmb();
+
rdmsr(MSR_IDT_FCR1, lo, hi);
lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */
lo &= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);
+
set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+
+ printk(KERN_INFO
+ "Winchip machine check reporting enabled on CPU#0.\n");
}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a4742a340d8..de74f0a3e0e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -963,6 +963,8 @@ END(\sym)
#ifdef CONFIG_SMP
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+apicinterrupt REBOOT_VECTOR \
+ reboot_interrupt smp_reboot_interrupt
#endif
#ifdef CONFIG_X86_UV
@@ -994,10 +996,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
#endif
apicinterrupt THRESHOLD_APIC_VECTOR \
- threshold_interrupt mce_threshold_interrupt
+ threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
+#ifdef CONFIG_X86_MCE
+apicinterrupt MCE_SELF_VECTOR \
+ mce_self_interrupt smp_mce_self_interrupt
+#endif
+
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
call_function_single_interrupt smp_call_function_single_interrupt
@@ -1379,7 +1386,7 @@ errorentry xen_stack_segment do_stack_segment
errorentry general_protection do_general_protection
errorentry page_fault do_page_fault
#ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
+paranoidzeroentry machine_check *machine_check_vector(%rip)
#endif
/*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 38287b5f116..b0cdde6932f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -12,6 +12,7 @@
#include <asm/io_apic.h>
#include <asm/irq.h>
#include <asm/idle.h>
+#include <asm/mce.h>
#include <asm/hw_irq.h>
atomic_t irq_err_count;
@@ -96,13 +97,23 @@ static int show_other_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
seq_printf(p, " Thermal event interrupts\n");
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
# endif
#endif
+#ifdef CONFIG_X86_NEW_MCE
+ seq_printf(p, "%*s: ", prec, "MCE");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+ seq_printf(p, " Machine check exceptions\n");
+ seq_printf(p, "%*s: ", prec, "MCP");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+ seq_printf(p, " Machine check polls\n");
+#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
@@ -185,10 +196,14 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#endif
#ifdef CONFIG_X86_MCE
sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
# endif
#endif
+#ifdef CONFIG_X86_NEW_MCE
+ sum += per_cpu(mce_exception_count, cpu);
+ sum += per_cpu(mce_poll_count, cpu);
+#endif
return sum;
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 267c6624c77..696f0e475c2 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -173,6 +173,9 @@ static void __init smp_intr_init(void)
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
+
+ /* IPI used for rebooting/stopping */
+ alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
#endif
#endif /* CONFIG_SMP */
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 0a813b17b17..4c578751e94 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -24,11 +24,11 @@
#include <asm/ucontext.h>
#include <asm/i387.h>
#include <asm/vdso.h>
+#include <asm/mce.h>
#ifdef CONFIG_X86_64
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
-#include <asm/mce.h>
#endif /* CONFIG_X86_64 */
#include <asm/syscall.h>
@@ -856,10 +856,10 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#ifdef CONFIG_X86_NEW_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
- mce_notify_user();
+ mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
/* deal with pending signal delivery */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 28f5fb495a6..ec1de97600e 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -150,14 +150,40 @@ void native_send_call_func_ipi(const struct cpumask *mask)
* this function calls the 'stop' function on all other CPUs in the system.
*/
+asmlinkage void smp_reboot_interrupt(void)
+{
+ ack_APIC_irq();
+ irq_enter();
+ stop_this_cpu(NULL);
+ irq_exit();
+}
+
static void native_smp_send_stop(void)
{
unsigned long flags;
+ unsigned long wait;
if (reboot_force)
return;
- smp_call_function(stop_this_cpu, NULL, 0);
+ /*
+ * Use an own vector here because smp_call_function
+ * does lots of things not suitable in a panic situation.
+ * On most systems we could also use an NMI here,
+ * but there are a few systems around where NMI
+ * is problematic so stay with an non NMI for now
+ * (this implies we cannot stop CPUs spinning with irq off
+ * currently)
+ */
+ if (num_online_cpus() > 1) {
+ apic->send_IPI_allbutself(REBOOT_VECTOR);
+
+ /* Don't wait longer than a second */
+ wait = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && wait--)
+ udelay(1);
+ }
+
local_irq_save(flags);
disable_local_APIC();
local_irq_restore(flags);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 07d60c870ce..1e1e27b7d43 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -798,15 +798,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
return new_kesp;
}
-#else
+#endif
+
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
{
}
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
-#endif
/*
* 'math_state_restore()' saves the current math information in the
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 74d0e622a51..4dfdd03e708 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -241,6 +241,11 @@ config CRYPTO_XTS
key size 256, 384 or 512 bits. This implementation currently
can't handle a sectorsize which is not a multiple of 16 bytes.
+config CRYPTO_FPU
+ tristate
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_MANAGER
+
comment "Hash modes"
config CRYPTO_HMAC
@@ -486,6 +491,7 @@ config CRYPTO_AES_NI_INTEL
select CRYPTO_AES_X86_64
select CRYPTO_CRYPTD
select CRYPTO_ALGAPI
+ select CRYPTO_FPU
help
Use Intel AES-NI instructions for AES algorithm.
@@ -505,6 +511,10 @@ config CRYPTO_AES_NI_INTEL
See <http://csrc.nist.gov/encryption/aes/> for more information.
+ In addition to AES cipher algorithm support, the
+ acceleration for some popular block cipher mode is supported
+ too, including ECB, CBC, CTR, LRW, PCBC, XTS.
+
config CRYPTO_ANUBIS
tristate "Anubis cipher algorithm"
select CRYPTO_ALGAPI
diff --git a/crypto/algboss.c b/crypto/algboss.c
index 6906f92aeac..9908dd830c2 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -280,29 +280,13 @@ static struct notifier_block cryptomgr_notifier = {
static int __init cryptomgr_init(void)
{
- int err;
-
- err = testmgr_init();
- if (err)
- return err;
-
- err = crypto_register_notifier(&cryptomgr_notifier);
- if (err)
- goto free_testmgr;
-
- return 0;
-
-free_testmgr:
- testmgr_exit();
- return err;
+ return crypto_register_notifier(&cryptomgr_notifier);
}
static void __exit cryptomgr_exit(void)
{
int err = crypto_unregister_notifier(&cryptomgr_notifier);
BUG_ON(err);
-
- testmgr_exit();
}
subsys_initcall(cryptomgr_init);
diff --git a/crypto/api.c b/crypto/api.c
index fd2545decb2..d5944f92b41 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -217,14 +217,11 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
alg = crypto_alg_lookup(name, type, mask);
if (!alg) {
- char tmp[CRYPTO_MAX_ALG_NAME];
-
- request_module(name);
+ request_module("%s", name);
if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask &
- CRYPTO_ALG_NEED_FALLBACK) &&
- snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp))
- request_module(tmp);
+ CRYPTO_ALG_NEED_FALLBACK))
+ request_module("%s-all", name);
alg = crypto_alg_lookup(name, type, mask);
}
@@ -580,20 +577,17 @@ EXPORT_SYMBOL_GPL(crypto_alloc_tfm);
void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm)
{
struct crypto_alg *alg;
- int size;
if (unlikely(!mem))
return;
alg = tfm->__crt_alg;
- size = ksize(mem);
if (!tfm->exit && alg->cra_exit)
alg->cra_exit(tfm);
crypto_exit_ops(tfm);
crypto_mod_put(alg);
- memset(mem, 0, size);
- kfree(mem);
+ kzfree(mem);
}
EXPORT_SYMBOL_GPL(crypto_destroy_tfm);
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index d14b22658d7..ae5fa99d5d3 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -586,20 +586,24 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name,
u32 type, u32 mask)
{
char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
- struct crypto_ablkcipher *tfm;
+ struct crypto_tfm *tfm;
if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
"cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
return ERR_PTR(-EINVAL);
- tfm = crypto_alloc_ablkcipher(cryptd_alg_name, type, mask);
+ type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
+ type |= CRYPTO_ALG_TYPE_BLKCIPHER;
+ mask &= ~CRYPTO_ALG_TYPE_MASK;
+ mask |= (CRYPTO_ALG_GENIV | CRYPTO_ALG_TYPE_BLKCIPHER_MASK);
+ tfm = crypto_alloc_base(cryptd_alg_name, type, mask);
if (IS_ERR(tfm))
return ERR_CAST(tfm);
- if (crypto_ablkcipher_tfm(tfm)->__crt_alg->cra_module != THIS_MODULE) {
- crypto_free_ablkcipher(tfm);
+ if (tfm->__crt_alg->cra_module != THIS_MODULE) {
+ crypto_free_tfm(tfm);
return ERR_PTR(-EINVAL);
}
- return __cryptd_ablkcipher_cast(tfm);
+ return __cryptd_ablkcipher_cast(__crypto_ablkcipher_cast(tfm));
}
EXPORT_SYMBOL_GPL(cryptd_alloc_ablkcipher);
diff --git a/crypto/internal.h b/crypto/internal.h
index fc76e1f37fc..113579a82df 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -121,9 +121,6 @@ int crypto_register_notifier(struct notifier_block *nb);
int crypto_unregister_notifier(struct notifier_block *nb);
int crypto_probing_notify(unsigned long val, void *v);
-int __init testmgr_init(void);
-void testmgr_exit(void);
-
static inline void crypto_alg_put(struct crypto_alg *alg)
{
if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
diff --git a/crypto/pcompress.c b/crypto/pcompress.c
index ca9a4af91ef..bcadc03726b 100644
--- a/crypto/pcompress.c
+++ b/crypto/pcompress.c
@@ -26,6 +26,7 @@
#include <linux/string.h>
#include <crypto/compress.h>
+#include <crypto/internal/compress.h>
#include "internal.h"
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index c3c9124209a..d59ba5079d1 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -27,6 +27,7 @@
#include <linux/timex.h>
#include <linux/interrupt.h>
#include "tcrypt.h"
+#include "internal.h"
/*
* Need slab memory for testing (size in number of pages).
@@ -396,16 +397,16 @@ static void test_hash_speed(const char *algo, unsigned int sec,
struct scatterlist sg[TVMEMSIZE];
struct crypto_hash *tfm;
struct hash_desc desc;
- char output[1024];
+ static char output[1024];
int i;
int ret;
- printk("\ntesting speed of %s\n", algo);
+ printk(KERN_INFO "\ntesting speed of %s\n", algo);
tfm = crypto_alloc_hash(algo, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm)) {
- printk("failed to load transform for %s: %ld\n", algo,
+ printk(KERN_ERR "failed to load transform for %s: %ld\n", algo,
PTR_ERR(tfm));
return;
}
@@ -414,7 +415,7 @@ static void test_hash_speed(const char *algo, unsigned int sec,
desc.flags = 0;
if (crypto_hash_digestsize(tfm) > sizeof(output)) {
- printk("digestsize(%u) > outputbuffer(%zu)\n",
+ printk(KERN_ERR "digestsize(%u) > outputbuffer(%zu)\n",
crypto_hash_digestsize(tfm), sizeof(output));
goto out;
}
@@ -427,12 +428,14 @@ static void test_hash_speed(const char *algo, unsigned int sec,
for (i = 0; speed[i].blen != 0; i++) {
if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
- printk("template (%u) too big for tvmem (%lu)\n",
+ printk(KERN_ERR
+ "template (%u) too big for tvmem (%lu)\n",
speed[i].blen, TVMEMSIZE * PAGE_SIZE);
goto out;
}
- printk("test%3u (%5u byte blocks,%5u bytes per update,%4u updates): ",
+ printk(KERN_INFO "test%3u "
+ "(%5u byte blocks,%5u bytes per update,%4u updates): ",
i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
if (sec)
@@ -443,7 +446,7 @@ static void test_hash_speed(const char *algo, unsigned int sec,
speed[i].plen, output);
if (ret) {
- printk("hashing failed ret=%d\n", ret);
+ printk(KERN_ERR "hashing failed ret=%d\n", ret);
break;
}
}
@@ -466,239 +469,255 @@ static void test_available(void)
static inline int tcrypt_test(const char *alg)
{
- return alg_test(alg, alg, 0, 0);
+ int ret;
+
+ ret = alg_test(alg, alg, 0, 0);
+ /* non-fips algs return -EINVAL in fips mode */
+ if (fips_enabled && ret == -EINVAL)
+ ret = 0;
+ return ret;
}
-static void do_test(int m)
+static int do_test(int m)
{
int i;
+ int ret = 0;
switch (m) {
case 0:
for (i = 1; i < 200; i++)
- do_test(i);
+ ret += do_test(i);
break;
case 1:
- tcrypt_test("md5");
+ ret += tcrypt_test("md5");
break;
case 2:
- tcrypt_test("sha1");
+ ret += tcrypt_test("sha1");
break;
case 3:
- tcrypt_test("ecb(des)");
- tcrypt_test("cbc(des)");
+ ret += tcrypt_test("ecb(des)");
+ ret += tcrypt_test("cbc(des)");
break;
case 4:
- tcrypt_test("ecb(des3_ede)");
- tcrypt_test("cbc(des3_ede)");
+ ret += tcrypt_test("ecb(des3_ede)");
+ ret += tcrypt_test("cbc(des3_ede)");
break;
case 5:
- tcrypt_test("md4");
+ ret += tcrypt_test("md4");
break;
case 6:
- tcrypt_test("sha256");
+ ret += tcrypt_test("sha256");
break;
case 7:
- tcrypt_test("ecb(blowfish)");
- tcrypt_test("cbc(blowfish)");
+ ret += tcrypt_test("ecb(blowfish)");
+ ret += tcrypt_test("cbc(blowfish)");
break;
case 8:
- tcrypt_test("ecb(twofish)");
- tcrypt_test("cbc(twofish)");
+ ret += tcrypt_test("ecb(twofish)");
+ ret += tcrypt_test("cbc(twofish)");
break;
case 9:
- tcrypt_test("ecb(serpent)");
+ ret += tcrypt_test("ecb(serpent)");
break;
case 10:
- tcrypt_test("ecb(aes)");
- tcrypt_test("cbc(aes)");
- tcrypt_test("lrw(aes)");
- tcrypt_test("xts(aes)");
- tcrypt_test("rfc3686(ctr(aes))");
+ ret += tcrypt_test("ecb(aes)");
+ ret += tcrypt_test("cbc(aes)");
+ ret += tcrypt_test("lrw(aes)");
+ ret += tcrypt_test("xts(aes)");
+ ret += tcrypt_test("ctr(aes)");
+ ret += tcrypt_test("rfc3686(ctr(aes))");
break;
case 11:
- tcrypt_test("sha384");
+ ret += tcrypt_test("sha384");
break;
case 12:
- tcrypt_test("sha512");
+ ret += tcrypt_test("sha512");
break;
case 13:
- tcrypt_test("deflate");
+ ret += tcrypt_test("deflate");
break;
case 14:
- tcrypt_test("ecb(cast5)");
+ ret += tcrypt_test("ecb(cast5)");
break;
case 15:
- tcrypt_test("ecb(cast6)");
+ ret += tcrypt_test("ecb(cast6)");
break;
case 16:
- tcrypt_test("ecb(arc4)");
+ ret += tcrypt_test("ecb(arc4)");
break;
case 17:
- tcrypt_test("michael_mic");
+ ret += tcrypt_test("michael_mic");
break;
case 18:
- tcrypt_test("crc32c");
+ ret += tcrypt_test("crc32c");
break;
case 19:
- tcrypt_test("ecb(tea)");
+ ret += tcrypt_test("ecb(tea)");
break;
case 20:
- tcrypt_test("ecb(xtea)");
+ ret += tcrypt_test("ecb(xtea)");
break;
case 21:
- tcrypt_test("ecb(khazad)");
+ ret += tcrypt_test("ecb(khazad)");
break;
case 22:
- tcrypt_test("wp512");
+ ret += tcrypt_test("wp512");
break;
case 23:
- tcrypt_test("wp384");
+ ret += tcrypt_test("wp384");
break;
case 24:
- tcrypt_test("wp256");
+ ret += tcrypt_test("wp256");
break;
case 25:
- tcrypt_test("ecb(tnepres)");
+ ret += tcrypt_test("ecb(tnepres)");
break;
case 26:
- tcrypt_test("ecb(anubis)");
- tcrypt_test("cbc(anubis)");
+ ret += tcrypt_test("ecb(anubis)");
+ ret += tcrypt_test("cbc(anubis)");
break;
case 27:
- tcrypt_test("tgr192");
+ ret += tcrypt_test("tgr192");
break;
case 28:
- tcrypt_test("tgr160");
+ ret += tcrypt_test("tgr160");
break;
case 29:
- tcrypt_test("tgr128");
+ ret += tcrypt_test("tgr128");
break;
case 30:
- tcrypt_test("ecb(xeta)");
+ ret += tcrypt_test("ecb(xeta)");
break;
case 31:
- tcrypt_test("pcbc(fcrypt)");
+ ret += tcrypt_test("pcbc(fcrypt)");
break;
case 32:
- tcrypt_test("ecb(camellia)");
- tcrypt_test("cbc(camellia)");
+ ret += tcrypt_test("ecb(camellia)");
+ ret += tcrypt_test("cbc(camellia)");
break;
case 33:
- tcrypt_test("sha224");
+ ret += tcrypt_test("sha224");
break;
case 34:
- tcrypt_test("salsa20");
+ ret += tcrypt_test("salsa20");
break;
case 35:
- tcrypt_test("gcm(aes)");
+ ret += tcrypt_test("gcm(aes)");
break;
case 36:
- tcrypt_test("lzo");
+ ret += tcrypt_test("lzo");
break;
case 37:
- tcrypt_test("ccm(aes)");
+ ret += tcrypt_test("ccm(aes)");
break;
case 38:
- tcrypt_test("cts(cbc(aes))");
+ ret += tcrypt_test("cts(cbc(aes))");
break;
case 39:
- tcrypt_test("rmd128");
+ ret += tcrypt_test("rmd128");
break;
case 40:
- tcrypt_test("rmd160");
+ ret += tcrypt_test("rmd160");
break;
case 41:
- tcrypt_test("rmd256");
+ ret += tcrypt_test("rmd256");
break;
case 42:
- tcrypt_test("rmd320");
+ ret += tcrypt_test("rmd320");
break;
case 43:
- tcrypt_test("ecb(seed)");
+ ret += tcrypt_test("ecb(seed)");
break;
case 44:
- tcrypt_test("zlib");
+ ret += tcrypt_test("zlib");
+ break;
+
+ case 45:
+ ret += tcrypt_test("rfc4309(ccm(aes))");
break;
case 100:
- tcrypt_test("hmac(md5)");
+ ret += tcrypt_test("hmac(md5)");
break;
case 101:
- tcrypt_test("hmac(sha1)");
+ ret += tcrypt_test("hmac(sha1)");
break;
case 102:
- tcrypt_test("hmac(sha256)");
+ ret += tcrypt_test("hmac(sha256)");
break;
case 103:
- tcrypt_test("hmac(sha384)");
+ ret += tcrypt_test("hmac(sha384)");
break;
case 104:
- tcrypt_test("hmac(sha512)");
+ ret += tcrypt_test("hmac(sha512)");
break;
case 105:
- tcrypt_test("hmac(sha224)");
+ ret += tcrypt_test("hmac(sha224)");
break;
case 106:
- tcrypt_test("xcbc(aes)");
+ ret += tcrypt_test("xcbc(aes)");
break;
case 107:
- tcrypt_test("hmac(rmd128)");
+ ret += tcrypt_test("hmac(rmd128)");
break;
case 108:
- tcrypt_test("hmac(rmd160)");
+ ret += tcrypt_test("hmac(rmd160)");
+ break;
+
+ case 150:
+ ret += tcrypt_test("ansi_cprng");
break;
case 200:
@@ -862,6 +881,8 @@ static void do_test(int m)
test_available();
break;
}
+
+ return ret;
}
static int __init tcrypt_mod_init(void)
@@ -875,15 +896,21 @@ static int __init tcrypt_mod_init(void)
goto err_free_tv;
}
- do_test(mode);
+ err = do_test(mode);
+ if (err) {
+ printk(KERN_ERR "tcrypt: one or more tests failed!\n");
+ goto err_free_tv;
+ }
- /* We intentionaly return -EAGAIN to prevent keeping
- * the module. It does all its work from init()
- * and doesn't offer any runtime functionality
+ /* We intentionaly return -EAGAIN to prevent keeping the module,
+ * unless we're running in fips mode. It does all its work from
+ * init() and doesn't offer any runtime functionality, but in
+ * the fips case, checking for a successful load is helpful.
* => we don't need it in the memory, do we?
* -- mludvig
*/
- err = -EAGAIN;
+ if (!fips_enabled)
+ err = -EAGAIN;
err_free_tv:
for (i = 0; i < TVMEMSIZE && tvmem[i]; i++)
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index b50c3c6b17a..e9e9d84293b 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -19,6 +19,7 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <crypto/rng.h>
#include "internal.h"
#include "testmgr.h"
@@ -84,10 +85,16 @@ struct hash_test_suite {
unsigned int count;
};
+struct cprng_test_suite {
+ struct cprng_testvec *vecs;
+ unsigned int count;
+};
+
struct alg_test_desc {
const char *alg;
int (*test)(const struct alg_test_desc *desc, const char *driver,
u32 type, u32 mask);
+ int fips_allowed; /* set if alg is allowed in fips mode */
union {
struct aead_test_suite aead;
@@ -95,14 +102,12 @@ struct alg_test_desc {
struct comp_test_suite comp;
struct pcomp_test_suite pcomp;
struct hash_test_suite hash;
+ struct cprng_test_suite cprng;
} suite;
};
static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 };
-static char *xbuf[XBUFSIZE];
-static char *axbuf[XBUFSIZE];
-
static void hexdump(unsigned char *buf, unsigned int len)
{
print_hex_dump(KERN_CONT, "", DUMP_PREFIX_OFFSET,
@@ -121,6 +126,33 @@ static void tcrypt_complete(struct crypto_async_request *req, int err)
complete(&res->completion);
}
+static int testmgr_alloc_buf(char *buf[XBUFSIZE])
+{
+ int i;
+
+ for (i = 0; i < XBUFSIZE; i++) {
+ buf[i] = (void *)__get_free_page(GFP_KERNEL);
+ if (!buf[i])
+ goto err_free_buf;
+ }
+
+ return 0;
+
+err_free_buf:
+ while (i-- > 0)
+ free_page((unsigned long)buf[i]);
+
+ return -ENOMEM;
+}
+
+static void testmgr_free_buf(char *buf[XBUFSIZE])
+{
+ int i;
+
+ for (i = 0; i < XBUFSIZE; i++)
+ free_page((unsigned long)buf[i]);
+}
+
static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
unsigned int tcount)
{
@@ -130,8 +162,12 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
char result[64];
struct ahash_request *req;
struct tcrypt_result tresult;
- int ret;
void *hash_buff;
+ char *xbuf[XBUFSIZE];
+ int ret = -ENOMEM;
+
+ if (testmgr_alloc_buf(xbuf))
+ goto out_nobuf;
init_completion(&tresult.completion);
@@ -139,17 +175,25 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
if (!req) {
printk(KERN_ERR "alg: hash: Failed to allocate request for "
"%s\n", algo);
- ret = -ENOMEM;
goto out_noreq;
}
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
tcrypt_complete, &tresult);
+ j = 0;
for (i = 0; i < tcount; i++) {
+ if (template[i].np)
+ continue;
+
+ j++;
memset(result, 0, 64);
hash_buff = xbuf[0];
+ ret = -EINVAL;
+ if (WARN_ON(template[i].psize > PAGE_SIZE))
+ goto out;
+
memcpy(hash_buff, template[i].plaintext, template[i].psize);
sg_init_one(&sg[0], hash_buff, template[i].psize);
@@ -159,7 +203,7 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
template[i].ksize);
if (ret) {
printk(KERN_ERR "alg: hash: setkey failed on "
- "test %d for %s: ret=%d\n", i + 1, algo,
+ "test %d for %s: ret=%d\n", j, algo,
-ret);
goto out;
}
@@ -181,14 +225,14 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
/* fall through */
default:
printk(KERN_ERR "alg: hash: digest failed on test %d "
- "for %s: ret=%d\n", i + 1, algo, -ret);
+ "for %s: ret=%d\n", j, algo, -ret);
goto out;
}
if (memcmp(result, template[i].digest,
crypto_ahash_digestsize(tfm))) {
printk(KERN_ERR "alg: hash: Test %d failed for %s\n",
- i + 1, algo);
+ j, algo);
hexdump(result, crypto_ahash_digestsize(tfm));
ret = -EINVAL;
goto out;
@@ -203,7 +247,11 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
temp = 0;
sg_init_table(sg, template[i].np);
+ ret = -EINVAL;
for (k = 0; k < template[i].np; k++) {
+ if (WARN_ON(offset_in_page(IDX[k]) +
+ template[i].tap[k] > PAGE_SIZE))
+ goto out;
sg_set_buf(&sg[k],
memcpy(xbuf[IDX[k] >> PAGE_SHIFT] +
offset_in_page(IDX[k]),
@@ -265,6 +313,8 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
out:
ahash_request_free(req);
out_noreq:
+ testmgr_free_buf(xbuf);
+out_nobuf:
return ret;
}
@@ -273,7 +323,7 @@ static int test_aead(struct crypto_aead *tfm, int enc,
{
const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm));
unsigned int i, j, k, n, temp;
- int ret = 0;
+ int ret = -ENOMEM;
char *q;
char *key;
struct aead_request *req;
@@ -285,6 +335,13 @@ static int test_aead(struct crypto_aead *tfm, int enc,
void *input;
void *assoc;
char iv[MAX_IVLEN];
+ char *xbuf[XBUFSIZE];
+ char *axbuf[XBUFSIZE];
+
+ if (testmgr_alloc_buf(xbuf))
+ goto out_noxbuf;
+ if (testmgr_alloc_buf(axbuf))
+ goto out_noaxbuf;
if (enc == ENCRYPT)
e = "encryption";
@@ -297,7 +354,6 @@ static int test_aead(struct crypto_aead *tfm, int enc,
if (!req) {
printk(KERN_ERR "alg: aead: Failed to allocate request for "
"%s\n", algo);
- ret = -ENOMEM;
goto out;
}
@@ -314,6 +370,11 @@ static int test_aead(struct crypto_aead *tfm, int enc,
input = xbuf[0];
assoc = axbuf[0];
+ ret = -EINVAL;
+ if (WARN_ON(template[i].ilen > PAGE_SIZE ||
+ template[i].alen > PAGE_SIZE))
+ goto out;
+
memcpy(input, template[i].input, template[i].ilen);
memcpy(assoc, template[i].assoc, template[i].alen);
if (template[i].iv)
@@ -363,6 +424,16 @@ static int test_aead(struct crypto_aead *tfm, int enc,
switch (ret) {
case 0:
+ if (template[i].novrfy) {
+ /* verification was supposed to fail */
+ printk(KERN_ERR "alg: aead: %s failed "
+ "on test %d for %s: ret was 0, "
+ "expected -EBADMSG\n",
+ e, j, algo);
+ /* so really, we got a bad message */
+ ret = -EBADMSG;
+ goto out;
+ }
break;
case -EINPROGRESS:
case -EBUSY:
@@ -372,6 +443,10 @@ static int test_aead(struct crypto_aead *tfm, int enc,
INIT_COMPLETION(result.completion);
break;
}
+ case -EBADMSG:
+ if (template[i].novrfy)
+ /* verification failure was expected */
+ continue;
/* fall through */
default:
printk(KERN_ERR "alg: aead: %s failed on test "
@@ -459,7 +534,11 @@ static int test_aead(struct crypto_aead *tfm, int enc,
}
sg_init_table(asg, template[i].anp);
+ ret = -EINVAL;
for (k = 0, temp = 0; k < template[i].anp; k++) {
+ if (WARN_ON(offset_in_page(IDX[k]) +
+ template[i].atap[k] > PAGE_SIZE))
+ goto out;
sg_set_buf(&asg[k],
memcpy(axbuf[IDX[k] >> PAGE_SHIFT] +
offset_in_page(IDX[k]),
@@ -481,6 +560,16 @@ static int test_aead(struct crypto_aead *tfm, int enc,
switch (ret) {
case 0:
+ if (template[i].novrfy) {
+ /* verification was supposed to fail */
+ printk(KERN_ERR "alg: aead: %s failed "
+ "on chunk test %d for %s: ret "
+ "was 0, expected -EBADMSG\n",
+ e, j, algo);
+ /* so really, we got a bad message */
+ ret = -EBADMSG;
+ goto out;
+ }
break;
case -EINPROGRESS:
case -EBUSY:
@@ -490,6 +579,10 @@ static int test_aead(struct crypto_aead *tfm, int enc,
INIT_COMPLETION(result.completion);
break;
}
+ case -EBADMSG:
+ if (template[i].novrfy)
+ /* verification failure was expected */
+ continue;
/* fall through */
default:
printk(KERN_ERR "alg: aead: %s failed on "
@@ -546,6 +639,10 @@ static int test_aead(struct crypto_aead *tfm, int enc,
out:
aead_request_free(req);
+ testmgr_free_buf(axbuf);
+out_noaxbuf:
+ testmgr_free_buf(xbuf);
+out_noxbuf:
return ret;
}
@@ -554,10 +651,14 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
{
const char *algo = crypto_tfm_alg_driver_name(crypto_cipher_tfm(tfm));
unsigned int i, j, k;
- int ret;
char *q;
const char *e;
void *data;
+ char *xbuf[XBUFSIZE];
+ int ret = -ENOMEM;
+
+ if (testmgr_alloc_buf(xbuf))
+ goto out_nobuf;
if (enc == ENCRYPT)
e = "encryption";
@@ -571,6 +672,10 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
j++;
+ ret = -EINVAL;
+ if (WARN_ON(template[i].ilen > PAGE_SIZE))
+ goto out;
+
data = xbuf[0];
memcpy(data, template[i].input, template[i].ilen);
@@ -611,6 +716,8 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
ret = 0;
out:
+ testmgr_free_buf(xbuf);
+out_nobuf:
return ret;
}
@@ -620,7 +727,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
const char *algo =
crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm));
unsigned int i, j, k, n, temp;
- int ret;
char *q;
struct ablkcipher_request *req;
struct scatterlist sg[8];
@@ -628,6 +734,11 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
struct tcrypt_result result;
void *data;
char iv[MAX_IVLEN];
+ char *xbuf[XBUFSIZE];
+ int ret = -ENOMEM;
+
+ if (testmgr_alloc_buf(xbuf))
+ goto out_nobuf;
if (enc == ENCRYPT)
e = "encryption";
@@ -640,7 +751,6 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
if (!req) {
printk(KERN_ERR "alg: skcipher: Failed to allocate request "
"for %s\n", algo);
- ret = -ENOMEM;
goto out;
}
@@ -657,6 +767,10 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
if (!(template[i].np)) {
j++;
+ ret = -EINVAL;
+ if (WARN_ON(template[i].ilen > PAGE_SIZE))
+ goto out;
+
data = xbuf[0];
memcpy(data, template[i].input, template[i].ilen);
@@ -825,6 +939,8 @@ static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
out:
ablkcipher_request_free(req);
+ testmgr_free_buf(xbuf);
+out_nobuf:
return ret;
}
@@ -837,7 +953,8 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
int ret;
for (i = 0; i < ctcount; i++) {
- int ilen, dlen = COMP_BUF_SIZE;
+ int ilen;
+ unsigned int dlen = COMP_BUF_SIZE;
memset(result, 0, sizeof (result));
@@ -869,7 +986,8 @@ static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
}
for (i = 0; i < dtcount; i++) {
- int ilen, dlen = COMP_BUF_SIZE;
+ int ilen;
+ unsigned int dlen = COMP_BUF_SIZE;
memset(result, 0, sizeof (result));
@@ -914,24 +1032,25 @@ static int test_pcomp(struct crypto_pcomp *tfm,
const char *algo = crypto_tfm_alg_driver_name(crypto_pcomp_tfm(tfm));
unsigned int i;
char result[COMP_BUF_SIZE];
- int error;
+ int res;
for (i = 0; i < ctcount; i++) {
struct comp_request req;
+ unsigned int produced = 0;
- error = crypto_compress_setup(tfm, ctemplate[i].params,
- ctemplate[i].paramsize);
- if (error) {
+ res = crypto_compress_setup(tfm, ctemplate[i].params,
+ ctemplate[i].paramsize);
+ if (res) {
pr_err("alg: pcomp: compression setup failed on test "
- "%d for %s: error=%d\n", i + 1, algo, error);
- return error;
+ "%d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
- error = crypto_compress_init(tfm);
- if (error) {
+ res = crypto_compress_init(tfm);
+ if (res) {
pr_err("alg: pcomp: compression init failed on test "
- "%d for %s: error=%d\n", i + 1, algo, error);
- return error;
+ "%d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
memset(result, 0, sizeof(result));
@@ -941,32 +1060,37 @@ static int test_pcomp(struct crypto_pcomp *tfm,
req.next_out = result;
req.avail_out = ctemplate[i].outlen / 2;
- error = crypto_compress_update(tfm, &req);
- if (error && (error != -EAGAIN || req.avail_in)) {
+ res = crypto_compress_update(tfm, &req);
+ if (res < 0 && (res != -EAGAIN || req.avail_in)) {
pr_err("alg: pcomp: compression update failed on test "
- "%d for %s: error=%d\n", i + 1, algo, error);
- return error;
+ "%d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
+ if (res > 0)
+ produced += res;
/* Add remaining input data */
req.avail_in += (ctemplate[i].inlen + 1) / 2;
- error = crypto_compress_update(tfm, &req);
- if (error && (error != -EAGAIN || req.avail_in)) {
+ res = crypto_compress_update(tfm, &req);
+ if (res < 0 && (res != -EAGAIN || req.avail_in)) {
pr_err("alg: pcomp: compression update failed on test "
- "%d for %s: error=%d\n", i + 1, algo, error);
- return error;
+ "%d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
+ if (res > 0)
+ produced += res;
/* Provide remaining output space */
req.avail_out += COMP_BUF_SIZE - ctemplate[i].outlen / 2;
- error = crypto_compress_final(tfm, &req);
- if (error) {
+ res = crypto_compress_final(tfm, &req);
+ if (res < 0) {
pr_err("alg: pcomp: compression final failed on test "
- "%d for %s: error=%d\n", i + 1, algo, error);
- return error;
+ "%d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
+ produced += res;
if (COMP_BUF_SIZE - req.avail_out != ctemplate[i].outlen) {
pr_err("alg: comp: Compression test %d failed for %s: "
@@ -976,6 +1100,13 @@ static int test_pcomp(struct crypto_pcomp *tfm,
return -EINVAL;
}
+ if (produced != ctemplate[i].outlen) {
+ pr_err("alg: comp: Compression test %d failed for %s: "
+ "returned len = %u (expected %d)\n", i + 1,
+ algo, produced, ctemplate[i].outlen);
+ return -EINVAL;
+ }
+
if (memcmp(result, ctemplate[i].output, ctemplate[i].outlen)) {
pr_err("alg: pcomp: Compression test %d failed for "
"%s\n", i + 1, algo);
@@ -986,21 +1117,21 @@ static int test_pcomp(struct crypto_pcomp *tfm,
for (i = 0; i < dtcount; i++) {
struct comp_request req;
+ unsigned int produced = 0;
- error = crypto_decompress_setup(tfm, dtemplate[i].params,
- dtemplate[i].paramsize);
- if (error) {
+ res = crypto_decompress_setup(tfm, dtemplate[i].params,
+ dtemplate[i].paramsize);
+ if (res) {
pr_err("alg: pcomp: decompression setup failed on "
- "test %d for %s: error=%d\n", i + 1, algo,
- error);
- return error;
+ "test %d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
- error = crypto_decompress_init(tfm);
- if (error) {
+ res = crypto_decompress_init(tfm);
+ if (res) {
pr_err("alg: pcomp: decompression init failed on test "
- "%d for %s: error=%d\n", i + 1, algo, error);
- return error;
+ "%d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
memset(result, 0, sizeof(result));
@@ -1010,35 +1141,38 @@ static int test_pcomp(struct crypto_pcomp *tfm,
req.next_out = result;
req.avail_out = dtemplate[i].outlen / 2;
- error = crypto_decompress_update(tfm, &req);
- if (error && (error != -EAGAIN || req.avail_in)) {
+ res = crypto_decompress_update(tfm, &req);
+ if (res < 0 && (res != -EAGAIN || req.avail_in)) {
pr_err("alg: pcomp: decompression update failed on "
- "test %d for %s: error=%d\n", i + 1, algo,
- error);
- return error;
+ "test %d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
+ if (res > 0)
+ produced += res;
/* Add remaining input data */
req.avail_in += (dtemplate[i].inlen + 1) / 2;
- error = crypto_decompress_update(tfm, &req);
- if (error && (error != -EAGAIN || req.avail_in)) {
+ res = crypto_decompress_update(tfm, &req);
+ if (res < 0 && (res != -EAGAIN || req.avail_in)) {
pr_err("alg: pcomp: decompression update failed on "
- "test %d for %s: error=%d\n", i + 1, algo,
- error);
- return error;
+ "test %d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
+ if (res > 0)
+ produced += res;
/* Provide remaining output space */
req.avail_out += COMP_BUF_SIZE - dtemplate[i].outlen / 2;
- error = crypto_decompress_final(tfm, &req);
- if (error && (error != -EAGAIN || req.avail_in)) {
+ res = crypto_decompress_final(tfm, &req);
+ if (res < 0 && (res != -EAGAIN || req.avail_in)) {
pr_err("alg: pcomp: decompression final failed on "
- "test %d for %s: error=%d\n", i + 1, algo,
- error);
- return error;
+ "test %d for %s: error=%d\n", i + 1, algo, res);
+ return res;
}
+ if (res > 0)
+ produced += res;
if (COMP_BUF_SIZE - req.avail_out != dtemplate[i].outlen) {
pr_err("alg: comp: Decompression test %d failed for "
@@ -1048,6 +1182,13 @@ static int test_pcomp(struct crypto_pcomp *tfm,
return -EINVAL;
}
+ if (produced != dtemplate[i].outlen) {
+ pr_err("alg: comp: Decompression test %d failed for "
+ "%s: returned len = %u (expected %d)\n", i + 1,
+ algo, produced, dtemplate[i].outlen);
+ return -EINVAL;
+ }
+
if (memcmp(result, dtemplate[i].output, dtemplate[i].outlen)) {
pr_err("alg: pcomp: Decompression test %d failed for "
"%s\n", i + 1, algo);
@@ -1059,6 +1200,68 @@ static int test_pcomp(struct crypto_pcomp *tfm,
return 0;
}
+
+static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template,
+ unsigned int tcount)
+{
+ const char *algo = crypto_tfm_alg_driver_name(crypto_rng_tfm(tfm));
+ int err, i, j, seedsize;
+ u8 *seed;
+ char result[32];
+
+ seedsize = crypto_rng_seedsize(tfm);
+
+ seed = kmalloc(seedsize, GFP_KERNEL);
+ if (!seed) {
+ printk(KERN_ERR "alg: cprng: Failed to allocate seed space "
+ "for %s\n", algo);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < tcount; i++) {
+ memset(result, 0, 32);
+
+ memcpy(seed, template[i].v, template[i].vlen);
+ memcpy(seed + template[i].vlen, template[i].key,
+ template[i].klen);
+ memcpy(seed + template[i].vlen + template[i].klen,
+ template[i].dt, template[i].dtlen);
+
+ err = crypto_rng_reset(tfm, seed, seedsize);
+ if (err) {
+ printk(KERN_ERR "alg: cprng: Failed to reset rng "
+ "for %s\n", algo);
+ goto out;
+ }
+
+ for (j = 0; j < template[i].loops; j++) {
+ err = crypto_rng_get_bytes(tfm, result,
+ template[i].rlen);
+ if (err != template[i].rlen) {
+ printk(KERN_ERR "alg: cprng: Failed to obtain "
+ "the correct amount of random data for "
+ "%s (requested %d, got %d)\n", algo,
+ template[i].rlen, err);
+ goto out;
+ }
+ }
+
+ err = memcmp(result, template[i].result,
+ template[i].rlen);
+ if (err) {
+ printk(KERN_ERR "alg: cprng: Test %d failed for %s\n",
+ i, algo);
+ hexdump(result, template[i].rlen);
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ kfree(seed);
+ return err;
+}
+
static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
u32 type, u32 mask)
{
@@ -1258,11 +1461,42 @@ out:
return err;
}
+static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
+ u32 type, u32 mask)
+{
+ struct crypto_rng *rng;
+ int err;
+
+ rng = crypto_alloc_rng(driver, type, mask);
+ if (IS_ERR(rng)) {
+ printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
+ "%ld\n", driver, PTR_ERR(rng));
+ return PTR_ERR(rng);
+ }
+
+ err = test_cprng(rng, desc->suite.cprng.vecs, desc->suite.cprng.count);
+
+ crypto_free_rng(rng);
+
+ return err;
+}
+
/* Please keep this list sorted by algorithm name. */
static const struct alg_test_desc alg_test_descs[] = {
{
+ .alg = "ansi_cprng",
+ .test = alg_test_cprng,
+ .fips_allowed = 1,
+ .suite = {
+ .cprng = {
+ .vecs = ansi_cprng_aes_tv_template,
+ .count = ANSI_CPRNG_AES_TEST_VECTORS
+ }
+ }
+ }, {
.alg = "cbc(aes)",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
@@ -1338,6 +1572,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "cbc(des3_ede)",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
@@ -1368,6 +1603,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ccm(aes)",
.test = alg_test_aead,
+ .fips_allowed = 1,
.suite = {
.aead = {
.enc = {
@@ -1383,6 +1619,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "crc32c",
.test = alg_test_crc32c,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = crc32c_tv_template,
@@ -1390,6 +1627,22 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+ .alg = "ctr(aes)",
+ .test = alg_test_skcipher,
+ .fips_allowed = 1,
+ .suite = {
+ .cipher = {
+ .enc = {
+ .vecs = aes_ctr_enc_tv_template,
+ .count = AES_CTR_ENC_TEST_VECTORS
+ },
+ .dec = {
+ .vecs = aes_ctr_dec_tv_template,
+ .count = AES_CTR_DEC_TEST_VECTORS
+ }
+ }
+ }
+ }, {
.alg = "cts(cbc(aes))",
.test = alg_test_skcipher,
.suite = {
@@ -1422,6 +1675,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ecb(aes)",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
@@ -1527,6 +1781,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ecb(des)",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
@@ -1542,6 +1797,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ecb(des3_ede)",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
@@ -1677,6 +1933,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "gcm(aes)",
.test = alg_test_aead,
+ .fips_allowed = 1,
.suite = {
.aead = {
.enc = {
@@ -1719,6 +1976,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "hmac(sha1)",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = hmac_sha1_tv_template,
@@ -1728,6 +1986,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "hmac(sha224)",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = hmac_sha224_tv_template,
@@ -1737,6 +1996,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "hmac(sha256)",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = hmac_sha256_tv_template,
@@ -1746,6 +2006,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "hmac(sha384)",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = hmac_sha384_tv_template,
@@ -1755,6 +2016,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "hmac(sha512)",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = hmac_sha512_tv_template,
@@ -1836,15 +2098,32 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "rfc3686(ctr(aes))",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
- .vecs = aes_ctr_enc_tv_template,
- .count = AES_CTR_ENC_TEST_VECTORS
+ .vecs = aes_ctr_rfc3686_enc_tv_template,
+ .count = AES_CTR_3686_ENC_TEST_VECTORS
},
.dec = {
- .vecs = aes_ctr_dec_tv_template,
- .count = AES_CTR_DEC_TEST_VECTORS
+ .vecs = aes_ctr_rfc3686_dec_tv_template,
+ .count = AES_CTR_3686_DEC_TEST_VECTORS
+ }
+ }
+ }
+ }, {
+ .alg = "rfc4309(ccm(aes))",
+ .test = alg_test_aead,
+ .fips_allowed = 1,
+ .suite = {
+ .aead = {
+ .enc = {
+ .vecs = aes_ccm_rfc4309_enc_tv_template,
+ .count = AES_CCM_4309_ENC_TEST_VECTORS
+ },
+ .dec = {
+ .vecs = aes_ccm_rfc4309_dec_tv_template,
+ .count = AES_CCM_4309_DEC_TEST_VECTORS
}
}
}
@@ -1898,6 +2177,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "sha1",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = sha1_tv_template,
@@ -1907,6 +2187,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "sha224",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = sha224_tv_template,
@@ -1916,6 +2197,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "sha256",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = sha256_tv_template,
@@ -1925,6 +2207,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "sha384",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = sha384_tv_template,
@@ -1934,6 +2217,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "sha512",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = sha512_tv_template,
@@ -2077,60 +2361,36 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
if (i < 0)
goto notest;
- return alg_test_cipher(alg_test_descs + i, driver, type, mask);
+ if (fips_enabled && !alg_test_descs[i].fips_allowed)
+ goto non_fips_alg;
+
+ rc = alg_test_cipher(alg_test_descs + i, driver, type, mask);
+ goto test_done;
}
i = alg_find_test(alg);
if (i < 0)
goto notest;
+ if (fips_enabled && !alg_test_descs[i].fips_allowed)
+ goto non_fips_alg;
+
rc = alg_test_descs[i].test(alg_test_descs + i, driver,
type, mask);
+test_done:
if (fips_enabled && rc)
panic("%s: %s alg self test failed in fips mode!\n", driver, alg);
+ if (fips_enabled && !rc)
+ printk(KERN_INFO "alg: self-tests for %s (%s) passed\n",
+ driver, alg);
+
return rc;
notest:
printk(KERN_INFO "alg: No test for %s (%s)\n", alg, driver);
return 0;
+non_fips_alg:
+ return -EINVAL;
}
EXPORT_SYMBOL_GPL(alg_test);
-
-int __init testmgr_init(void)
-{
- int i;
-
- for (i = 0; i < XBUFSIZE; i++) {
- xbuf[i] = (void *)__get_free_page(GFP_KERNEL);
- if (!xbuf[i])
- goto err_free_xbuf;
- }
-
- for (i = 0; i < XBUFSIZE; i++) {
- axbuf[i] = (void *)__get_free_page(GFP_KERNEL);
- if (!axbuf[i])
- goto err_free_axbuf;
- }
-
- return 0;
-
-err_free_axbuf:
- for (i = 0; i < XBUFSIZE && axbuf[i]; i++)
- free_page((unsigned long)axbuf[i]);
-err_free_xbuf:
- for (i = 0; i < XBUFSIZE && xbuf[i]; i++)
- free_page((unsigned long)xbuf[i]);
-
- return -ENOMEM;
-}
-
-void testmgr_exit(void)
-{
- int i;
-
- for (i = 0; i < XBUFSIZE; i++)
- free_page((unsigned long)axbuf[i]);
- for (i = 0; i < XBUFSIZE; i++)
- free_page((unsigned long)xbuf[i]);
-}
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 526f00a9c72..69316228fc1 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -62,6 +62,7 @@ struct aead_testvec {
int np;
int anp;
unsigned char fail;
+ unsigned char novrfy; /* ccm dec verification failure expected */
unsigned char wk; /* weak key flag */
unsigned char klen;
unsigned short ilen;
@@ -69,6 +70,18 @@ struct aead_testvec {
unsigned short rlen;
};
+struct cprng_testvec {
+ char *key;
+ char *dt;
+ char *v;
+ char *result;
+ unsigned char klen;
+ unsigned short dtlen;
+ unsigned short vlen;
+ unsigned short rlen;
+ unsigned short loops;
+};
+
static char zeroed_string[48];
/*
@@ -2841,12 +2854,16 @@ static struct cipher_testvec cast6_dec_tv_template[] = {
#define AES_LRW_DEC_TEST_VECTORS 8
#define AES_XTS_ENC_TEST_VECTORS 4
#define AES_XTS_DEC_TEST_VECTORS 4
-#define AES_CTR_ENC_TEST_VECTORS 7
-#define AES_CTR_DEC_TEST_VECTORS 6
+#define AES_CTR_ENC_TEST_VECTORS 3
+#define AES_CTR_DEC_TEST_VECTORS 3
+#define AES_CTR_3686_ENC_TEST_VECTORS 7
+#define AES_CTR_3686_DEC_TEST_VECTORS 6
#define AES_GCM_ENC_TEST_VECTORS 9
#define AES_GCM_DEC_TEST_VECTORS 8
#define AES_CCM_ENC_TEST_VECTORS 7
#define AES_CCM_DEC_TEST_VECTORS 7
+#define AES_CCM_4309_ENC_TEST_VECTORS 7
+#define AES_CCM_4309_DEC_TEST_VECTORS 10
static struct cipher_testvec aes_enc_tv_template[] = {
{ /* From FIPS-197 */
@@ -3983,6 +4000,164 @@ static struct cipher_testvec aes_xts_dec_tv_template[] = {
static struct cipher_testvec aes_ctr_enc_tv_template[] = {
+ { /* From NIST Special Publication 800-38A, Appendix F.5 */
+ .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+ "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+ .klen = 16,
+ .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+ .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .ilen = 64,
+ .result = "\x87\x4d\x61\x91\xb6\x20\xe3\x26"
+ "\x1b\xef\x68\x64\x99\x0d\xb6\xce"
+ "\x98\x06\xf6\x6b\x79\x70\xfd\xff"
+ "\x86\x17\x18\x7b\xb9\xff\xfd\xff"
+ "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e"
+ "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
+ "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1"
+ "\x79\x21\x70\xa0\xf3\x00\x9c\xee",
+ .rlen = 64,
+ }, {
+ .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+ "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+ "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+ .klen = 24,
+ .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+ .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .ilen = 64,
+ .result = "\x1a\xbc\x93\x24\x17\x52\x1c\xa2"
+ "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b"
+ "\x09\x03\x39\xec\x0a\xa6\xfa\xef"
+ "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94"
+ "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70"
+ "\xd1\xbd\x1d\x66\x56\x20\xab\xf7"
+ "\x4f\x78\xa7\xf6\xd2\x98\x09\x58"
+ "\x5a\x97\xda\xec\x58\xc6\xb0\x50",
+ .rlen = 64,
+ }, {
+ .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+ "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+ "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+ "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+ .klen = 32,
+ .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+ .input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .ilen = 64,
+ .result = "\x60\x1e\xc3\x13\x77\x57\x89\xa5"
+ "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28"
+ "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a"
+ "\xca\x84\xe9\x90\xca\xca\xf5\xc5"
+ "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c"
+ "\xe8\x70\x17\xba\x2d\x84\x98\x8d"
+ "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6"
+ "\x13\xc2\xdd\x08\x45\x79\x41\xa6",
+ .rlen = 64,
+ }
+};
+
+static struct cipher_testvec aes_ctr_dec_tv_template[] = {
+ { /* From NIST Special Publication 800-38A, Appendix F.5 */
+ .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+ "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+ .klen = 16,
+ .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+ .input = "\x87\x4d\x61\x91\xb6\x20\xe3\x26"
+ "\x1b\xef\x68\x64\x99\x0d\xb6\xce"
+ "\x98\x06\xf6\x6b\x79\x70\xfd\xff"
+ "\x86\x17\x18\x7b\xb9\xff\xfd\xff"
+ "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e"
+ "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
+ "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1"
+ "\x79\x21\x70\xa0\xf3\x00\x9c\xee",
+ .ilen = 64,
+ .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .rlen = 64,
+ }, {
+ .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+ "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+ "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+ .klen = 24,
+ .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+ .input = "\x1a\xbc\x93\x24\x17\x52\x1c\xa2"
+ "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b"
+ "\x09\x03\x39\xec\x0a\xa6\xfa\xef"
+ "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94"
+ "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70"
+ "\xd1\xbd\x1d\x66\x56\x20\xab\xf7"
+ "\x4f\x78\xa7\xf6\xd2\x98\x09\x58"
+ "\x5a\x97\xda\xec\x58\xc6\xb0\x50",
+ .ilen = 64,
+ .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .rlen = 64,
+ }, {
+ .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+ "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+ "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+ "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+ .klen = 32,
+ .iv = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+ .input = "\x60\x1e\xc3\x13\x77\x57\x89\xa5"
+ "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28"
+ "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a"
+ "\xca\x84\xe9\x90\xca\xca\xf5\xc5"
+ "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c"
+ "\xe8\x70\x17\xba\x2d\x84\x98\x8d"
+ "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6"
+ "\x13\xc2\xdd\x08\x45\x79\x41\xa6",
+ .ilen = 64,
+ .result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .rlen = 64,
+ }
+};
+
+static struct cipher_testvec aes_ctr_rfc3686_enc_tv_template[] = {
{ /* From RFC 3686 */
.key = "\xae\x68\x52\xf8\x12\x10\x67\xcc"
"\x4b\xf7\xa5\x76\x55\x77\xf3\x9e"
@@ -5114,7 +5289,7 @@ static struct cipher_testvec aes_ctr_enc_tv_template[] = {
},
};
-static struct cipher_testvec aes_ctr_dec_tv_template[] = {
+static struct cipher_testvec aes_ctr_rfc3686_dec_tv_template[] = {
{ /* From RFC 3686 */
.key = "\xae\x68\x52\xf8\x12\x10\x67\xcc"
"\x4b\xf7\xa5\x76\x55\x77\xf3\x9e"
@@ -5825,6 +6000,470 @@ static struct aead_testvec aes_ccm_dec_tv_template[] = {
},
};
+/*
+ * rfc4309 refers to section 8 of rfc3610 for test vectors, but they all
+ * use a 13-byte nonce, we only support an 11-byte nonce. Similarly, all of
+ * Special Publication 800-38C's test vectors also use nonce lengths our
+ * implementation doesn't support. The following are taken from fips cavs
+ * fax files on hand at Red Hat.
+ *
+ * nb: actual key lengths are (klen - 3), the last 3 bytes are actually
+ * part of the nonce which combine w/the iv, but need to be input this way.
+ */
+static struct aead_testvec aes_ccm_rfc4309_enc_tv_template[] = {
+ {
+ .key = "\x83\xac\x54\x66\xc2\xeb\xe5\x05"
+ "\x2e\x01\xd1\xfc\x5d\x82\x66\x2e"
+ "\x96\xac\x59",
+ .klen = 19,
+ .iv = "\x30\x07\xa1\xe2\xa2\xc7\x55\x24",
+ .alen = 0,
+ .input = "\x19\xc8\x81\xf6\xe9\x86\xff\x93"
+ "\x0b\x78\x67\xe5\xbb\xb7\xfc\x6e"
+ "\x83\x77\xb3\xa6\x0c\x8c\x9f\x9c"
+ "\x35\x2e\xad\xe0\x62\xf9\x91\xa1",
+ .ilen = 32,
+ .result = "\xab\x6f\xe1\x69\x1d\x19\x99\xa8"
+ "\x92\xa0\xc4\x6f\x7e\xe2\x8b\xb1"
+ "\x70\xbb\x8c\xa6\x4c\x6e\x97\x8a"
+ "\x57\x2b\xbe\x5d\x98\xa6\xb1\x32"
+ "\xda\x24\xea\xd9\xa1\x39\x98\xfd"
+ "\xa4\xbe\xd9\xf2\x1a\x6d\x22\xa8",
+ .rlen = 48,
+ }, {
+ .key = "\x1e\x2c\x7e\x01\x41\x9a\xef\xc0"
+ "\x0d\x58\x96\x6e\x5c\xa2\x4b\xd3"
+ "\x4f\xa3\x19",
+ .klen = 19,
+ .iv = "\xd3\x01\x5a\xd8\x30\x60\x15\x56",
+ .assoc = "\xda\xe6\x28\x9c\x45\x2d\xfd\x63"
+ "\x5e\xda\x4c\xb6\xe6\xfc\xf9\xb7"
+ "\x0c\x56\xcb\xe4\xe0\x05\x7a\xe1"
+ "\x0a\x63\x09\x78\xbc\x2c\x55\xde",
+ .alen = 32,
+ .input = "\x87\xa3\x36\xfd\x96\xb3\x93\x78"
+ "\xa9\x28\x63\xba\x12\xa3\x14\x85"
+ "\x57\x1e\x06\xc9\x7b\x21\xef\x76"
+ "\x7f\x38\x7e\x8e\x29\xa4\x3e\x7e",
+ .ilen = 32,
+ .result = "\x8a\x1e\x11\xf0\x02\x6b\xe2\x19"
+ "\xfc\x70\xc4\x6d\x8e\xb7\x99\xab"
+ "\xc5\x4b\xa2\xac\xd3\xf3\x48\xff"
+ "\x3b\xb5\xce\x53\xef\xde\xbb\x02"
+ "\xa9\x86\x15\x6c\x13\xfe\xda\x0a"
+ "\x22\xb8\x29\x3d\xd8\x39\x9a\x23",
+ .rlen = 48,
+ }, {
+ .key = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1"
+ "\xa3\xf0\xff\xdd\x4e\x4b\x12\x75"
+ "\x53\x14\x73\x66\x8d\x88\xf6\x80"
+ "\xa0\x20\x35",
+ .klen = 27,
+ .iv = "\x26\xf2\x21\x8d\x50\x20\xda\xe2",
+ .assoc = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1"
+ "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47"
+ "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d"
+ "\xd8\x9e\x2b\x56\x10\x23\x56\xe7",
+ .alen = 32,
+ .ilen = 0,
+ .result = "\x36\xea\x7a\x70\x08\xdc\x6a\xbc"
+ "\xad\x0c\x7a\x63\xf6\x61\xfd\x9b",
+ .rlen = 16,
+ }, {
+ .key = "\x56\xdf\x5c\x8f\x26\x3f\x0e\x42"
+ "\xef\x7a\xd3\xce\xfc\x84\x60\x62"
+ "\xca\xb4\x40\xaf\x5f\xc9\xc9\x01"
+ "\xd6\x3c\x8c",
+ .klen = 27,
+ .iv = "\x86\x84\xb6\xcd\xef\x09\x2e\x94",
+ .assoc = "\x02\x65\x78\x3c\xe9\x21\x30\x91"
+ "\xb1\xb9\xda\x76\x9a\x78\x6d\x95"
+ "\xf2\x88\x32\xa3\xf2\x50\xcb\x4c"
+ "\xe3\x00\x73\x69\x84\x69\x87\x79",
+ .alen = 32,
+ .input = "\x9f\xd2\x02\x4b\x52\x49\x31\x3c"
+ "\x43\x69\x3a\x2d\x8e\x70\xad\x7e"
+ "\xe0\xe5\x46\x09\x80\x89\x13\xb2"
+ "\x8c\x8b\xd9\x3f\x86\xfb\xb5\x6b",
+ .ilen = 32,
+ .result = "\x39\xdf\x7c\x3c\x5a\x29\xb9\x62"
+ "\x5d\x51\xc2\x16\xd8\xbd\x06\x9f"
+ "\x9b\x6a\x09\x70\xc1\x51\x83\xc2"
+ "\x66\x88\x1d\x4f\x9a\xda\xe0\x1e"
+ "\xc7\x79\x11\x58\xe5\x6b\x20\x40"
+ "\x7a\xea\x46\x42\x8b\xe4\x6f\xe1",
+ .rlen = 48,
+ }, {
+ .key = "\xe0\x8d\x99\x71\x60\xd7\x97\x1a"
+ "\xbd\x01\x99\xd5\x8a\xdf\x71\x3a"
+ "\xd3\xdf\x24\x4b\x5e\x3d\x4b\x4e"
+ "\x30\x7a\xb9\xd8\x53\x0a\x5e\x2b"
+ "\x1e\x29\x91",
+ .klen = 35,
+ .iv = "\xad\x8e\xc1\x53\x0a\xcf\x2d\xbe",
+ .assoc = "\x19\xb6\x1f\x57\xc4\xf3\xf0\x8b"
+ "\x78\x2b\x94\x02\x29\x0f\x42\x27"
+ "\x6b\x75\xcb\x98\x34\x08\x7e\x79"
+ "\xe4\x3e\x49\x0d\x84\x8b\x22\x87",
+ .alen = 32,
+ .input = "\xe1\xd9\xd8\x13\xeb\x3a\x75\x3f"
+ "\x9d\xbd\x5f\x66\xbe\xdc\xbb\x66"
+ "\xbf\x17\x99\x62\x4a\x39\x27\x1f"
+ "\x1d\xdc\x24\xae\x19\x2f\x98\x4c",
+ .ilen = 32,
+ .result = "\x19\xb8\x61\x33\x45\x2b\x43\x96"
+ "\x6f\x51\xd0\x20\x30\x7d\x9b\xc6"
+ "\x26\x3d\xf8\xc9\x65\x16\xa8\x9f"
+ "\xf0\x62\x17\x34\xf2\x1e\x8d\x75"
+ "\x4e\x13\xcc\xc0\xc3\x2a\x54\x2d",
+ .rlen = 40,
+ }, {
+ .key = "\x7c\xc8\x18\x3b\x8d\x99\xe0\x7c"
+ "\x45\x41\xb8\xbd\x5c\xa7\xc2\x32"
+ "\x8a\xb8\x02\x59\xa4\xfe\xa9\x2c"
+ "\x09\x75\x9a\x9b\x3c\x9b\x27\x39"
+ "\xf9\xd9\x4e",
+ .klen = 35,
+ .iv = "\x63\xb5\x3d\x9d\x43\xf6\x1e\x50",
+ .assoc = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b"
+ "\x13\x02\x01\x0c\x83\x4c\x96\x35"
+ "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94"
+ "\xb0\x39\x36\xe6\x8f\x57\xe0\x13",
+ .alen = 32,
+ .input = "\x3b\x6c\x29\x36\xb6\xef\x07\xa6"
+ "\x83\x72\x07\x4f\xcf\xfa\x66\x89"
+ "\x5f\xca\xb1\xba\xd5\x8f\x2c\x27"
+ "\x30\xdb\x75\x09\x93\xd4\x65\xe4",
+ .ilen = 32,
+ .result = "\xb0\x88\x5a\x33\xaa\xe5\xc7\x1d"
+ "\x85\x23\xc7\xc6\x2f\xf4\x1e\x3d"
+ "\xcc\x63\x44\x25\x07\x78\x4f\x9e"
+ "\x96\xb8\x88\xeb\xbc\x48\x1f\x06"
+ "\x39\xaf\x39\xac\xd8\x4a\x80\x39"
+ "\x7b\x72\x8a\xf7",
+ .rlen = 44,
+ }, {
+ .key = "\xab\xd0\xe9\x33\x07\x26\xe5\x83"
+ "\x8c\x76\x95\xd4\xb6\xdc\xf3\x46"
+ "\xf9\x8f\xad\xe3\x02\x13\x83\x77"
+ "\x3f\xb0\xf1\xa1\xa1\x22\x0f\x2b"
+ "\x24\xa7\x8b",
+ .klen = 35,
+ .iv = "\x07\xcb\xcc\x0e\xe6\x33\xbf\xf5",
+ .assoc = "\xd4\xdb\x30\x1d\x03\xfe\xfd\x5f"
+ "\x87\xd4\x8c\xb6\xb6\xf1\x7a\x5d"
+ "\xab\x90\x65\x8d\x8e\xca\x4d\x4f"
+ "\x16\x0c\x40\x90\x4b\xc7\x36\x73",
+ .alen = 32,
+ .input = "\xf5\xc6\x7d\x48\xc1\xb7\xe6\x92"
+ "\x97\x5a\xca\xc4\xa9\x6d\xf9\x3d"
+ "\x6c\xde\xbc\xf1\x90\xea\x6a\xb2"
+ "\x35\x86\x36\xaf\x5c\xfe\x4b\x3a",
+ .ilen = 32,
+ .result = "\x83\x6f\x40\x87\x72\xcf\xc1\x13"
+ "\xef\xbb\x80\x21\x04\x6c\x58\x09"
+ "\x07\x1b\xfc\xdf\xc0\x3f\x5b\xc7"
+ "\xe0\x79\xa8\x6e\x71\x7c\x3f\xcf"
+ "\x5c\xda\xb2\x33\xe5\x13\xe2\x0d"
+ "\x74\xd1\xef\xb5\x0f\x3a\xb5\xf8",
+ .rlen = 48,
+ },
+};
+
+static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
+ {
+ .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
+ "\xff\x80\x2e\x48\x7d\x82\xf8\xb9"
+ "\xc6\xfb\x7d",
+ .klen = 19,
+ .iv = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8",
+ .alen = 0,
+ .input = "\xd5\xe8\x93\x9f\xc7\x89\x2e\x2b",
+ .ilen = 8,
+ .result = "\x00",
+ .rlen = 0,
+ .novrfy = 1,
+ }, {
+ .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
+ "\xff\x80\x2e\x48\x7d\x82\xf8\xb9"
+ "\xaf\x94\x87",
+ .klen = 19,
+ .iv = "\x78\x35\x82\x81\x7f\x88\x94\x68",
+ .alen = 0,
+ .input = "\x41\x3c\xb8\x87\x73\xcb\xf3\xf3",
+ .ilen = 8,
+ .result = "\x00",
+ .rlen = 0,
+ }, {
+ .key = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38"
+ "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8"
+ "\xc6\xfb\x7d",
+ .klen = 19,
+ .iv = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8",
+ .assoc = "\xf3\x94\x87\x78\x35\x82\x81\x7f"
+ "\x88\x94\x68\xb1\x78\x6b\x2b\xd6"
+ "\x04\x1f\x4e\xed\x78\xd5\x33\x66"
+ "\xd8\x94\x99\x91\x81\x54\x62\x57",
+ .alen = 32,
+ .input = "\xf0\x7c\x29\x02\xae\x1c\x2f\x55"
+ "\xd0\xd1\x3d\x1a\xa3\x6d\xe4\x0a"
+ "\x86\xb0\x87\x6b\x62\x33\x8c\x34"
+ "\xce\xab\x57\xcc\x79\x0b\xe0\x6f"
+ "\x5c\x3e\x48\x1f\x6c\x46\xf7\x51"
+ "\x8b\x84\x83\x2a\xc1\x05\xb8\xc5",
+ .ilen = 48,
+ .result = "\x50\x82\x3e\x07\xe2\x1e\xb6\xfb"
+ "\x33\xe4\x73\xce\xd2\xfb\x95\x79"
+ "\xe8\xb4\xb5\x77\x11\x10\x62\x6f"
+ "\x6a\x82\xd1\x13\xec\xf5\xd0\x48",
+ .rlen = 32,
+ .novrfy = 1,
+ }, {
+ .key = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38"
+ "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8"
+ "\x05\xe0\xc9",
+ .klen = 19,
+ .iv = "\x0f\xed\x34\xea\x97\xd4\x3b\xdf",
+ .assoc = "\x49\x5c\x50\x1f\x1d\x94\xcc\x81"
+ "\xba\xb7\xb6\x03\xaf\xa5\xc1\xa1"
+ "\xd8\x5c\x42\x68\xe0\x6c\xda\x89"
+ "\x05\xac\x56\xac\x1b\x2a\xd3\x86",
+ .alen = 32,
+ .input = "\x39\xbe\x7d\x15\x62\x77\xf3\x3c"
+ "\xad\x83\x52\x6d\x71\x03\x25\x1c"
+ "\xed\x81\x3a\x9a\x16\x7d\x19\x80"
+ "\x72\x04\x72\xd0\xf6\xff\x05\x0f"
+ "\xb7\x14\x30\x00\x32\x9e\xa0\xa6"
+ "\x9e\x5a\x18\xa1\xb8\xfe\xdb\xd3",
+ .ilen = 48,
+ .result = "\x75\x05\xbe\xc2\xd9\x1e\xde\x60"
+ "\x47\x3d\x8c\x7d\xbd\xb5\xd9\xb7"
+ "\xf2\xae\x61\x05\x8f\x82\x24\x3f"
+ "\x9c\x67\x91\xe1\x38\x4f\xe4\x0c",
+ .rlen = 32,
+ }, {
+ .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
+ "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3"
+ "\xa4\x48\x93\x39\x26\x71\x4a\xc6"
+ "\xee\x49\x83",
+ .klen = 27,
+ .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
+ .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
+ "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
+ "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
+ "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
+ .alen = 32,
+ .input = "\x71\x99\xfa\xf4\x44\x12\x68\x9b",
+ .ilen = 8,
+ .result = "\x00",
+ .rlen = 0,
+ }, {
+ .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+ "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+ "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
+ "\xee\x49\x83",
+ .klen = 27,
+ .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
+ .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
+ "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
+ "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
+ "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
+ .alen = 32,
+ .input = "\xfb\xe5\x5d\x34\xbe\xe5\xe8\xe7"
+ "\x5a\xef\x2f\xbf\x1f\x7f\xd4\xb2"
+ "\x66\xca\x61\x1e\x96\x7a\x61\xb3"
+ "\x1c\x16\x45\x52\xba\x04\x9c\x9f"
+ "\xb1\xd2\x40\xbc\x52\x7c\x6f\xb1",
+ .ilen = 40,
+ .result = "\x85\x34\x66\x42\xc8\x92\x0f\x36"
+ "\x58\xe0\x6b\x91\x3c\x98\x5c\xbb"
+ "\x0a\x85\xcc\x02\xad\x7a\x96\xe9"
+ "\x65\x43\xa4\xc3\x0f\xdc\x55\x81",
+ .rlen = 32,
+ }, {
+ .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+ "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+ "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
+ "\xd1\xfc\x57",
+ .klen = 27,
+ .iv = "\x9c\xfe\xb8\x9c\xad\x71\xaa\x1f",
+ .assoc = "\x86\x67\xa5\xa9\x14\x5f\x0d\xc6"
+ "\xff\x14\xc7\x44\xbf\x6c\x3a\xc3"
+ "\xff\xb6\x81\xbd\xe2\xd5\x06\xc7"
+ "\x3c\xa1\x52\x13\x03\x8a\x23\x3a",
+ .alen = 32,
+ .input = "\x3f\x66\xb0\x9d\xe5\x4b\x38\x00"
+ "\xc6\x0e\x6e\xe5\xd6\x98\xa6\x37"
+ "\x8c\x26\x33\xc6\xb2\xa2\x17\xfa"
+ "\x64\x19\xc0\x30\xd7\xfc\x14\x6b"
+ "\xe3\x33\xc2\x04\xb0\x37\xbe\x3f"
+ "\xa9\xb4\x2d\x68\x03\xa3\x44\xef",
+ .ilen = 48,
+ .result = "\x02\x87\x4d\x28\x80\x6e\xb2\xed"
+ "\x99\x2a\xa8\xca\x04\x25\x45\x90"
+ "\x1d\xdd\x5a\xd9\xe4\xdb\x9c\x9c"
+ "\x49\xe9\x01\xfe\xa7\x80\x6d\x6b",
+ .rlen = 32,
+ .novrfy = 1,
+ }, {
+ .key = "\xa4\x4b\x54\x29\x0a\xb8\x6d\x01"
+ "\x5b\x80\x2a\xcf\x25\xc4\xb7\x5c"
+ "\x20\x2c\xad\x30\xc2\x2b\x41\xfb"
+ "\x0e\x85\xbc\x33\xad\x0f\x2b\xff"
+ "\xee\x49\x83",
+ .klen = 35,
+ .iv = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
+ .alen = 0,
+ .input = "\x1f\xb8\x8f\xa3\xdd\x54\x00\xf2",
+ .ilen = 8,
+ .result = "\x00",
+ .rlen = 0,
+ }, {
+ .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
+ "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3"
+ "\xa4\x48\x93\x39\x26\x71\x4a\xc6"
+ "\xae\x8f\x11\x4c\xc2\x9c\x4a\xbb"
+ "\x85\x34\x66",
+ .klen = 35,
+ .iv = "\x42\xc8\x92\x0f\x36\x58\xe0\x6b",
+ .alen = 0,
+ .input = "\x48\x01\x5e\x02\x24\x04\x66\x47"
+ "\xa1\xea\x6f\xaf\xe8\xfc\xfb\xdd"
+ "\xa5\xa9\x87\x8d\x84\xee\x2e\x77"
+ "\xbb\x86\xb9\xf5\x5c\x6c\xff\xf6"
+ "\x72\xc3\x8e\xf7\x70\xb1\xb2\x07"
+ "\xbc\xa8\xa3\xbd\x83\x7c\x1d\x2a",
+ .ilen = 48,
+ .result = "\xdc\x56\xf2\x71\xb0\xb1\xa0\x6c"
+ "\xf0\x97\x3a\xfb\x6d\xe7\x32\x99"
+ "\x3e\xaf\x70\x5e\xb2\x4d\xea\x39"
+ "\x89\xd4\x75\x7a\x63\xb1\xda\x93",
+ .rlen = 32,
+ .novrfy = 1,
+ }, {
+ .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+ "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+ "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
+ "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b"
+ "\xcf\x76\x3f",
+ .klen = 35,
+ .iv = "\xd9\x95\x75\x8f\x44\x89\x40\x7b",
+ .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
+ "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
+ "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
+ "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe",
+ .alen = 32,
+ .input = "\x48\x58\xd6\xf3\xad\x63\x58\xbf"
+ "\xae\xc7\x5e\xae\x83\x8f\x7b\xe4"
+ "\x78\x5c\x4c\x67\x71\x89\x94\xbf"
+ "\x47\xf1\x63\x7e\x1c\x59\xbd\xc5"
+ "\x7f\x44\x0a\x0c\x01\x18\x07\x92"
+ "\xe1\xd3\x51\xce\x32\x6d\x0c\x5b",
+ .ilen = 48,
+ .result = "\xc2\x54\xc8\xde\x78\x87\x77\x40"
+ "\x49\x71\xe4\xb7\xe7\xcb\x76\x61"
+ "\x0a\x41\xb9\xe9\xc0\x76\x54\xab"
+ "\x04\x49\x3b\x19\x93\x57\x25\x5d",
+ .rlen = 32,
+ },
+};
+
+/*
+ * ANSI X9.31 Continuous Pseudo-Random Number Generator (AES mode)
+ * test vectors, taken from Appendix B.2.9 and B.2.10:
+ * http://csrc.nist.gov/groups/STM/cavp/documents/rng/RNGVS.pdf
+ * Only AES-128 is supported at this time.
+ */
+#define ANSI_CPRNG_AES_TEST_VECTORS 6
+
+static struct cprng_testvec ansi_cprng_aes_tv_template[] = {
+ {
+ .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+ "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+ .klen = 16,
+ .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+ "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xf9",
+ .dtlen = 16,
+ .v = "\x80\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .vlen = 16,
+ .result = "\x59\x53\x1e\xd1\x3b\xb0\xc0\x55"
+ "\x84\x79\x66\x85\xc1\x2f\x76\x41",
+ .rlen = 16,
+ .loops = 1,
+ }, {
+ .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+ "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+ .klen = 16,
+ .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+ "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfa",
+ .dtlen = 16,
+ .v = "\xc0\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .vlen = 16,
+ .result = "\x7c\x22\x2c\xf4\xca\x8f\xa2\x4c"
+ "\x1c\x9c\xb6\x41\xa9\xf3\x22\x0d",
+ .rlen = 16,
+ .loops = 1,
+ }, {
+ .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+ "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+ .klen = 16,
+ .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+ "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfb",
+ .dtlen = 16,
+ .v = "\xe0\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .vlen = 16,
+ .result = "\x8a\xaa\x00\x39\x66\x67\x5b\xe5"
+ "\x29\x14\x28\x81\xa9\x4d\x4e\xc7",
+ .rlen = 16,
+ .loops = 1,
+ }, {
+ .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+ "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+ .klen = 16,
+ .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+ "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfc",
+ .dtlen = 16,
+ .v = "\xf0\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .vlen = 16,
+ .result = "\x88\xdd\xa4\x56\x30\x24\x23\xe5"
+ "\xf6\x9d\xa5\x7e\x7b\x95\xc7\x3a",
+ .rlen = 16,
+ .loops = 1,
+ }, {
+ .key = "\xf3\xb1\x66\x6d\x13\x60\x72\x42"
+ "\xed\x06\x1c\xab\xb8\xd4\x62\x02",
+ .klen = 16,
+ .dt = "\xe6\xb3\xbe\x78\x2a\x23\xfa\x62"
+ "\xd7\x1d\x4a\xfb\xb0\xe9\x22\xfd",
+ .dtlen = 16,
+ .v = "\xf8\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .vlen = 16,
+ .result = "\x05\x25\x92\x46\x61\x79\xd2\xcb"
+ "\x78\xc4\x0b\x14\x0a\x5a\x9a\xc8",
+ .rlen = 16,
+ .loops = 1,
+ }, { /* Monte Carlo Test */
+ .key = "\x9f\x5b\x51\x20\x0b\xf3\x34\xb5"
+ "\xd8\x2b\xe8\xc3\x72\x55\xc8\x48",
+ .klen = 16,
+ .dt = "\x63\x76\xbb\xe5\x29\x02\xba\x3b"
+ "\x67\xc9\x25\xfa\x70\x1f\x11\xac",
+ .dtlen = 16,
+ .v = "\x57\x2c\x8e\x76\x87\x26\x47\x97"
+ "\x7e\x74\xfb\xdd\xc4\x95\x01\xd1",
+ .vlen = 16,
+ .result = "\x48\xe9\xbd\x0d\x06\xee\x18\xfb"
+ "\xe4\x57\x90\xd5\xc3\xfc\x9b\x73",
+ .rlen = 16,
+ .loops = 10000,
+ },
+};
+
/* Cast5 test vectors from RFC 2144 */
#define CAST5_ENC_TEST_VECTORS 3
#define CAST5_DEC_TEST_VECTORS 3
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 33609bab614..c3015733c99 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -165,15 +165,15 @@ static int zlib_compress_update(struct crypto_pcomp *tfm,
return -EINVAL;
}
+ ret = req->avail_out - stream->avail_out;
pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
stream->avail_in, stream->avail_out,
- req->avail_in - stream->avail_in,
- req->avail_out - stream->avail_out);
+ req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
req->avail_in = stream->avail_in;
req->next_out = stream->next_out;
req->avail_out = stream->avail_out;
- return 0;
+ return ret;
}
static int zlib_compress_final(struct crypto_pcomp *tfm,
@@ -195,15 +195,15 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
return -EINVAL;
}
+ ret = req->avail_out - stream->avail_out;
pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
stream->avail_in, stream->avail_out,
- req->avail_in - stream->avail_in,
- req->avail_out - stream->avail_out);
+ req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
req->avail_in = stream->avail_in;
req->next_out = stream->next_out;
req->avail_out = stream->avail_out;
- return 0;
+ return ret;
}
@@ -280,15 +280,15 @@ static int zlib_decompress_update(struct crypto_pcomp *tfm,
return -EINVAL;
}
+ ret = req->avail_out - stream->avail_out;
pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
stream->avail_in, stream->avail_out,
- req->avail_in - stream->avail_in,
- req->avail_out - stream->avail_out);
+ req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
req->avail_in = stream->avail_in;
req->next_out = stream->next_out;
req->avail_out = stream->avail_out;
- return 0;
+ return ret;
}
static int zlib_decompress_final(struct crypto_pcomp *tfm,
@@ -328,15 +328,15 @@ static int zlib_decompress_final(struct crypto_pcomp *tfm,
return -EINVAL;
}
+ ret = req->avail_out - stream->avail_out;
pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
stream->avail_in, stream->avail_out,
- req->avail_in - stream->avail_in,
- req->avail_out - stream->avail_out);
+ req->avail_in - stream->avail_in, ret);
req->next_in = stream->next_in;
req->avail_in = stream->avail_in;
req->next_out = stream->next_out;
req->avail_out = stream->avail_out;
- return 0;
+ return ret;
}
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 5fab6470f4b..9c00440dcf8 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -88,7 +88,7 @@ config HW_RANDOM_N2RNG
config HW_RANDOM_VIA
tristate "VIA HW Random Number Generator support"
- depends on HW_RANDOM && X86_32
+ depends on HW_RANDOM && X86
default HW_RANDOM
---help---
This driver provides kernel-side support for the Random Number
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 538313f9e7a..00dd3de1be5 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -89,7 +89,7 @@ static struct hwrng omap_rng_ops = {
.data_read = omap_rng_data_read,
};
-static int __init omap_rng_probe(struct platform_device *pdev)
+static int __devinit omap_rng_probe(struct platform_device *pdev)
{
struct resource *res, *mem;
int ret;
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index dcd352ad0e7..a94e930575f 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -88,9 +88,9 @@ static struct hwrng timeriomem_rng_ops = {
.priv = 0,
};
-static int __init timeriomem_rng_probe(struct platform_device *pdev)
+static int __devinit timeriomem_rng_probe(struct platform_device *pdev)
{
- struct resource *res, *mem;
+ struct resource *res;
int ret;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -98,21 +98,12 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
if (!res)
return -ENOENT;
- mem = request_mem_region(res->start, res->end - res->start + 1,
- pdev->name);
- if (mem == NULL)
- return -EBUSY;
-
- dev_set_drvdata(&pdev->dev, mem);
-
timeriomem_rng_data = pdev->dev.platform_data;
timeriomem_rng_data->address = ioremap(res->start,
res->end - res->start + 1);
- if (!timeriomem_rng_data->address) {
- ret = -ENOMEM;
- goto err_ioremap;
- }
+ if (!timeriomem_rng_data->address)
+ return -EIO;
if (timeriomem_rng_data->period != 0
&& usecs_to_jiffies(timeriomem_rng_data->period) > 0) {
@@ -125,7 +116,7 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
ret = hwrng_register(&timeriomem_rng_ops);
if (ret)
- goto err_register;
+ goto failed;
dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
timeriomem_rng_data->address,
@@ -133,24 +124,19 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
return 0;
-err_register:
+failed:
dev_err(&pdev->dev, "problem registering\n");
iounmap(timeriomem_rng_data->address);
-err_ioremap:
- release_resource(mem);
return ret;
}
static int __devexit timeriomem_rng_remove(struct platform_device *pdev)
{
- struct resource *mem = dev_get_drvdata(&pdev->dev);
-
del_timer_sync(&timeriomem_rng_timer);
hwrng_unregister(&timeriomem_rng_ops);
iounmap(timeriomem_rng_data->address);
- release_resource(mem);
return 0;
}
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 4e9573c1d39..794aacb715c 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -132,6 +132,19 @@ static int via_rng_init(struct hwrng *rng)
struct cpuinfo_x86 *c = &cpu_data(0);
u32 lo, hi, old_lo;
+ /* VIA Nano CPUs don't have the MSR_VIA_RNG anymore. The RNG
+ * is always enabled if CPUID rng_en is set. There is no
+ * RNG configuration like it used to be the case in this
+ * register */
+ if ((c->x86 == 6) && (c->x86_model >= 0x0f)) {
+ if (!cpu_has_xstore_enabled) {
+ printk(KERN_ERR PFX "can't enable hardware RNG "
+ "if XSTORE is not enabled\n");
+ return -ENODEV;
+ }
+ return 0;
+ }
+
/* Control the RNG via MSR. Tread lightly and pay very close
* close attention to values written, as the reserved fields
* are documented to be "undefined and unpredictable"; but it
@@ -205,5 +218,5 @@ static void __exit mod_exit(void)
module_init(mod_init);
module_exit(mod_exit);
-MODULE_DESCRIPTION("H/W RNG driver for VIA chipsets");
+MODULE_DESCRIPTION("H/W RNG driver for VIA CPU with PadLock");
MODULE_LICENSE("GPL");
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 01afd758072..e748e55bd86 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -12,7 +12,7 @@ if CRYPTO_HW
config CRYPTO_DEV_PADLOCK
tristate "Support for VIA PadLock ACE"
- depends on X86_32 && !UML
+ depends on X86 && !UML
select CRYPTO_ALGAPI
help
Some VIA processors come with an integrated crypto engine
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index 2bef086fb34..5f753fc0873 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2564,7 +2564,7 @@ static void hifn_tasklet_callback(unsigned long data)
hifn_process_queue(dev);
}
-static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int __devinit hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int err, i;
struct hifn_device *dev;
@@ -2696,7 +2696,7 @@ err_out_disable_pci_device:
return err;
}
-static void hifn_remove(struct pci_dev *pdev)
+static void __devexit hifn_remove(struct pci_dev *pdev)
{
int i;
struct hifn_device *dev;
@@ -2744,7 +2744,7 @@ static struct pci_driver hifn_pci_driver = {
.remove = __devexit_p(hifn_remove),
};
-static int __devinit hifn_init(void)
+static int __init hifn_init(void)
{
unsigned int freq;
int err;
@@ -2789,7 +2789,7 @@ static int __devinit hifn_init(void)
return 0;
}
-static void __devexit hifn_fini(void)
+static void __exit hifn_fini(void)
{
pci_unregister_driver(&hifn_pci_driver);
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 856b3cc2558..87f92c39b5f 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -154,7 +154,11 @@ static inline void padlock_reset_key(struct cword *cword)
int cpu = raw_smp_processor_id();
if (cword != per_cpu(last_cword, cpu))
+#ifndef CONFIG_X86_64
asm volatile ("pushfl; popfl");
+#else
+ asm volatile ("pushfq; popfq");
+#endif
}
static inline void padlock_store_cword(struct cword *cword)
@@ -208,10 +212,19 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
asm volatile ("test $1, %%cl;"
"je 1f;"
+#ifndef CONFIG_X86_64
"lea -1(%%ecx), %%eax;"
"mov $1, %%ecx;"
+#else
+ "lea -1(%%rcx), %%rax;"
+ "mov $1, %%rcx;"
+#endif
".byte 0xf3,0x0f,0xa7,0xc8;" /* rep xcryptecb */
+#ifndef CONFIG_X86_64
"mov %%eax, %%ecx;"
+#else
+ "mov %%rax, %%rcx;"
+#endif
"1:"
".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
: "+S"(input), "+D"(output)
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index a3918c16b3d..c70775fd3ce 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -44,6 +44,8 @@
#include <crypto/sha.h>
#include <crypto/aead.h>
#include <crypto/authenc.h>
+#include <crypto/skcipher.h>
+#include <crypto/scatterwalk.h>
#include "talitos.h"
@@ -339,7 +341,8 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
status = error;
dma_unmap_single(dev, request->dma_desc,
- sizeof(struct talitos_desc), DMA_BIDIRECTIONAL);
+ sizeof(struct talitos_desc),
+ DMA_BIDIRECTIONAL);
/* copy entries so we can call callback outside lock */
saved_req.desc = request->desc;
@@ -413,7 +416,8 @@ static struct talitos_desc *current_desc(struct device *dev, int ch)
/*
* user diagnostics; report root cause of error based on execution unit status
*/
-static void report_eu_error(struct device *dev, int ch, struct talitos_desc *desc)
+static void report_eu_error(struct device *dev, int ch,
+ struct talitos_desc *desc)
{
struct talitos_private *priv = dev_get_drvdata(dev);
int i;
@@ -684,8 +688,8 @@ struct talitos_ctx {
unsigned int authsize;
};
-static int aead_authenc_setauthsize(struct crypto_aead *authenc,
- unsigned int authsize)
+static int aead_setauthsize(struct crypto_aead *authenc,
+ unsigned int authsize)
{
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
@@ -694,8 +698,8 @@ static int aead_authenc_setauthsize(struct crypto_aead *authenc,
return 0;
}
-static int aead_authenc_setkey(struct crypto_aead *authenc,
- const u8 *key, unsigned int keylen)
+static int aead_setkey(struct crypto_aead *authenc,
+ const u8 *key, unsigned int keylen)
{
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
struct rtattr *rta = (void *)key;
@@ -740,7 +744,7 @@ badkey:
}
/*
- * ipsec_esp_edesc - s/w-extended ipsec_esp descriptor
+ * talitos_edesc - s/w-extended descriptor
* @src_nents: number of segments in input scatterlist
* @dst_nents: number of segments in output scatterlist
* @dma_len: length of dma mapped link_tbl space
@@ -752,17 +756,67 @@ badkey:
* is greater than 1, an integrity check value is concatenated to the end
* of link_tbl data
*/
-struct ipsec_esp_edesc {
+struct talitos_edesc {
int src_nents;
int dst_nents;
+ int src_is_chained;
+ int dst_is_chained;
int dma_len;
dma_addr_t dma_link_tbl;
struct talitos_desc desc;
struct talitos_ptr link_tbl[0];
};
+static int talitos_map_sg(struct device *dev, struct scatterlist *sg,
+ unsigned int nents, enum dma_data_direction dir,
+ int chained)
+{
+ if (unlikely(chained))
+ while (sg) {
+ dma_map_sg(dev, sg, 1, dir);
+ sg = scatterwalk_sg_next(sg);
+ }
+ else
+ dma_map_sg(dev, sg, nents, dir);
+ return nents;
+}
+
+static void talitos_unmap_sg_chain(struct device *dev, struct scatterlist *sg,
+ enum dma_data_direction dir)
+{
+ while (sg) {
+ dma_unmap_sg(dev, sg, 1, dir);
+ sg = scatterwalk_sg_next(sg);
+ }
+}
+
+static void talitos_sg_unmap(struct device *dev,
+ struct talitos_edesc *edesc,
+ struct scatterlist *src,
+ struct scatterlist *dst)
+{
+ unsigned int src_nents = edesc->src_nents ? : 1;
+ unsigned int dst_nents = edesc->dst_nents ? : 1;
+
+ if (src != dst) {
+ if (edesc->src_is_chained)
+ talitos_unmap_sg_chain(dev, src, DMA_TO_DEVICE);
+ else
+ dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE);
+
+ if (edesc->dst_is_chained)
+ talitos_unmap_sg_chain(dev, dst, DMA_FROM_DEVICE);
+ else
+ dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE);
+ } else
+ if (edesc->src_is_chained)
+ talitos_unmap_sg_chain(dev, src, DMA_BIDIRECTIONAL);
+ else
+ dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL);
+}
+
static void ipsec_esp_unmap(struct device *dev,
- struct ipsec_esp_edesc *edesc,
+ struct talitos_edesc *edesc,
struct aead_request *areq)
{
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE);
@@ -772,15 +826,7 @@ static void ipsec_esp_unmap(struct device *dev,
dma_unmap_sg(dev, areq->assoc, 1, DMA_TO_DEVICE);
- if (areq->src != areq->dst) {
- dma_unmap_sg(dev, areq->src, edesc->src_nents ? : 1,
- DMA_TO_DEVICE);
- dma_unmap_sg(dev, areq->dst, edesc->dst_nents ? : 1,
- DMA_FROM_DEVICE);
- } else {
- dma_unmap_sg(dev, areq->src, edesc->src_nents ? : 1,
- DMA_BIDIRECTIONAL);
- }
+ talitos_sg_unmap(dev, edesc, areq->src, areq->dst);
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
@@ -795,13 +841,14 @@ static void ipsec_esp_encrypt_done(struct device *dev,
int err)
{
struct aead_request *areq = context;
- struct ipsec_esp_edesc *edesc =
- container_of(desc, struct ipsec_esp_edesc, desc);
struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
+ struct talitos_edesc *edesc;
struct scatterlist *sg;
void *icvdata;
+ edesc = container_of(desc, struct talitos_edesc, desc);
+
ipsec_esp_unmap(dev, edesc, areq);
/* copy the generated ICV to dst */
@@ -819,17 +866,18 @@ static void ipsec_esp_encrypt_done(struct device *dev,
}
static void ipsec_esp_decrypt_swauth_done(struct device *dev,
- struct talitos_desc *desc, void *context,
- int err)
+ struct talitos_desc *desc,
+ void *context, int err)
{
struct aead_request *req = context;
- struct ipsec_esp_edesc *edesc =
- container_of(desc, struct ipsec_esp_edesc, desc);
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
+ struct talitos_edesc *edesc;
struct scatterlist *sg;
void *icvdata;
+ edesc = container_of(desc, struct talitos_edesc, desc);
+
ipsec_esp_unmap(dev, edesc, req);
if (!err) {
@@ -851,20 +899,20 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev,
}
static void ipsec_esp_decrypt_hwauth_done(struct device *dev,
- struct talitos_desc *desc, void *context,
- int err)
+ struct talitos_desc *desc,
+ void *context, int err)
{
struct aead_request *req = context;
- struct ipsec_esp_edesc *edesc =
- container_of(desc, struct ipsec_esp_edesc, desc);
+ struct talitos_edesc *edesc;
+
+ edesc = container_of(desc, struct talitos_edesc, desc);
ipsec_esp_unmap(dev, edesc, req);
/* check ICV auth status */
- if (!err)
- if ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) !=
- DESC_HDR_LO_ICCR1_PASS)
- err = -EBADMSG;
+ if (!err && ((desc->hdr_lo & DESC_HDR_LO_ICCR1_MASK) !=
+ DESC_HDR_LO_ICCR1_PASS))
+ err = -EBADMSG;
kfree(edesc);
@@ -886,7 +934,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count,
link_tbl_ptr->j_extent = 0;
link_tbl_ptr++;
cryptlen -= sg_dma_len(sg);
- sg = sg_next(sg);
+ sg = scatterwalk_sg_next(sg);
}
/* adjust (decrease) last one (or two) entry's len to cryptlen */
@@ -910,7 +958,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count,
/*
* fill in and submit ipsec_esp descriptor
*/
-static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
+static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
u8 *giv, u64 seq,
void (*callback) (struct device *dev,
struct talitos_desc *desc,
@@ -952,32 +1000,31 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
desc->ptr[4].len = cpu_to_be16(cryptlen);
desc->ptr[4].j_extent = authsize;
- if (areq->src == areq->dst)
- sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? : 1,
- DMA_BIDIRECTIONAL);
- else
- sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ? : 1,
- DMA_TO_DEVICE);
+ sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1,
+ (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
+ : DMA_TO_DEVICE,
+ edesc->src_is_chained);
if (sg_count == 1) {
desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src));
} else {
sg_link_tbl_len = cryptlen;
- if ((edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) &&
- (edesc->desc.hdr & DESC_HDR_MODE0_ENCRYPT) == 0) {
+ if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV)
sg_link_tbl_len = cryptlen + authsize;
- }
+
sg_count = sg_to_link_tbl(areq->src, sg_count, sg_link_tbl_len,
&edesc->link_tbl[0]);
if (sg_count > 1) {
desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
desc->ptr[4].ptr = cpu_to_be32(edesc->dma_link_tbl);
- dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
- edesc->dma_len, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+ edesc->dma_len,
+ DMA_BIDIRECTIONAL);
} else {
/* Only one segment now, so no link tbl needed */
- desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->src));
+ desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->
+ src));
}
}
@@ -985,10 +1032,11 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
desc->ptr[5].len = cpu_to_be16(cryptlen);
desc->ptr[5].j_extent = authsize;
- if (areq->src != areq->dst) {
- sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1,
- DMA_FROM_DEVICE);
- }
+ if (areq->src != areq->dst)
+ sg_count = talitos_map_sg(dev, areq->dst,
+ edesc->dst_nents ? : 1,
+ DMA_FROM_DEVICE,
+ edesc->dst_is_chained);
if (sg_count == 1) {
desc->ptr[5].ptr = cpu_to_be32(sg_dma_address(areq->dst));
@@ -1033,49 +1081,55 @@ static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
return ret;
}
-
/*
* derive number of elements in scatterlist
*/
-static int sg_count(struct scatterlist *sg_list, int nbytes)
+static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained)
{
struct scatterlist *sg = sg_list;
int sg_nents = 0;
- while (nbytes) {
+ *chained = 0;
+ while (nbytes > 0) {
sg_nents++;
nbytes -= sg->length;
- sg = sg_next(sg);
+ if (!sg_is_last(sg) && (sg + 1)->length == 0)
+ *chained = 1;
+ sg = scatterwalk_sg_next(sg);
}
return sg_nents;
}
/*
- * allocate and map the ipsec_esp extended descriptor
+ * allocate and map the extended descriptor
*/
-static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
- int icv_stashing)
+static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
+ struct scatterlist *src,
+ struct scatterlist *dst,
+ unsigned int cryptlen,
+ unsigned int authsize,
+ int icv_stashing,
+ u32 cryptoflags)
{
- struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
- struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
- struct ipsec_esp_edesc *edesc;
+ struct talitos_edesc *edesc;
int src_nents, dst_nents, alloc_len, dma_len;
- gfp_t flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+ int src_chained, dst_chained = 0;
+ gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
GFP_ATOMIC;
- if (areq->cryptlen + ctx->authsize > TALITOS_MAX_DATA_LEN) {
- dev_err(ctx->dev, "cryptlen exceeds h/w max limit\n");
+ if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) {
+ dev_err(dev, "length exceeds h/w max limit\n");
return ERR_PTR(-EINVAL);
}
- src_nents = sg_count(areq->src, areq->cryptlen + ctx->authsize);
+ src_nents = sg_count(src, cryptlen + authsize, &src_chained);
src_nents = (src_nents == 1) ? 0 : src_nents;
- if (areq->dst == areq->src) {
+ if (dst == src) {
dst_nents = src_nents;
} else {
- dst_nents = sg_count(areq->dst, areq->cryptlen + ctx->authsize);
+ dst_nents = sg_count(dst, cryptlen + authsize, &dst_chained);
dst_nents = (dst_nents == 1) ? 0 : dst_nents;
}
@@ -1084,39 +1138,52 @@ static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
* allowing for two separate entries for ICV and generated ICV (+ 2),
* and the ICV data itself
*/
- alloc_len = sizeof(struct ipsec_esp_edesc);
+ alloc_len = sizeof(struct talitos_edesc);
if (src_nents || dst_nents) {
dma_len = (src_nents + dst_nents + 2) *
- sizeof(struct talitos_ptr) + ctx->authsize;
+ sizeof(struct talitos_ptr) + authsize;
alloc_len += dma_len;
} else {
dma_len = 0;
- alloc_len += icv_stashing ? ctx->authsize : 0;
+ alloc_len += icv_stashing ? authsize : 0;
}
edesc = kmalloc(alloc_len, GFP_DMA | flags);
if (!edesc) {
- dev_err(ctx->dev, "could not allocate edescriptor\n");
+ dev_err(dev, "could not allocate edescriptor\n");
return ERR_PTR(-ENOMEM);
}
edesc->src_nents = src_nents;
edesc->dst_nents = dst_nents;
+ edesc->src_is_chained = src_chained;
+ edesc->dst_is_chained = dst_chained;
edesc->dma_len = dma_len;
- edesc->dma_link_tbl = dma_map_single(ctx->dev, &edesc->link_tbl[0],
+ edesc->dma_link_tbl = dma_map_single(dev, &edesc->link_tbl[0],
edesc->dma_len, DMA_BIDIRECTIONAL);
return edesc;
}
-static int aead_authenc_encrypt(struct aead_request *req)
+static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq,
+ int icv_stashing)
+{
+ struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
+ struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
+
+ return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst,
+ areq->cryptlen, ctx->authsize, icv_stashing,
+ areq->base.flags);
+}
+
+static int aead_encrypt(struct aead_request *req)
{
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
- struct ipsec_esp_edesc *edesc;
+ struct talitos_edesc *edesc;
/* allocate extended descriptor */
- edesc = ipsec_esp_edesc_alloc(req, 0);
+ edesc = aead_edesc_alloc(req, 0);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
@@ -1126,70 +1193,67 @@ static int aead_authenc_encrypt(struct aead_request *req)
return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_encrypt_done);
}
-
-
-static int aead_authenc_decrypt(struct aead_request *req)
+static int aead_decrypt(struct aead_request *req)
{
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
unsigned int authsize = ctx->authsize;
struct talitos_private *priv = dev_get_drvdata(ctx->dev);
- struct ipsec_esp_edesc *edesc;
+ struct talitos_edesc *edesc;
struct scatterlist *sg;
void *icvdata;
req->cryptlen -= authsize;
/* allocate extended descriptor */
- edesc = ipsec_esp_edesc_alloc(req, 1);
+ edesc = aead_edesc_alloc(req, 1);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
if ((priv->features & TALITOS_FTR_HW_AUTH_CHECK) &&
- (((!edesc->src_nents && !edesc->dst_nents) ||
- priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT))) {
+ ((!edesc->src_nents && !edesc->dst_nents) ||
+ priv->features & TALITOS_FTR_SRC_LINK_TBL_LEN_INCLUDES_EXTENT)) {
/* decrypt and check the ICV */
- edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND |
+ edesc->desc.hdr = ctx->desc_hdr_template |
+ DESC_HDR_DIR_INBOUND |
DESC_HDR_MODE1_MDEU_CICV;
/* reset integrity check result bits */
edesc->desc.hdr_lo = 0;
- return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_hwauth_done);
+ return ipsec_esp(edesc, req, NULL, 0,
+ ipsec_esp_decrypt_hwauth_done);
- } else {
-
- /* Have to check the ICV with software */
+ }
- edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
+ /* Have to check the ICV with software */
+ edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
- /* stash incoming ICV for later cmp with ICV generated by the h/w */
- if (edesc->dma_len)
- icvdata = &edesc->link_tbl[edesc->src_nents +
- edesc->dst_nents + 2];
- else
- icvdata = &edesc->link_tbl[0];
+ /* stash incoming ICV for later cmp with ICV generated by the h/w */
+ if (edesc->dma_len)
+ icvdata = &edesc->link_tbl[edesc->src_nents +
+ edesc->dst_nents + 2];
+ else
+ icvdata = &edesc->link_tbl[0];
- sg = sg_last(req->src, edesc->src_nents ? : 1);
+ sg = sg_last(req->src, edesc->src_nents ? : 1);
- memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize,
- ctx->authsize);
+ memcpy(icvdata, (char *)sg_virt(sg) + sg->length - ctx->authsize,
+ ctx->authsize);
- return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done);
- }
+ return ipsec_esp(edesc, req, NULL, 0, ipsec_esp_decrypt_swauth_done);
}
-static int aead_authenc_givencrypt(
- struct aead_givcrypt_request *req)
+static int aead_givencrypt(struct aead_givcrypt_request *req)
{
struct aead_request *areq = &req->areq;
struct crypto_aead *authenc = crypto_aead_reqtfm(areq);
struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
- struct ipsec_esp_edesc *edesc;
+ struct talitos_edesc *edesc;
/* allocate extended descriptor */
- edesc = ipsec_esp_edesc_alloc(areq, 0);
+ edesc = aead_edesc_alloc(areq, 0);
if (IS_ERR(edesc))
return PTR_ERR(edesc);
@@ -1204,31 +1268,228 @@ static int aead_authenc_givencrypt(
ipsec_esp_encrypt_done);
}
+static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
+ const u8 *key, unsigned int keylen)
+{
+ struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+ struct ablkcipher_alg *alg = crypto_ablkcipher_alg(cipher);
+
+ if (keylen > TALITOS_MAX_KEY_SIZE)
+ goto badkey;
+
+ if (keylen < alg->min_keysize || keylen > alg->max_keysize)
+ goto badkey;
+
+ memcpy(&ctx->key, key, keylen);
+ ctx->keylen = keylen;
+
+ return 0;
+
+badkey:
+ crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+}
+
+static void common_nonsnoop_unmap(struct device *dev,
+ struct talitos_edesc *edesc,
+ struct ablkcipher_request *areq)
+{
+ unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+ unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE);
+ unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE);
+
+ talitos_sg_unmap(dev, edesc, areq->src, areq->dst);
+
+ if (edesc->dma_len)
+ dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
+ DMA_BIDIRECTIONAL);
+}
+
+static void ablkcipher_done(struct device *dev,
+ struct talitos_desc *desc, void *context,
+ int err)
+{
+ struct ablkcipher_request *areq = context;
+ struct talitos_edesc *edesc;
+
+ edesc = container_of(desc, struct talitos_edesc, desc);
+
+ common_nonsnoop_unmap(dev, edesc, areq);
+
+ kfree(edesc);
+
+ areq->base.complete(&areq->base, err);
+}
+
+static int common_nonsnoop(struct talitos_edesc *edesc,
+ struct ablkcipher_request *areq,
+ u8 *giv,
+ void (*callback) (struct device *dev,
+ struct talitos_desc *desc,
+ void *context, int error))
+{
+ struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+ struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+ struct device *dev = ctx->dev;
+ struct talitos_desc *desc = &edesc->desc;
+ unsigned int cryptlen = areq->nbytes;
+ unsigned int ivsize;
+ int sg_count, ret;
+
+ /* first DWORD empty */
+ desc->ptr[0].len = 0;
+ desc->ptr[0].ptr = 0;
+ desc->ptr[0].j_extent = 0;
+
+ /* cipher iv */
+ ivsize = crypto_ablkcipher_ivsize(cipher);
+ map_single_talitos_ptr(dev, &desc->ptr[1], ivsize, giv ?: areq->info, 0,
+ DMA_TO_DEVICE);
+
+ /* cipher key */
+ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
+ (char *)&ctx->key, 0, DMA_TO_DEVICE);
+
+ /*
+ * cipher in
+ */
+ desc->ptr[3].len = cpu_to_be16(cryptlen);
+ desc->ptr[3].j_extent = 0;
+
+ sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1,
+ (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
+ : DMA_TO_DEVICE,
+ edesc->src_is_chained);
+
+ if (sg_count == 1) {
+ desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->src));
+ } else {
+ sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen,
+ &edesc->link_tbl[0]);
+ if (sg_count > 1) {
+ desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP;
+ desc->ptr[3].ptr = cpu_to_be32(edesc->dma_link_tbl);
+ dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+ edesc->dma_len,
+ DMA_BIDIRECTIONAL);
+ } else {
+ /* Only one segment now, so no link tbl needed */
+ desc->ptr[3].ptr = cpu_to_be32(sg_dma_address(areq->
+ src));
+ }
+ }
+
+ /* cipher out */
+ desc->ptr[4].len = cpu_to_be16(cryptlen);
+ desc->ptr[4].j_extent = 0;
+
+ if (areq->src != areq->dst)
+ sg_count = talitos_map_sg(dev, areq->dst,
+ edesc->dst_nents ? : 1,
+ DMA_FROM_DEVICE,
+ edesc->dst_is_chained);
+
+ if (sg_count == 1) {
+ desc->ptr[4].ptr = cpu_to_be32(sg_dma_address(areq->dst));
+ } else {
+ struct talitos_ptr *link_tbl_ptr =
+ &edesc->link_tbl[edesc->src_nents + 1];
+
+ desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
+ desc->ptr[4].ptr = cpu_to_be32((struct talitos_ptr *)
+ edesc->dma_link_tbl +
+ edesc->src_nents + 1);
+ sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen,
+ link_tbl_ptr);
+ dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
+ edesc->dma_len, DMA_BIDIRECTIONAL);
+ }
+
+ /* iv out */
+ map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0,
+ DMA_FROM_DEVICE);
+
+ /* last DWORD empty */
+ desc->ptr[6].len = 0;
+ desc->ptr[6].ptr = 0;
+ desc->ptr[6].j_extent = 0;
+
+ ret = talitos_submit(dev, desc, callback, areq);
+ if (ret != -EINPROGRESS) {
+ common_nonsnoop_unmap(dev, edesc, areq);
+ kfree(edesc);
+ }
+ return ret;
+}
+
+static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *
+ areq)
+{
+ struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+ struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+
+ return talitos_edesc_alloc(ctx->dev, areq->src, areq->dst, areq->nbytes,
+ 0, 0, areq->base.flags);
+}
+
+static int ablkcipher_encrypt(struct ablkcipher_request *areq)
+{
+ struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+ struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+ struct talitos_edesc *edesc;
+
+ /* allocate extended descriptor */
+ edesc = ablkcipher_edesc_alloc(areq);
+ if (IS_ERR(edesc))
+ return PTR_ERR(edesc);
+
+ /* set encrypt */
+ edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_MODE0_ENCRYPT;
+
+ return common_nonsnoop(edesc, areq, NULL, ablkcipher_done);
+}
+
+static int ablkcipher_decrypt(struct ablkcipher_request *areq)
+{
+ struct crypto_ablkcipher *cipher = crypto_ablkcipher_reqtfm(areq);
+ struct talitos_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+ struct talitos_edesc *edesc;
+
+ /* allocate extended descriptor */
+ edesc = ablkcipher_edesc_alloc(areq);
+ if (IS_ERR(edesc))
+ return PTR_ERR(edesc);
+
+ edesc->desc.hdr = ctx->desc_hdr_template | DESC_HDR_DIR_INBOUND;
+
+ return common_nonsnoop(edesc, areq, NULL, ablkcipher_done);
+}
+
struct talitos_alg_template {
- char name[CRYPTO_MAX_ALG_NAME];
- char driver_name[CRYPTO_MAX_ALG_NAME];
- unsigned int blocksize;
- struct aead_alg aead;
- struct device *dev;
+ struct crypto_alg alg;
__be32 desc_hdr_template;
};
static struct talitos_alg_template driver_algs[] = {
- /* single-pass ipsec_esp descriptor */
+ /* AEAD algorithms. These use a single-pass ipsec_esp descriptor */
{
- .name = "authenc(hmac(sha1),cbc(aes))",
- .driver_name = "authenc-hmac-sha1-cbc-aes-talitos",
- .blocksize = AES_BLOCK_SIZE,
- .aead = {
- .setkey = aead_authenc_setkey,
- .setauthsize = aead_authenc_setauthsize,
- .encrypt = aead_authenc_encrypt,
- .decrypt = aead_authenc_decrypt,
- .givencrypt = aead_authenc_givencrypt,
- .geniv = "<built-in>",
- .ivsize = AES_BLOCK_SIZE,
- .maxauthsize = SHA1_DIGEST_SIZE,
- },
+ .alg = {
+ .cra_name = "authenc(hmac(sha1),cbc(aes))",
+ .cra_driver_name = "authenc-hmac-sha1-cbc-aes-talitos",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_aead_type,
+ .cra_aead = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+ .decrypt = aead_decrypt,
+ .givencrypt = aead_givencrypt,
+ .geniv = "<built-in>",
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ }
+ },
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
@@ -1238,19 +1499,23 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_SHA1_HMAC,
},
{
- .name = "authenc(hmac(sha1),cbc(des3_ede))",
- .driver_name = "authenc-hmac-sha1-cbc-3des-talitos",
- .blocksize = DES3_EDE_BLOCK_SIZE,
- .aead = {
- .setkey = aead_authenc_setkey,
- .setauthsize = aead_authenc_setauthsize,
- .encrypt = aead_authenc_encrypt,
- .decrypt = aead_authenc_decrypt,
- .givencrypt = aead_authenc_givencrypt,
- .geniv = "<built-in>",
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .maxauthsize = SHA1_DIGEST_SIZE,
- },
+ .alg = {
+ .cra_name = "authenc(hmac(sha1),cbc(des3_ede))",
+ .cra_driver_name = "authenc-hmac-sha1-cbc-3des-talitos",
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_aead_type,
+ .cra_aead = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+ .decrypt = aead_decrypt,
+ .givencrypt = aead_givencrypt,
+ .geniv = "<built-in>",
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ }
+ },
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
@@ -1261,19 +1526,23 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_SHA1_HMAC,
},
{
- .name = "authenc(hmac(sha256),cbc(aes))",
- .driver_name = "authenc-hmac-sha256-cbc-aes-talitos",
- .blocksize = AES_BLOCK_SIZE,
- .aead = {
- .setkey = aead_authenc_setkey,
- .setauthsize = aead_authenc_setauthsize,
- .encrypt = aead_authenc_encrypt,
- .decrypt = aead_authenc_decrypt,
- .givencrypt = aead_authenc_givencrypt,
- .geniv = "<built-in>",
- .ivsize = AES_BLOCK_SIZE,
- .maxauthsize = SHA256_DIGEST_SIZE,
- },
+ .alg = {
+ .cra_name = "authenc(hmac(sha256),cbc(aes))",
+ .cra_driver_name = "authenc-hmac-sha256-cbc-aes-talitos",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_aead_type,
+ .cra_aead = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+ .decrypt = aead_decrypt,
+ .givencrypt = aead_givencrypt,
+ .geniv = "<built-in>",
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ }
+ },
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
@@ -1283,19 +1552,23 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_SHA256_HMAC,
},
{
- .name = "authenc(hmac(sha256),cbc(des3_ede))",
- .driver_name = "authenc-hmac-sha256-cbc-3des-talitos",
- .blocksize = DES3_EDE_BLOCK_SIZE,
- .aead = {
- .setkey = aead_authenc_setkey,
- .setauthsize = aead_authenc_setauthsize,
- .encrypt = aead_authenc_encrypt,
- .decrypt = aead_authenc_decrypt,
- .givencrypt = aead_authenc_givencrypt,
- .geniv = "<built-in>",
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .maxauthsize = SHA256_DIGEST_SIZE,
- },
+ .alg = {
+ .cra_name = "authenc(hmac(sha256),cbc(des3_ede))",
+ .cra_driver_name = "authenc-hmac-sha256-cbc-3des-talitos",
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_aead_type,
+ .cra_aead = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+ .decrypt = aead_decrypt,
+ .givencrypt = aead_givencrypt,
+ .geniv = "<built-in>",
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ }
+ },
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
@@ -1306,19 +1579,23 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_SHA256_HMAC,
},
{
- .name = "authenc(hmac(md5),cbc(aes))",
- .driver_name = "authenc-hmac-md5-cbc-aes-talitos",
- .blocksize = AES_BLOCK_SIZE,
- .aead = {
- .setkey = aead_authenc_setkey,
- .setauthsize = aead_authenc_setauthsize,
- .encrypt = aead_authenc_encrypt,
- .decrypt = aead_authenc_decrypt,
- .givencrypt = aead_authenc_givencrypt,
- .geniv = "<built-in>",
- .ivsize = AES_BLOCK_SIZE,
- .maxauthsize = MD5_DIGEST_SIZE,
- },
+ .alg = {
+ .cra_name = "authenc(hmac(md5),cbc(aes))",
+ .cra_driver_name = "authenc-hmac-md5-cbc-aes-talitos",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_aead_type,
+ .cra_aead = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+ .decrypt = aead_decrypt,
+ .givencrypt = aead_givencrypt,
+ .geniv = "<built-in>",
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = MD5_DIGEST_SIZE,
+ }
+ },
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_AESU |
DESC_HDR_MODE0_AESU_CBC |
@@ -1328,19 +1605,23 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_MD5_HMAC,
},
{
- .name = "authenc(hmac(md5),cbc(des3_ede))",
- .driver_name = "authenc-hmac-md5-cbc-3des-talitos",
- .blocksize = DES3_EDE_BLOCK_SIZE,
- .aead = {
- .setkey = aead_authenc_setkey,
- .setauthsize = aead_authenc_setauthsize,
- .encrypt = aead_authenc_encrypt,
- .decrypt = aead_authenc_decrypt,
- .givencrypt = aead_authenc_givencrypt,
- .geniv = "<built-in>",
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .maxauthsize = MD5_DIGEST_SIZE,
- },
+ .alg = {
+ .cra_name = "authenc(hmac(md5),cbc(des3_ede))",
+ .cra_driver_name = "authenc-hmac-md5-cbc-3des-talitos",
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_aead_type,
+ .cra_aead = {
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .encrypt = aead_encrypt,
+ .decrypt = aead_decrypt,
+ .givencrypt = aead_givencrypt,
+ .geniv = "<built-in>",
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = MD5_DIGEST_SIZE,
+ }
+ },
.desc_hdr_template = DESC_HDR_TYPE_IPSEC_ESP |
DESC_HDR_SEL0_DEU |
DESC_HDR_MODE0_DEU_CBC |
@@ -1349,6 +1630,52 @@ static struct talitos_alg_template driver_algs[] = {
DESC_HDR_MODE1_MDEU_INIT |
DESC_HDR_MODE1_MDEU_PAD |
DESC_HDR_MODE1_MDEU_MD5_HMAC,
+ },
+ /* ABLKCIPHER algorithms. */
+ {
+ .alg = {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-talitos",
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+ CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_ablkcipher = {
+ .setkey = ablkcipher_setkey,
+ .encrypt = ablkcipher_encrypt,
+ .decrypt = ablkcipher_decrypt,
+ .geniv = "eseqiv",
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ }
+ },
+ .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+ DESC_HDR_SEL0_AESU |
+ DESC_HDR_MODE0_AESU_CBC,
+ },
+ {
+ .alg = {
+ .cra_name = "cbc(des3_ede)",
+ .cra_driver_name = "cbc-3des-talitos",
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+ CRYPTO_ALG_ASYNC,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_ablkcipher = {
+ .setkey = ablkcipher_setkey,
+ .encrypt = ablkcipher_encrypt,
+ .decrypt = ablkcipher_decrypt,
+ .geniv = "eseqiv",
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ }
+ },
+ .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU |
+ DESC_HDR_SEL0_DEU |
+ DESC_HDR_MODE0_DEU_CBC |
+ DESC_HDR_MODE0_DEU_3DES,
}
};
@@ -1362,12 +1689,14 @@ struct talitos_crypto_alg {
static int talitos_cra_init(struct crypto_tfm *tfm)
{
struct crypto_alg *alg = tfm->__crt_alg;
- struct talitos_crypto_alg *talitos_alg =
- container_of(alg, struct talitos_crypto_alg, crypto_alg);
+ struct talitos_crypto_alg *talitos_alg;
struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
+ talitos_alg = container_of(alg, struct talitos_crypto_alg, crypto_alg);
+
/* update context with ptr to dev */
ctx->dev = talitos_alg->dev;
+
/* copy descriptor header template value */
ctx->desc_hdr_template = talitos_alg->desc_hdr_template;
@@ -1453,19 +1782,13 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
return ERR_PTR(-ENOMEM);
alg = &t_alg->crypto_alg;
+ *alg = template->alg;
- snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name);
- snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
- template->driver_name);
alg->cra_module = THIS_MODULE;
alg->cra_init = talitos_cra_init;
alg->cra_priority = TALITOS_CRA_PRIORITY;
- alg->cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
- alg->cra_blocksize = template->blocksize;
alg->cra_alignmask = 0;
- alg->cra_type = &crypto_aead_type;
alg->cra_ctxsize = sizeof(struct talitos_ctx);
- alg->cra_u.aead = template->aead;
t_alg->desc_hdr_template = template->desc_hdr_template;
t_alg->dev = dev;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 29228f5899c..480f28127f0 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -39,6 +39,7 @@ config XFS_QUOTA
config XFS_POSIX_ACL
bool "XFS POSIX ACL support"
depends on XFS_FS
+ select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 60f107e47fe..7a59daed178 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
endif
xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
+xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o
xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
@@ -88,8 +88,7 @@ xfs-y += xfs_alloc.o \
xfs_utils.o \
xfs_vnodeops.o \
xfs_rw.o \
- xfs_dmops.o \
- xfs_qmops.o
+ xfs_dmops.o
xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
xfs_dir2_trace.o
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
new file mode 100644
index 00000000000..1e9d1246eeb
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+#define XFS_ACL_NOT_CACHED ((void *)-1)
+
+/*
+ * Locking scheme:
+ * - all ACL updates are protected by inode->i_mutex, which is taken before
+ * calling into this file.
+ * - access and updates to the ip->i_acl and ip->i_default_acl pointers are
+ * protected by inode->i_lock.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+ struct posix_acl_entry *acl_e;
+ struct posix_acl *acl;
+ struct xfs_acl_entry *ace;
+ int count, i;
+
+ count = be32_to_cpu(aclp->acl_cnt);
+
+ acl = posix_acl_alloc(count, GFP_KERNEL);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < count; i++) {
+ acl_e = &acl->a_entries[i];
+ ace = &aclp->acl_entry[i];
+
+ /*
+ * The tag is 32 bits on disk and 16 bits in core.
+ *
+ * Because every access to it goes through the core
+ * format first this is not a problem.
+ */
+ acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+ acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+ switch (acl_e->e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ acl_e->e_id = be32_to_cpu(ace->ae_id);
+ break;
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ acl_e->e_id = ACL_UNDEFINED_ID;
+ break;
+ default:
+ goto fail;
+ }
+ }
+ return acl;
+
+fail:
+ posix_acl_release(acl);
+ return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+ const struct posix_acl_entry *acl_e;
+ struct xfs_acl_entry *ace;
+ int i;
+
+ aclp->acl_cnt = cpu_to_be32(acl->a_count);
+ for (i = 0; i < acl->a_count; i++) {
+ ace = &aclp->acl_entry[i];
+ acl_e = &acl->a_entries[i];
+
+ ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+ ace->ae_id = cpu_to_be32(acl_e->e_id);
+ ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+ }
+}
+
+/*
+ * Update the cached ACL pointer in the inode.
+ *
+ * Because we don't hold any locks while reading/writing the attribute
+ * from/to disk another thread could have raced and updated the cached
+ * ACL value before us. In that case we release the previous cached value
+ * and update it with our new value.
+ */
+STATIC void
+xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
+ struct posix_acl *acl)
+{
+ spin_lock(&inode->i_lock);
+ if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
+ posix_acl_release(*p_acl);
+ *p_acl = posix_acl_dup(acl);
+ spin_unlock(&inode->i_lock);
+}
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct posix_acl *acl = NULL, **p_acl;
+ struct xfs_acl *xfs_acl;
+ int len = sizeof(struct xfs_acl);
+ char *ea_name;
+ int error;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ea_name = SGI_ACL_FILE;
+ p_acl = &ip->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ ea_name = SGI_ACL_DEFAULT;
+ p_acl = &ip->i_default_acl;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock(&inode->i_lock);
+ if (*p_acl != XFS_ACL_NOT_CACHED)
+ acl = posix_acl_dup(*p_acl);
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * If we have a cached ACLs value just return it, not need to
+ * go out to the disk.
+ */
+ if (acl)
+ return acl;
+
+ xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ if (!xfs_acl)
+ return ERR_PTR(-ENOMEM);
+
+ error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+ if (error) {
+ /*
+ * If the attribute doesn't exist make sure we have a negative
+ * cache entry, for any other error assume it is transient and
+ * leave the cache entry as XFS_ACL_NOT_CACHED.
+ */
+ if (error == -ENOATTR) {
+ acl = NULL;
+ goto out_update_cache;
+ }
+ goto out;
+ }
+
+ acl = xfs_acl_from_disk(xfs_acl);
+ if (IS_ERR(acl))
+ goto out;
+
+ out_update_cache:
+ xfs_update_cached_acl(inode, p_acl, acl);
+ out:
+ kfree(xfs_acl);
+ return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct posix_acl **p_acl;
+ char *ea_name;
+ int error;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ea_name = SGI_ACL_FILE;
+ p_acl = &ip->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ ea_name = SGI_ACL_DEFAULT;
+ p_acl = &ip->i_default_acl;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ struct xfs_acl *xfs_acl;
+ int len;
+
+ xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ if (!xfs_acl)
+ return -ENOMEM;
+
+ xfs_acl_to_disk(xfs_acl, acl);
+ len = sizeof(struct xfs_acl) -
+ (sizeof(struct xfs_acl_entry) *
+ (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+ error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+ len, ATTR_ROOT);
+
+ kfree(xfs_acl);
+ } else {
+ /*
+ * A NULL ACL argument means we want to remove the ACL.
+ */
+ error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+ /*
+ * If the attribute didn't exist to start with that's fine.
+ */
+ if (error == -ENOATTR)
+ error = 0;
+ }
+
+ if (!error)
+ xfs_update_cached_acl(inode, p_acl, acl);
+ return error;
+}
+
+int
+xfs_check_acl(struct inode *inode, int mask)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct posix_acl *acl;
+ int error = -EAGAIN;
+
+ xfs_itrace_entry(ip);
+
+ /*
+ * If there is no attribute fork no ACL exists on this inode and
+ * we can skip the whole exercise.
+ */
+ if (!XFS_IFORK_Q(ip))
+ return -EAGAIN;
+
+ acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
+
+ return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, mode_t mode)
+{
+ int error = 0;
+
+ if (mode != inode->i_mode) {
+ struct iattr iattr;
+
+ iattr.ia_valid = ATTR_MODE;
+ iattr.ia_mode = mode;
+
+ error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+ }
+
+ return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, char *name)
+{
+ int len = sizeof(struct xfs_acl);
+
+ return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+ ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+ return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+ if (!S_ISDIR(inode->i_mode))
+ return 0;
+ return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
+{
+ struct posix_acl *clone;
+ mode_t mode;
+ int error = 0, inherit = 0;
+
+ if (S_ISDIR(inode->i_mode)) {
+ error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
+ if (error)
+ return error;
+ }
+
+ clone = posix_acl_clone(default_acl, GFP_KERNEL);
+ if (!clone)
+ return -ENOMEM;
+
+ mode = inode->i_mode;
+ error = posix_acl_create_masq(clone, &mode);
+ if (error < 0)
+ goto out_release_clone;
+
+ /*
+ * If posix_acl_create_masq returns a positive value we need to
+ * inherit a permission that can't be represented using the Unix
+ * mode bits and we actually need to set an ACL.
+ */
+ if (error > 0)
+ inherit = 1;
+
+ error = xfs_set_mode(inode, mode);
+ if (error)
+ goto out_release_clone;
+
+ if (inherit)
+ error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+ out_release_clone:
+ posix_acl_release(clone);
+ return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+ struct posix_acl *acl, *clone;
+ int error;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+
+ error = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!error)
+ error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+ posix_acl_release(clone);
+ return error;
+}
+
+void
+xfs_inode_init_acls(struct xfs_inode *ip)
+{
+ /*
+ * No need for locking, inode is not live yet.
+ */
+ ip->i_acl = XFS_ACL_NOT_CACHED;
+ ip->i_default_acl = XFS_ACL_NOT_CACHED;
+}
+
+void
+xfs_inode_clear_acls(struct xfs_inode *ip)
+{
+ /*
+ * No need for locking here, the inode is not live anymore
+ * and just about to be freed.
+ */
+ if (ip->i_acl != XFS_ACL_NOT_CACHED)
+ posix_acl_release(ip->i_acl);
+ if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
+ posix_acl_release(ip->i_default_acl);
+}
+
+
+/*
+ * System xattr handlers.
+ *
+ * Currently Posix ACLs are the only system namespace extended attribute
+ * handlers supported by XFS, so we just implement the handlers here.
+ * If we ever support other system extended attributes this will need
+ * some refactoring.
+ */
+
+static int
+xfs_decode_acl(const char *name)
+{
+ if (strcmp(name, "posix_acl_access") == 0)
+ return ACL_TYPE_ACCESS;
+ else if (strcmp(name, "posix_acl_default") == 0)
+ return ACL_TYPE_DEFAULT;
+ return -EINVAL;
+}
+
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ struct posix_acl *acl;
+ int type, error;
+
+ type = xfs_decode_acl(name);
+ if (type < 0)
+ return type;
+
+ acl = xfs_get_acl(inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+
+ error = posix_acl_to_xattr(acl, value, size);
+ posix_acl_release(acl);
+
+ return error;
+}
+
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct posix_acl *acl = NULL;
+ int error = 0, type;
+
+ type = xfs_decode_acl(name);
+ if (type < 0)
+ return type;
+ if (flags & XATTR_CREATE)
+ return -EINVAL;
+ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+ return value ? -EACCES : 0;
+ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+ return -EPERM;
+
+ if (!value)
+ goto set_acl;
+
+ acl = posix_acl_from_xattr(value, size);
+ if (!acl) {
+ /*
+ * acl_set_file(3) may request that we set default ACLs with
+ * zero length -- defend (gracefully) against that here.
+ */
+ goto out;
+ }
+ if (IS_ERR(acl)) {
+ error = PTR_ERR(acl);
+ goto out;
+ }
+
+ error = posix_acl_valid(acl);
+ if (error)
+ goto out_release;
+
+ error = -EINVAL;
+ if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+ goto out_release;
+
+ if (type == ACL_TYPE_ACCESS) {
+ mode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+
+ if (error <= 0) {
+ posix_acl_release(acl);
+ acl = NULL;
+
+ if (error < 0)
+ return error;
+ }
+
+ error = xfs_set_mode(inode, mode);
+ if (error)
+ goto out_release;
+ }
+
+ set_acl:
+ error = xfs_set_acl(inode, type, acl);
+ out_release:
+ posix_acl_release(acl);
+ out:
+ return error;
+}
+
+struct xattr_handler xfs_xattr_system_handler = {
+ .prefix = XATTR_SYSTEM_PREFIX,
+ .get = xfs_xattr_system_get,
+ .set = xfs_xattr_system_set,
+};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 34eaab608e6..5bb523d7f37 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,7 +41,6 @@
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
@@ -899,7 +898,8 @@ xfs_ioctl_setattr(
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
unsigned int lock_flags = 0;
- struct xfs_dquot *udqp = NULL, *gdqp = NULL;
+ struct xfs_dquot *udqp = NULL;
+ struct xfs_dquot *gdqp = NULL;
struct xfs_dquot *olddquot = NULL;
int code;
@@ -919,7 +919,7 @@ xfs_ioctl_setattr(
* because the i_*dquot fields will get updated anyway.
*/
if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
- code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+ code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
ip->i_d.di_gid, fa->fsx_projid,
XFS_QMOPT_PQUOTA, &udqp, &gdqp);
if (code)
@@ -954,10 +954,11 @@ xfs_ioctl_setattr(
* Do a quota reservation only if projid is actually going to change.
*/
if (mask & FSX_PROJID) {
- if (XFS_IS_PQUOTA_ON(mp) &&
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ XFS_IS_PQUOTA_ON(mp) &&
ip->i_d.di_projid != fa->fsx_projid) {
ASSERT(tp);
- code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
@@ -1059,8 +1060,8 @@ xfs_ioctl_setattr(
* in the transaction.
*/
if (ip->i_d.di_projid != fa->fsx_projid) {
- if (XFS_IS_PQUOTA_ON(mp)) {
- olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+ olddquot = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_projid = fa->fsx_projid;
@@ -1106,9 +1107,9 @@ xfs_ioctl_setattr(
/*
* Release any dquot(s) the inode had kept before chown.
*/
- XFS_QM_DQRELE(mp, olddquot);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(olddquot);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (code)
return code;
@@ -1122,8 +1123,8 @@ xfs_ioctl_setattr(
return 0;
error_return:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
xfs_trans_cancel(tp, 0);
if (lock_flags)
xfs_iunlock(ip, lock_flags);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6075382336d..58973bb4603 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -17,6 +17,7 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_acl.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
@@ -51,6 +52,7 @@
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/namei.h>
+#include <linux/posix_acl.h>
#include <linux/security.h>
#include <linux/falloc.h>
#include <linux/fiemap.h>
@@ -202,9 +204,8 @@ xfs_vn_mknod(
{
struct inode *inode;
struct xfs_inode *ip = NULL;
- xfs_acl_t *default_acl = NULL;
+ struct posix_acl *default_acl = NULL;
struct xfs_name name;
- int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
int error;
/*
@@ -219,18 +220,14 @@ xfs_vn_mknod(
rdev = 0;
}
- if (test_default_acl && test_default_acl(dir)) {
- if (!_ACL_ALLOC(default_acl)) {
- return -ENOMEM;
- }
- if (!_ACL_GET_DEFAULT(dir, default_acl)) {
- _ACL_FREE(default_acl);
- default_acl = NULL;
- }
- }
+ if (IS_POSIXACL(dir)) {
+ default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(default_acl))
+ return -PTR_ERR(default_acl);
- if (IS_POSIXACL(dir) && !default_acl)
- mode &= ~current_umask();
+ if (!default_acl)
+ mode &= ~current_umask();
+ }
xfs_dentry_to_name(&name, dentry);
error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
@@ -244,10 +241,10 @@ xfs_vn_mknod(
goto out_cleanup_inode;
if (default_acl) {
- error = _ACL_INHERIT(inode, mode, default_acl);
+ error = -xfs_inherit_acl(inode, default_acl);
if (unlikely(error))
goto out_cleanup_inode;
- _ACL_FREE(default_acl);
+ posix_acl_release(default_acl);
}
@@ -257,8 +254,7 @@ xfs_vn_mknod(
out_cleanup_inode:
xfs_cleanup_inode(dir, inode, dentry);
out_free_acl:
- if (default_acl)
- _ACL_FREE(default_acl);
+ posix_acl_release(default_acl);
return -error;
}
@@ -488,26 +484,6 @@ xfs_vn_put_link(
kfree(s);
}
-#ifdef CONFIG_XFS_POSIX_ACL
-STATIC int
-xfs_check_acl(
- struct inode *inode,
- int mask)
-{
- struct xfs_inode *ip = XFS_I(inode);
- int error;
-
- xfs_itrace_entry(ip);
-
- if (XFS_IFORK_Q(ip)) {
- error = xfs_acl_iaccess(ip, mask, NULL);
- if (error != -1)
- return -error;
- }
-
- return -EAGAIN;
-}
-
STATIC int
xfs_vn_permission(
struct inode *inode,
@@ -515,9 +491,6 @@ xfs_vn_permission(
{
return generic_permission(inode, mask, xfs_check_acl);
}
-#else
-#define xfs_vn_permission NULL
-#endif
STATIC int
xfs_vn_getattr(
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 9142192ccbe..7078974a6ee 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 94d9a633d3d..cb6e2cca214 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -50,9 +50,11 @@ xfs_fs_quota_sync(
{
struct xfs_mount *mp = XFS_M(sb);
+ if (sb->s_flags & MS_RDONLY)
+ return -EROFS;
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return -xfs_sync_inodes(mp, SYNC_DELWRI);
+ return -xfs_sync_data(mp, 0);
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 08d6bd9a394..2e09efbca8d 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,7 +43,6 @@
#include "xfs_itable.h"
#include "xfs_fsops.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -405,6 +404,14 @@ xfs_parseargs(
return EINVAL;
}
+#ifndef CONFIG_XFS_QUOTA
+ if (XFS_IS_QUOTA_RUNNING(mp)) {
+ cmn_err(CE_WARN,
+ "XFS: quota support not available in this kernel.");
+ return EINVAL;
+ }
+#endif
+
if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
(mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
cmn_err(CE_WARN,
@@ -1063,7 +1070,18 @@ xfs_fs_put_super(
int unmount_event_flags = 0;
xfs_syncd_stop(mp);
- xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
+
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /*
+ * XXX(hch): this should be SYNC_WAIT.
+ *
+ * Or more likely not needed at all because the VFS is already
+ * calling ->sync_fs after shutting down all filestem
+ * operations and just before calling ->put_super.
+ */
+ xfs_sync_data(mp, 0);
+ xfs_sync_attr(mp, 0);
+ }
#ifdef HAVE_DMAPI
if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1098,7 +1116,6 @@ xfs_fs_put_super(
xfs_freesb(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- xfs_qmops_put(mp);
xfs_dmops_put(mp);
xfs_free_fsname(mp);
kfree(mp);
@@ -1158,6 +1175,7 @@ xfs_fs_statfs(
{
struct xfs_mount *mp = XFS_M(dentry->d_sb);
xfs_sb_t *sbp = &mp->m_sb;
+ struct xfs_inode *ip = XFS_I(dentry->d_inode);
__uint64_t fakeinos, id;
xfs_extlen_t lsize;
@@ -1186,7 +1204,10 @@ xfs_fs_statfs(
statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
spin_unlock(&mp->m_sb_lock);
- XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+ if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+ ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+ (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+ xfs_qm_statvfs(ip, statp);
return 0;
}
@@ -1394,16 +1415,13 @@ xfs_fs_fill_super(
error = xfs_dmops_get(mp);
if (error)
goto out_free_fsname;
- error = xfs_qmops_get(mp);
- if (error)
- goto out_put_dmops;
if (silent)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp);
if (error)
- goto out_put_qmops;
+ goto out_put_dmops;
if (xfs_icsb_init_counters(mp))
mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1471,8 +1489,6 @@ xfs_fs_fill_super(
out_destroy_counters:
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- out_put_qmops:
- xfs_qmops_put(mp);
out_put_dmops:
xfs_dmops_put(mp);
out_free_fsname:
@@ -1706,18 +1722,8 @@ xfs_init_zones(void)
if (!xfs_ili_zone)
goto out_destroy_inode_zone;
-#ifdef CONFIG_XFS_POSIX_ACL
- xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
- if (!xfs_acl_zone)
- goto out_destroy_ili_zone;
-#endif
-
return 0;
-#ifdef CONFIG_XFS_POSIX_ACL
- out_destroy_ili_zone:
-#endif
- kmem_zone_destroy(xfs_ili_zone);
out_destroy_inode_zone:
kmem_zone_destroy(xfs_inode_zone);
out_destroy_efi_zone:
@@ -1751,9 +1757,6 @@ xfs_init_zones(void)
STATIC void
xfs_destroy_zones(void)
{
-#ifdef CONFIG_XFS_POSIX_ACL
- kmem_zone_destroy(xfs_acl_zone);
-#endif
kmem_zone_destroy(xfs_ili_zone);
kmem_zone_destroy(xfs_inode_zone);
kmem_zone_destroy(xfs_efi_zone);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index f7ba76633c2..b619d6b8ca4 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -43,166 +43,267 @@
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_rw.h"
+#include "xfs_quota.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
-/*
- * Sync all the inodes in the given AG according to the
- * direction given by the flags.
- */
-STATIC int
-xfs_sync_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- int flags)
-{
- xfs_perag_t *pag = &mp->m_perag[ag];
- int nr_found;
- uint32_t first_index = 0;
- int error = 0;
- int last_error = 0;
- do {
- struct inode *inode;
- xfs_inode_t *ip = NULL;
- int lock_flags = XFS_ILOCK_SHARED;
+STATIC xfs_inode_t *
+xfs_inode_ag_lookup(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ uint32_t *first_index,
+ int tag)
+{
+ int nr_found;
+ struct xfs_inode *ip;
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ if (tag == XFS_ICI_NO_TAG) {
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void**)&ip, first_index, 1);
+ (void **)&ip, *first_index, 1);
+ } else {
+ nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+ (void **)&ip, *first_index, 1, tag);
+ }
+ if (!nr_found)
+ goto unlock;
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ goto unlock;
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
+ return ip;
- /* nothing to sync during shutdown */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- read_unlock(&pag->pag_ici_lock);
- return 0;
- }
+unlock:
+ read_unlock(&pag->pag_ici_lock);
+ return NULL;
+}
- /*
- * If we can't get a reference on the inode, it must be
- * in reclaim. Leave it for the reclaim code to flush.
- */
- inode = VFS_I(ip);
- if (!igrab(inode)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- read_unlock(&pag->pag_ici_lock);
+STATIC int
+xfs_inode_ag_walk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t ag,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags),
+ int flags,
+ int tag)
+{
+ struct xfs_perag *pag = &mp->m_perag[ag];
+ uint32_t first_index;
+ int last_error = 0;
+ int skipped;
- /* avoid new or bad inodes */
- if (is_bad_inode(inode) ||
- xfs_iflags_test(ip, XFS_INEW)) {
- IRELE(ip);
- continue;
- }
+restart:
+ skipped = 0;
+ first_index = 0;
+ do {
+ int error = 0;
+ xfs_inode_t *ip;
- /*
- * If we have to flush data or wait for I/O completion
- * we need to hold the iolock.
- */
- if (flags & SYNC_DELWRI) {
- if (VN_DIRTY(inode)) {
- if (flags & SYNC_TRYLOCK) {
- if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
- lock_flags |= XFS_IOLOCK_SHARED;
- } else {
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- lock_flags |= XFS_IOLOCK_SHARED;
- }
- if (lock_flags & XFS_IOLOCK_SHARED) {
- error = xfs_flush_pages(ip, 0, -1,
- (flags & SYNC_WAIT) ? 0
- : XFS_B_ASYNC,
- FI_NONE);
- }
- }
- if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
- xfs_ioend_wait(ip);
- }
- xfs_ilock(ip, XFS_ILOCK_SHARED);
-
- if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
- if (flags & SYNC_WAIT) {
- xfs_iflock(ip);
- if (!xfs_inode_clean(ip))
- error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
- else
- xfs_ifunlock(ip);
- } else if (xfs_iflock_nowait(ip)) {
- if (!xfs_inode_clean(ip))
- error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
- else
- xfs_ifunlock(ip);
- }
- }
- xfs_iput(ip, lock_flags);
+ ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+ if (!ip)
+ break;
+ error = execute(ip, pag, flags);
+ if (error == EAGAIN) {
+ skipped++;
+ continue;
+ }
if (error)
last_error = error;
/*
* bail out if the filesystem is corrupted.
*/
if (error == EFSCORRUPTED)
- return XFS_ERROR(error);
+ break;
- } while (nr_found);
+ } while (1);
+
+ if (skipped) {
+ delay(1);
+ goto restart;
+ }
+ xfs_put_perag(mp, pag);
return last_error;
}
int
-xfs_sync_inodes(
- xfs_mount_t *mp,
- int flags)
+xfs_inode_ag_iterator(
+ struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags),
+ int flags,
+ int tag)
{
- int error;
- int last_error;
- int i;
- int lflags = XFS_LOG_FORCE;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return 0;
- error = 0;
- last_error = 0;
+ for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+ if (!mp->m_perag[ag].pag_ici_init)
+ continue;
+ error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+ if (error) {
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ }
+ return XFS_ERROR(last_error);
+}
+
+/* must be called with pag_ici_lock held and releases it */
+int
+xfs_sync_inode_valid(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag)
+{
+ struct inode *inode = VFS_I(ip);
+
+ /* nothing to sync during shutdown */
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ read_unlock(&pag->pag_ici_lock);
+ return EFSCORRUPTED;
+ }
+
+ /*
+ * If we can't get a reference on the inode, it must be in reclaim.
+ * Leave it for the reclaim code to flush. Also avoid inodes that
+ * haven't been fully initialised.
+ */
+ if (!igrab(inode)) {
+ read_unlock(&pag->pag_ici_lock);
+ return ENOENT;
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+ if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+ IRELE(ip);
+ return ENOENT;
+ }
+
+ return 0;
+}
+
+STATIC int
+xfs_sync_inode_data(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ struct inode *inode = VFS_I(ip);
+ struct address_space *mapping = inode->i_mapping;
+ int error = 0;
+
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
+
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ goto out_wait;
+
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (flags & SYNC_TRYLOCK)
+ goto out_wait;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
+
+ error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+ 0 : XFS_B_ASYNC, FI_NONE);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ out_wait:
if (flags & SYNC_WAIT)
- lflags |= XFS_LOG_SYNC;
+ xfs_ioend_wait(ip);
+ IRELE(ip);
+ return error;
+}
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- error = xfs_sync_inodes_ag(mp, i, flags);
- if (error)
- last_error = error;
- if (error == EFSCORRUPTED)
- break;
+STATIC int
+xfs_sync_inode_attr(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ int error = 0;
+
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (xfs_inode_clean(ip))
+ goto out_unlock;
+ if (!xfs_iflock_nowait(ip)) {
+ if (!(flags & SYNC_WAIT))
+ goto out_unlock;
+ xfs_iflock(ip);
}
- if (flags & SYNC_DELWRI)
- xfs_log_force(mp, 0, lflags);
- return XFS_ERROR(last_error);
+ if (xfs_inode_clean(ip)) {
+ xfs_ifunlock(ip);
+ goto out_unlock;
+ }
+
+ error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
+ XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
+
+ out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ IRELE(ip);
+ return error;
+}
+
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
+int
+xfs_sync_data(
+ struct xfs_mount *mp,
+ int flags)
+{
+ int error;
+
+ ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
+ XFS_ICI_NO_TAG);
+ if (error)
+ return XFS_ERROR(error);
+
+ xfs_log_force(mp, 0,
+ (flags & SYNC_WAIT) ?
+ XFS_LOG_FORCE | XFS_LOG_SYNC :
+ XFS_LOG_FORCE);
+ return 0;
+}
+
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+int
+xfs_sync_attr(
+ struct xfs_mount *mp,
+ int flags)
+{
+ ASSERT((flags & ~SYNC_WAIT) == 0);
+
+ return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
+ XFS_ICI_NO_TAG);
}
STATIC int
@@ -252,7 +353,7 @@ xfs_sync_fsdata(
* If this is xfssyncd() then only sync the superblock if we can
* lock it without sleeping and it is not pinned.
*/
- if (flags & SYNC_BDFLUSH) {
+ if (flags & SYNC_TRYLOCK) {
ASSERT(!(flags & SYNC_WAIT));
bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
@@ -316,13 +417,13 @@ xfs_quiesce_data(
int error;
/* push non-blocking */
- xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
- XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+ xfs_sync_data(mp, 0);
+ xfs_qm_sync(mp, SYNC_TRYLOCK);
xfs_filestream_flush(mp);
/* push and block */
- xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
- XFS_QM_DQSYNC(mp, SYNC_WAIT);
+ xfs_sync_data(mp, SYNC_WAIT);
+ xfs_qm_sync(mp, SYNC_WAIT);
/* write superblock and hoover up shutdown errors */
error = xfs_sync_fsdata(mp, 0);
@@ -341,7 +442,7 @@ xfs_quiesce_fs(
int count = 0, pincount;
xfs_flush_buftarg(mp->m_ddev_targp, 0);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
/*
* This loop must run at least twice. The first instance of the loop
@@ -350,7 +451,7 @@ xfs_quiesce_fs(
* logged before we can write the unmount record.
*/
do {
- xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+ xfs_sync_attr(mp, SYNC_WAIT);
pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
if (!pincount) {
delay(50);
@@ -433,8 +534,8 @@ xfs_flush_inodes_work(
void *arg)
{
struct inode *inode = arg;
- xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
- xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
+ xfs_sync_data(mp, SYNC_TRYLOCK);
+ xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
iput(inode);
}
@@ -465,10 +566,10 @@ xfs_sync_worker(
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
/* dgc: errors ignored here */
- error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
- error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+ error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+ error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
if (xfs_log_need_covered(mp))
error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
}
@@ -569,7 +670,7 @@ xfs_reclaim_inode(
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- return 1;
+ return -EAGAIN;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
@@ -654,101 +755,27 @@ xfs_inode_clear_reclaim_tag(
xfs_put_perag(mp, pag);
}
-
-STATIC void
-xfs_reclaim_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- int noblock,
- int mode)
+STATIC int
+xfs_reclaim_inode_now(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
{
- xfs_inode_t *ip = NULL;
- xfs_perag_t *pag = &mp->m_perag[ag];
- int nr_found;
- uint32_t first_index;
- int skipped;
-
-restart:
- first_index = 0;
- skipped = 0;
- do {
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
- nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
- (void**)&ip, first_index, 1,
- XFS_ICI_RECLAIM_TAG);
-
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /* ignore if already under reclaim */
- if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
-
- if (noblock) {
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- if (xfs_ipincount(ip) ||
- !xfs_iflock_nowait(ip)) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- }
+ /* ignore if already under reclaim */
+ if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
read_unlock(&pag->pag_ici_lock);
-
- /*
- * hmmm - this is an inode already in reclaim. Do
- * we even bother catching it here?
- */
- if (xfs_reclaim_inode(ip, noblock, mode))
- skipped++;
- } while (nr_found);
-
- if (skipped) {
- delay(1);
- goto restart;
+ return 0;
}
- return;
+ read_unlock(&pag->pag_ici_lock);
+ return xfs_reclaim_inode(ip, 0, flags);
}
int
xfs_reclaim_inodes(
xfs_mount_t *mp,
- int noblock,
int mode)
{
- int i;
-
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- xfs_reclaim_inodes_ag(mp, i, noblock, mode);
- }
- return 0;
+ return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
+ XFS_ICI_RECLAIM_TAG);
}
-
-
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 308d5bf6dfb..2a10301c99c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,17 +29,14 @@ typedef struct xfs_sync_work {
struct completion *w_completion;
} xfs_sync_work_t;
-#define SYNC_ATTR 0x0001 /* sync attributes */
-#define SYNC_DELWRI 0x0002 /* look at delayed writes */
-#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
-#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
-#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
-#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
+#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
+#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
-int xfs_sync_inodes(struct xfs_mount *mp, int flags);
+int xfs_sync_attr(struct xfs_mount *mp, int flags);
+int xfs_sync_data(struct xfs_mount *mp, int flags);
int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
int xfs_quiesce_data(struct xfs_mount *mp);
@@ -48,10 +45,16 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
-int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
+
+int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+ int flags, int tag);
+
#endif
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 964621fde6e..497c7fb75cc 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -29,67 +29,6 @@
#include <linux/xattr.h>
-/*
- * ACL handling. Should eventually be moved into xfs_acl.c
- */
-
-static int
-xfs_decode_acl(const char *name)
-{
- if (strcmp(name, "posix_acl_access") == 0)
- return _ACL_TYPE_ACCESS;
- else if (strcmp(name, "posix_acl_default") == 0)
- return _ACL_TYPE_DEFAULT;
- return -EINVAL;
-}
-
-/*
- * Get system extended attributes which at the moment only
- * includes Posix ACLs.
- */
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- int acl;
-
- acl = xfs_decode_acl(name);
- if (acl < 0)
- return acl;
-
- return xfs_acl_vget(inode, buffer, size, acl);
-}
-
-static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- int acl;
-
- acl = xfs_decode_acl(name);
- if (acl < 0)
- return acl;
- if (flags & XATTR_CREATE)
- return -EINVAL;
-
- if (!value)
- return xfs_acl_vremove(inode, acl);
-
- return xfs_acl_vset(inode, (void *)value, size, acl);
-}
-
-static struct xattr_handler xfs_xattr_system_handler = {
- .prefix = XATTR_SYSTEM_PREFIX,
- .get = xfs_xattr_system_get,
- .set = xfs_xattr_system_set,
-};
-
-
-/*
- * Real xattr handling. The only difference between the namespaces is
- * a flag passed to the low-level attr code.
- */
-
static int
__xfs_xattr_get(struct inode *inode, const char *name,
void *value, size_t size, int xflags)
@@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = {
&xfs_xattr_user_handler,
&xfs_xattr_trusted_handler,
&xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
&xfs_xattr_system_handler,
+#endif
NULL
};
@@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
/*
* Then add the two synthetic ACL attributes.
*/
- if (xfs_acl_vhasacl_access(inode)) {
+ if (posix_acl_access_exists(inode)) {
error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
strlen(POSIX_ACL_XATTR_ACCESS) + 1,
data, size, &context.count);
@@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
return error;
}
- if (xfs_acl_vhasacl_default(inode)) {
+ if (posix_acl_default_exists(inode)) {
error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
data, size, &context.count);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index e4babcc6342..2f3f2229eaa 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -1194,7 +1193,9 @@ void
xfs_qm_dqrele(
xfs_dquot_t *dqp)
{
- ASSERT(dqp);
+ if (!dqp)
+ return;
+
xfs_dqtrace_entry(dqp, "DQRELE");
xfs_dqlock(dqp);
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index de0f402ddb4..6533ead9b88 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -181,7 +181,6 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
xfs_dqid_t, uint, uint, xfs_dquot_t **);
extern void xfs_qm_dqput(xfs_dquot_t *);
-extern void xfs_qm_dqrele(xfs_dquot_t *);
extern void xfs_dqlock(xfs_dquot_t *);
extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
extern void xfs_dqunlock(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1728f6a7c4f..d0d4a9a0bbd 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5b6695049e0..45b1bfef738 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -287,11 +286,13 @@ xfs_qm_rele_quotafs_ref(
* Just destroy the quotainfo structure.
*/
void
-xfs_qm_unmount_quotadestroy(
- xfs_mount_t *mp)
+xfs_qm_unmount(
+ struct xfs_mount *mp)
{
- if (mp->m_quotainfo)
+ if (mp->m_quotainfo) {
+ xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
xfs_qm_destroy_quotainfo(mp);
+ }
}
@@ -385,8 +386,13 @@ xfs_qm_mount_quotas(
if (error) {
xfs_fs_cmn_err(CE_WARN, mp,
"Failed to initialize disk quotas.");
+ return;
}
- return;
+
+#ifdef QUOTADEBUG
+ if (XFS_IS_QUOTA_ON(mp))
+ xfs_qm_internalqcheck(mp);
+#endif
}
/*
@@ -774,12 +780,11 @@ xfs_qm_dqattach_grouphint(
* Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
* into account.
* If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
* Inode may get unlocked and relocked in here, and the caller must deal with
* the consequences.
*/
int
-xfs_qm_dqattach(
+xfs_qm_dqattach_locked(
xfs_inode_t *ip,
uint flags)
{
@@ -787,17 +792,14 @@ xfs_qm_dqattach(
uint nquotas = 0;
int error = 0;
- if ((! XFS_IS_QUOTA_ON(mp)) ||
- (! XFS_NOT_DQATTACHED(mp, ip)) ||
- (ip->i_ino == mp->m_sb.sb_uquotino) ||
- (ip->i_ino == mp->m_sb.sb_gquotino))
+ if (!XFS_IS_QUOTA_RUNNING(mp) ||
+ !XFS_IS_QUOTA_ON(mp) ||
+ !XFS_NOT_DQATTACHED(mp, ip) ||
+ ip->i_ino == mp->m_sb.sb_uquotino ||
+ ip->i_ino == mp->m_sb.sb_gquotino)
return 0;
- ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
- xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (XFS_IS_UQUOTA_ON(mp)) {
error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
@@ -849,8 +851,7 @@ xfs_qm_dqattach(
xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
}
- done:
-
+ done:
#ifdef QUOTADEBUG
if (! error) {
if (XFS_IS_UQUOTA_ON(mp))
@@ -858,15 +859,22 @@ xfs_qm_dqattach(
if (XFS_IS_OQUOTA_ON(mp))
ASSERT(ip->i_gdquot);
}
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
#endif
+ return error;
+}
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+int
+xfs_qm_dqattach(
+ struct xfs_inode *ip,
+ uint flags)
+{
+ int error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_qm_dqattach_locked(ip, flags);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
-#ifdef QUOTADEBUG
- else
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
return error;
}
@@ -896,11 +904,6 @@ xfs_qm_dqdetach(
}
}
-/*
- * This is called to sync quotas. We can be told to use non-blocking
- * semantics by either the SYNC_BDFLUSH flag or the absence of the
- * SYNC_WAIT flag.
- */
int
xfs_qm_sync(
xfs_mount_t *mp,
@@ -909,17 +912,13 @@ xfs_qm_sync(
int recl, restarts;
xfs_dquot_t *dqp;
uint flush_flags;
- boolean_t nowait;
int error;
- if (! XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
+ flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
restarts = 0;
- /*
- * We won't block unless we are asked to.
- */
- nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
again:
xfs_qm_mplist_lock(mp);
@@ -939,18 +938,10 @@ xfs_qm_sync(
* don't 'seem' to be dirty. ie. don't acquire dqlock.
* This is very similar to what xfs_sync does with inodes.
*/
- if (flags & SYNC_BDFLUSH) {
- if (! XFS_DQ_IS_DIRTY(dqp))
+ if (flags & SYNC_TRYLOCK) {
+ if (!XFS_DQ_IS_DIRTY(dqp))
continue;
- }
-
- if (nowait) {
- /*
- * Try to acquire the dquot lock. We are NOT out of
- * lock order, but we just don't want to wait for this
- * lock, unless somebody wanted us to.
- */
- if (! xfs_qm_dqlock_nowait(dqp))
+ if (!xfs_qm_dqlock_nowait(dqp))
continue;
} else {
xfs_dqlock(dqp);
@@ -967,7 +958,7 @@ xfs_qm_sync(
/* XXX a sentinel would be better */
recl = XFS_QI_MPLRECLAIMS(mp);
if (!xfs_dqflock_nowait(dqp)) {
- if (nowait) {
+ if (flags & SYNC_TRYLOCK) {
xfs_dqunlock(dqp);
continue;
}
@@ -985,7 +976,6 @@ xfs_qm_sync(
* Let go of the mplist lock. We don't want to hold it
* across a disk write
*/
- flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
xfs_qm_mplist_unlock(mp);
xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
error = xfs_qm_dqflush(dqp, flush_flags);
@@ -2319,20 +2309,20 @@ xfs_qm_write_sb_changes(
*/
int
xfs_qm_vop_dqalloc(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- uid_t uid,
- gid_t gid,
- prid_t prid,
- uint flags,
- xfs_dquot_t **O_udqpp,
- xfs_dquot_t **O_gdqpp)
+ struct xfs_inode *ip,
+ uid_t uid,
+ gid_t gid,
+ prid_t prid,
+ uint flags,
+ struct xfs_dquot **O_udqpp,
+ struct xfs_dquot **O_gdqpp)
{
- int error;
- xfs_dquot_t *uq, *gq;
- uint lockflags;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dquot *uq, *gq;
+ int error;
+ uint lockflags;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
lockflags = XFS_ILOCK_EXCL;
@@ -2346,8 +2336,8 @@ xfs_qm_vop_dqalloc(
* if necessary. The dquot(s) will not be locked.
*/
if (XFS_NOT_DQATTACHED(mp, ip)) {
- if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
- XFS_QMOPT_ILOCKED))) {
+ error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+ if (error) {
xfs_iunlock(ip, lockflags);
return error;
}
@@ -2469,6 +2459,7 @@ xfs_qm_vop_chown(
uint bfield = XFS_IS_REALTIME_INODE(ip) ?
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
@@ -2508,13 +2499,13 @@ xfs_qm_vop_chown_reserve(
xfs_dquot_t *gdqp,
uint flags)
{
- int error;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = ip->i_mount;
uint delblks, blkflags, prjflags = 0;
xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
+ int error;
+
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- mp = ip->i_mount;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
delblks = ip->i_delayed_blks;
@@ -2582,28 +2573,23 @@ xfs_qm_vop_chown_reserve(
int
xfs_qm_vop_rename_dqattach(
- xfs_inode_t **i_tab)
+ struct xfs_inode **i_tab)
{
- xfs_inode_t *ip;
- int i;
- int error;
+ struct xfs_mount *mp = i_tab[0]->i_mount;
+ int i;
- ip = i_tab[0];
-
- if (! XFS_IS_QUOTA_ON(ip->i_mount))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
- }
- for (i = 1; (i < 4 && i_tab[i]); i++) {
+ for (i = 0; (i < 4 && i_tab[i]); i++) {
+ struct xfs_inode *ip = i_tab[i];
+ int error;
+
/*
* Watch out for duplicate entries in the table.
*/
- if ((ip = i_tab[i]) != i_tab[i-1]) {
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+ if (i == 0 || ip != i_tab[i-1]) {
+ if (XFS_NOT_DQATTACHED(mp, ip)) {
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
@@ -2614,17 +2600,19 @@ xfs_qm_vop_rename_dqattach(
}
void
-xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t *udqp,
- xfs_dquot_t *gdqp)
+xfs_qm_vop_create_dqattach(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp)
{
- if (!XFS_IS_QUOTA_ON(tp->t_mountp))
+ struct xfs_mount *mp = tp->t_mountp;
+
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if (udqp) {
xfs_dqlock(udqp);
@@ -2632,7 +2620,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(udqp);
ASSERT(ip->i_udquot == NULL);
ip->i_udquot = udqp;
- ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
+ ASSERT(XFS_IS_UQUOTA_ON(mp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
@@ -2642,8 +2630,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(gdqp);
ASSERT(ip->i_gdquot == NULL);
ip->i_gdquot = gdqp;
- ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
- ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+ ASSERT(XFS_IS_OQUOTA_ON(mp));
+ ASSERT((XFS_IS_GQUOTA_ON(mp) ?
ip->i_d.di_gid : ip->i_d.di_projid) ==
be32_to_cpu(gdqp->q_core.d_id));
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index a371954cae1..495564b8af3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -127,8 +127,6 @@ typedef struct xfs_quotainfo {
} xfs_quotainfo_t;
-extern xfs_dqtrxops_t xfs_trans_dquot_ops;
-
extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct {
#define XFS_QM_RTBWARNLIMIT 5
extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void xfs_qm_mount_quotas(xfs_mount_t *);
extern int xfs_qm_quotacheck(xfs_mount_t *);
-extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
-extern void xfs_qm_unmount_quotas(xfs_mount_t *);
extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-extern int xfs_qm_sync(xfs_mount_t *, int);
/* dquot stuff */
extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int xfs_qm_dqattach(xfs_inode_t *, uint);
-extern void xfs_qm_dqdetach(xfs_inode_t *);
extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
@@ -183,19 +175,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-/* vop stuff */
-extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
- uid_t, gid_t, prid_t, uint,
- xfs_dquot_t **, xfs_dquot_t **);
-extern void xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *);
-extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **);
-extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t **, xfs_dquot_t *);
-extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *, uint);
-
/* list stuff */
extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 63037c689a4..a5346630dfa 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
@@ -84,7 +83,7 @@ xfs_fill_statvfs_from_dquot(
* return a statvfs of the project, not the entire filesystem.
* This makes such trees appear as if they are filesystems in themselves.
*/
-STATIC void
+void
xfs_qm_statvfs(
xfs_inode_t *ip,
struct kstatfs *statp)
@@ -92,20 +91,13 @@ xfs_qm_statvfs(
xfs_mount_t *mp = ip->i_mount;
xfs_dquot_t *dqp;
- if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
- !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
- return;
-
if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
- xfs_disk_dquot_t *dp = &dqp->q_core;
-
- xfs_fill_statvfs_from_dquot(statp, dp);
+ xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
xfs_qm_dqput(dqp);
}
}
-STATIC int
+int
xfs_qm_newmount(
xfs_mount_t *mp,
uint *needquotamount,
@@ -114,9 +106,6 @@ xfs_qm_newmount(
uint quotaondisk;
uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
- *quotaflags = 0;
- *needquotamount = B_FALSE;
-
quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
@@ -179,66 +168,6 @@ xfs_qm_newmount(
return 0;
}
-STATIC int
-xfs_qm_endmount(
- xfs_mount_t *mp,
- uint needquotamount,
- uint quotaflags)
-{
- if (needquotamount) {
- ASSERT(mp->m_qflags == 0);
- mp->m_qflags = quotaflags;
- xfs_qm_mount_quotas(mp);
- }
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
- if (! (XFS_IS_QUOTA_ON(mp)))
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
- else
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-#endif
-
-#ifdef QUOTADEBUG
- if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
- cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
-#endif
-
- return 0;
-}
-
-STATIC void
-xfs_qm_dqrele_null(
- xfs_dquot_t *dq)
-{
- /*
- * Called from XFS, where we always check first for a NULL dquot.
- */
- if (!dq)
- return;
- xfs_qm_dqrele(dq);
-}
-
-
-struct xfs_qmops xfs_qmcore_xfs = {
- .xfs_qminit = xfs_qm_newmount,
- .xfs_qmdone = xfs_qm_unmount_quotadestroy,
- .xfs_qmmount = xfs_qm_endmount,
- .xfs_qmunmount = xfs_qm_unmount_quotas,
- .xfs_dqrele = xfs_qm_dqrele_null,
- .xfs_dqattach = xfs_qm_dqattach,
- .xfs_dqdetach = xfs_qm_dqdetach,
- .xfs_dqpurgeall = xfs_qm_dqpurge_all,
- .xfs_dqvopalloc = xfs_qm_vop_dqalloc,
- .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode,
- .xfs_dqvoprename = xfs_qm_vop_rename_dqattach,
- .xfs_dqvopchown = xfs_qm_vop_chown,
- .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve,
- .xfs_dqstatvfs = xfs_qm_statvfs,
- .xfs_dqsync = xfs_qm_sync,
- .xfs_dqtrxops = &xfs_trans_dquot_ops,
-};
-EXPORT_SYMBOL(xfs_qmcore_xfs);
-
void __init
xfs_qm_init(void)
{
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 709f5f545cf..21b08c0396a 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c7b66f6506c..4e4276b956e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -45,7 +45,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -847,105 +846,55 @@ xfs_qm_export_flags(
}
-/*
- * Release all the dquots on the inodes in an AG.
- */
-STATIC void
-xfs_qm_dqrele_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- uint flags)
+STATIC int
+xfs_dqrele_inode(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
{
- xfs_inode_t *ip = NULL;
- xfs_perag_t *pag = &mp->m_perag[ag];
- int first_index = 0;
- int nr_found;
-
- do {
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void**)&ip, first_index, 1);
-
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /* skip quota inodes */
- if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
+ int error;
- /*
- * If we can't get a reference on the inode, it must be
- * in reclaim. Leave it for the reclaim code to flush.
- */
- if (!igrab(VFS_I(ip))) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
+ /* skip quota inodes */
+ if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) {
+ ASSERT(ip->i_udquot == NULL);
+ ASSERT(ip->i_gdquot == NULL);
read_unlock(&pag->pag_ici_lock);
+ return 0;
+ }
- /* avoid new inodes though we shouldn't find any here */
- if (xfs_iflags_test(ip, XFS_INEW)) {
- IRELE(ip);
- continue;
- }
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
- xfs_qm_dqrele(ip->i_udquot);
- ip->i_udquot = NULL;
- }
- if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
- ip->i_gdquot) {
- xfs_qm_dqrele(ip->i_gdquot);
- ip->i_gdquot = NULL;
- }
- xfs_iput(ip, XFS_ILOCK_EXCL);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+ xfs_qm_dqrele(ip->i_udquot);
+ ip->i_udquot = NULL;
+ }
+ if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+ xfs_qm_dqrele(ip->i_gdquot);
+ ip->i_gdquot = NULL;
+ }
+ xfs_iput(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
- } while (nr_found);
+ return 0;
}
+
/*
* Go thru all the inodes in the file system, releasing their dquots.
+ *
* Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * AFTER this, in the case of quotaoff.
*/
void
xfs_qm_dqrele_all_inodes(
struct xfs_mount *mp,
uint flags)
{
- int i;
-
ASSERT(mp->m_quotainfo);
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- xfs_qm_dqrele_inodes_ag(mp, i, flags);
- }
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 447173bcf96..97ac9640be9 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -111,7 +110,7 @@ xfs_trans_log_dquot(
* Carry forward whatever is left of the quota blk reservation to
* the spanky new transaction
*/
-STATIC void
+void
xfs_trans_dup_dqinfo(
xfs_trans_t *otp,
xfs_trans_t *ntp)
@@ -167,19 +166,17 @@ xfs_trans_dup_dqinfo(
/*
* Wrap around mod_dquot to account for both user and group quotas.
*/
-STATIC void
+void
xfs_trans_mod_dquot_byino(
xfs_trans_t *tp,
xfs_inode_t *ip,
uint field,
long delta)
{
- xfs_mount_t *mp;
-
- ASSERT(tp);
- mp = tp->t_mountp;
+ xfs_mount_t *mp = tp->t_mountp;
- if (!XFS_IS_QUOTA_ON(mp) ||
+ if (!XFS_IS_QUOTA_RUNNING(mp) ||
+ !XFS_IS_QUOTA_ON(mp) ||
ip->i_ino == mp->m_sb.sb_uquotino ||
ip->i_ino == mp->m_sb.sb_gquotino)
return;
@@ -229,6 +226,7 @@ xfs_trans_mod_dquot(
xfs_dqtrx_t *qtrx;
ASSERT(tp);
+ ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
qtrx = NULL;
if (tp->t_dqinfo == NULL)
@@ -346,7 +344,7 @@ xfs_trans_dqlockedjoin(
* Unreserve just the reservations done by this transaction.
* dquot is still left locked at exit.
*/
-STATIC void
+void
xfs_trans_apply_dquot_deltas(
xfs_trans_t *tp)
{
@@ -357,7 +355,7 @@ xfs_trans_apply_dquot_deltas(
long totalbdelta;
long totalrtbdelta;
- if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY))
+ if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
return;
ASSERT(tp->t_dqinfo);
@@ -531,7 +529,7 @@ xfs_trans_apply_dquot_deltas(
* we simply throw those away, since that's the expected behavior
* when a transaction is curtailed without a commit.
*/
-STATIC void
+void
xfs_trans_unreserve_and_mod_dquots(
xfs_trans_t *tp)
{
@@ -768,7 +766,7 @@ xfs_trans_reserve_quota_bydquots(
{
int resvd = 0, error;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
if (tp && tp->t_dqinfo == NULL)
@@ -811,18 +809,17 @@ xfs_trans_reserve_quota_bydquots(
* This doesn't change the actual usage, just the reservation.
* The inode sent in is locked.
*/
-STATIC int
+int
xfs_trans_reserve_quota_nblks(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- long nblks,
- long ninos,
- uint flags)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ long nblks,
+ long ninos,
+ uint flags)
{
- int error;
+ struct xfs_mount *mp = ip->i_mount;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
if (XFS_IS_PQUOTA_ON(mp))
flags |= XFS_QMOPT_ENOSPC;
@@ -831,7 +828,6 @@ xfs_trans_reserve_quota_nblks(
ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
XFS_TRANS_DQ_RES_RTBLKS ||
(flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
@@ -840,11 +836,9 @@ xfs_trans_reserve_quota_nblks(
/*
* Reserve nblks against these dquots, with trans as the mediator.
*/
- error = xfs_trans_reserve_quota_bydquots(tp, mp,
- ip->i_udquot, ip->i_gdquot,
- nblks, ninos,
- flags);
- return error;
+ return xfs_trans_reserve_quota_bydquots(tp, mp,
+ ip->i_udquot, ip->i_gdquot,
+ nblks, ninos, flags);
}
/*
@@ -895,25 +889,15 @@ STATIC void
xfs_trans_alloc_dqinfo(
xfs_trans_t *tp)
{
- (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+ tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
}
-STATIC void
+void
xfs_trans_free_dqinfo(
xfs_trans_t *tp)
{
if (!tp->t_dqinfo)
return;
- kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo);
- (tp)->t_dqinfo = NULL;
+ kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+ tp->t_dqinfo = NULL;
}
-
-xfs_dqtrxops_t xfs_trans_dquot_ops = {
- .qo_dup_dqinfo = xfs_trans_dup_dqinfo,
- .qo_free_dqinfo = xfs_trans_free_dqinfo,
- .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino,
- .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas,
- .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks,
- .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots,
- .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots,
-};
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
deleted file mode 100644
index a8cdd73999a..00000000000
--- a/fs/xfs/xfs_acl.c
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/capability.h>
-#include <linux/posix_acl_xattr.h>
-
-STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
-STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_endian(xfs_acl_t *);
-STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
-STATIC int xfs_acl_invalid(xfs_acl_t *);
-STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
-STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
-STATIC int xfs_acl_allow_set(struct inode *, int);
-
-kmem_zone_t *xfs_acl_zone;
-
-
-/*
- * Test for existence of access ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_access(
- struct inode *vp)
-{
- int error;
-
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
-
-/*
- * Test for existence of default ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_default(
- struct inode *vp)
-{
- int error;
-
- if (!S_ISDIR(vp->i_mode))
- return 0;
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
-
-/*
- * Convert from extended attribute representation to in-memory for XFS.
- */
-STATIC int
-posix_acl_xattr_to_xfs(
- posix_acl_xattr_header *src,
- size_t size,
- xfs_acl_t *dest)
-{
- posix_acl_xattr_entry *src_entry;
- xfs_acl_entry_t *dest_entry;
- int n;
-
- if (!src || !dest)
- return EINVAL;
-
- if (size < sizeof(posix_acl_xattr_header))
- return EINVAL;
-
- if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return EOPNOTSUPP;
-
- memset(dest, 0, sizeof(xfs_acl_t));
- dest->acl_cnt = posix_acl_xattr_count(size);
- if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES)
- return EINVAL;
-
- /*
- * acl_set_file(3) may request that we set default ACLs with
- * zero length -- defend (gracefully) against that here.
- */
- if (!dest->acl_cnt)
- return 0;
-
- src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src));
- dest_entry = &dest->acl_entry[0];
-
- for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) {
- dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm);
- if (_ACL_PERM_INVALID(dest_entry->ae_perm))
- return EINVAL;
- dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag);
- switch(dest_entry->ae_tag) {
- case ACL_USER:
- case ACL_GROUP:
- dest_entry->ae_id = le32_to_cpu(src_entry->e_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- dest_entry->ae_id = ACL_UNDEFINED_ID;
- break;
- default:
- return EINVAL;
- }
- }
- if (xfs_acl_invalid(dest))
- return EINVAL;
-
- return 0;
-}
-
-/*
- * Comparison function called from xfs_sort().
- * Primary key is ae_tag, secondary key is ae_id.
- */
-STATIC int
-xfs_acl_entry_compare(
- const void *va,
- const void *vb)
-{
- xfs_acl_entry_t *a = (xfs_acl_entry_t *)va,
- *b = (xfs_acl_entry_t *)vb;
-
- if (a->ae_tag == b->ae_tag)
- return (a->ae_id - b->ae_id);
- return (a->ae_tag - b->ae_tag);
-}
-
-/*
- * Convert from in-memory XFS to extended attribute representation.
- */
-STATIC int
-posix_acl_xfs_to_xattr(
- xfs_acl_t *src,
- posix_acl_xattr_header *dest,
- size_t size)
-{
- int n;
- size_t new_size = posix_acl_xattr_size(src->acl_cnt);
- posix_acl_xattr_entry *dest_entry;
- xfs_acl_entry_t *src_entry;
-
- if (size < new_size)
- return -ERANGE;
-
- /* Need to sort src XFS ACL by <ae_tag,ae_id> */
- xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
- xfs_acl_entry_compare);
-
- dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
- dest_entry = &dest->a_entries[0];
- src_entry = &src->acl_entry[0];
- for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) {
- dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm);
- if (_ACL_PERM_INVALID(src_entry->ae_perm))
- return -EINVAL;
- dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag);
- switch (src_entry->ae_tag) {
- case ACL_USER:
- case ACL_GROUP:
- dest_entry->e_id = cpu_to_le32(src_entry->ae_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
- break;
- default:
- return -EINVAL;
- }
- }
- return new_size;
-}
-
-int
-xfs_acl_vget(
- struct inode *vp,
- void *acl,
- size_t size,
- int kind)
-{
- int error;
- xfs_acl_t *xfs_acl = NULL;
- posix_acl_xattr_header *ext_acl = acl;
- int flags = 0;
-
- if(size) {
- if (!(_ACL_ALLOC(xfs_acl))) {
- error = ENOMEM;
- goto out;
- }
- memset(xfs_acl, 0, sizeof(xfs_acl_t));
- } else
- flags = ATTR_KERNOVAL;
-
- xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error);
- if (error)
- goto out;
-
- if (!size) {
- error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES);
- } else {
- if (xfs_acl_invalid(xfs_acl)) {
- error = EINVAL;
- goto out;
- }
- if (kind == _ACL_TYPE_ACCESS)
- xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
- error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
- }
-out:
- if(xfs_acl)
- _ACL_FREE(xfs_acl);
- return -error;
-}
-
-int
-xfs_acl_vremove(
- struct inode *vp,
- int kind)
-{
- int error;
-
- error = xfs_acl_allow_set(vp, kind);
- if (!error) {
- error = xfs_attr_remove(XFS_I(vp),
- kind == _ACL_TYPE_DEFAULT?
- SGI_ACL_DEFAULT: SGI_ACL_FILE,
- ATTR_ROOT);
- if (error == ENOATTR)
- error = 0; /* 'scool */
- }
- return -error;
-}
-
-int
-xfs_acl_vset(
- struct inode *vp,
- void *acl,
- size_t size,
- int kind)
-{
- posix_acl_xattr_header *ext_acl = acl;
- xfs_acl_t *xfs_acl;
- int error;
- int basicperms = 0; /* more than std unix perms? */
-
- if (!acl)
- return -EINVAL;
-
- if (!(_ACL_ALLOC(xfs_acl)))
- return -ENOMEM;
-
- error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl);
- if (error) {
- _ACL_FREE(xfs_acl);
- return -error;
- }
- if (!xfs_acl->acl_cnt) {
- _ACL_FREE(xfs_acl);
- return 0;
- }
-
- error = xfs_acl_allow_set(vp, kind);
-
- /* Incoming ACL exists, set file mode based on its value */
- if (!error && kind == _ACL_TYPE_ACCESS)
- error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
-
- if (error)
- goto out;
-
- /*
- * If we have more than std unix permissions, set up the actual attr.
- * Otherwise, delete any existing attr. This prevents us from
- * having actual attrs for permissions that can be stored in the
- * standard permission bits.
- */
- if (!basicperms) {
- xfs_acl_set_attr(vp, xfs_acl, kind, &error);
- } else {
- error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
- }
-
-out:
- _ACL_FREE(xfs_acl);
- return -error;
-}
-
-int
-xfs_acl_iaccess(
- xfs_inode_t *ip,
- mode_t mode,
- cred_t *cr)
-{
- xfs_acl_t *acl;
- int rval;
- struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
-
- if (!(_ACL_ALLOC(acl)))
- return -1;
-
- /* If the file has no ACL return -1. */
- rval = sizeof(xfs_acl_t);
- if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
- _ACL_FREE(acl);
- return -1;
- }
- xfs_acl_get_endian(acl);
-
- /* If the file has an empty ACL return -1. */
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) {
- _ACL_FREE(acl);
- return -1;
- }
-
- /* Synchronize ACL with mode bits */
- xfs_acl_sync_mode(ip->i_d.di_mode, acl);
-
- rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr);
- _ACL_FREE(acl);
- return rval;
-}
-
-STATIC int
-xfs_acl_allow_set(
- struct inode *vp,
- int kind)
-{
- if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
- return EPERM;
- if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
- return ENOTDIR;
- if (vp->i_sb->s_flags & MS_RDONLY)
- return EROFS;
- if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
- return EPERM;
- return 0;
-}
-
-/*
- * Note: cr is only used here for the capability check if the ACL test fails.
- * It is not used to find out the credentials uid or groups etc, as was
- * done in IRIX. It is assumed that the uid and groups for the current
- * thread are taken from "current" instead of the cr parameter.
- */
-STATIC int
-xfs_acl_access(
- uid_t fuid,
- gid_t fgid,
- xfs_acl_t *fap,
- mode_t md,
- cred_t *cr)
-{
- xfs_acl_entry_t matched;
- int i, allows;
- int maskallows = -1; /* true, but not 1, either */
- int seen_userobj = 0;
-
- matched.ae_tag = 0; /* Invalid type */
- matched.ae_perm = 0;
-
- for (i = 0; i < fap->acl_cnt; i++) {
- /*
- * Break out if we've got a user_obj entry or
- * a user entry and the mask (and have processed USER_OBJ)
- */
- if (matched.ae_tag == ACL_USER_OBJ)
- break;
- if (matched.ae_tag == ACL_USER) {
- if (maskallows != -1 && seen_userobj)
- break;
- if (fap->acl_entry[i].ae_tag != ACL_MASK &&
- fap->acl_entry[i].ae_tag != ACL_USER_OBJ)
- continue;
- }
- /* True if this entry allows the requested access */
- allows = ((fap->acl_entry[i].ae_perm & md) == md);
-
- switch (fap->acl_entry[i].ae_tag) {
- case ACL_USER_OBJ:
- seen_userobj = 1;
- if (fuid != current_fsuid())
- continue;
- matched.ae_tag = ACL_USER_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_USER:
- if (fap->acl_entry[i].ae_id != current_fsuid())
- continue;
- matched.ae_tag = ACL_USER;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP_OBJ:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fgid))
- continue;
- matched.ae_tag = ACL_GROUP_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fap->acl_entry[i].ae_id))
- continue;
- matched.ae_tag = ACL_GROUP;
- matched.ae_perm = allows;
- break;
- case ACL_MASK:
- maskallows = allows;
- break;
- case ACL_OTHER:
- if (matched.ae_tag != 0)
- continue;
- matched.ae_tag = ACL_OTHER;
- matched.ae_perm = allows;
- break;
- }
- }
- /*
- * First possibility is that no matched entry allows access.
- * The capability to override DAC may exist, so check for it.
- */
- switch (matched.ae_tag) {
- case ACL_OTHER:
- case ACL_USER_OBJ:
- if (matched.ae_perm)
- return 0;
- break;
- case ACL_USER:
- case ACL_GROUP_OBJ:
- case ACL_GROUP:
- if (maskallows && matched.ae_perm)
- return 0;
- break;
- case 0:
- break;
- }
-
- /* EACCES tells generic_permission to check for capability overrides */
- return EACCES;
-}
-
-/*
- * ACL validity checker.
- * This acl validation routine checks each ACL entry read in makes sense.
- */
-STATIC int
-xfs_acl_invalid(
- xfs_acl_t *aclp)
-{
- xfs_acl_entry_t *entry, *e;
- int user = 0, group = 0, other = 0, mask = 0;
- int mask_required = 0;
- int i, j;
-
- if (!aclp)
- goto acl_invalid;
-
- if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES)
- goto acl_invalid;
-
- for (i = 0; i < aclp->acl_cnt; i++) {
- entry = &aclp->acl_entry[i];
- switch (entry->ae_tag) {
- case ACL_USER_OBJ:
- if (user++)
- goto acl_invalid;
- break;
- case ACL_GROUP_OBJ:
- if (group++)
- goto acl_invalid;
- break;
- case ACL_OTHER:
- if (other++)
- goto acl_invalid;
- break;
- case ACL_USER:
- case ACL_GROUP:
- for (j = i + 1; j < aclp->acl_cnt; j++) {
- e = &aclp->acl_entry[j];
- if (e->ae_id == entry->ae_id &&
- e->ae_tag == entry->ae_tag)
- goto acl_invalid;
- }
- mask_required++;
- break;
- case ACL_MASK:
- if (mask++)
- goto acl_invalid;
- break;
- default:
- goto acl_invalid;
- }
- }
- if (!user || !group || !other || (mask_required && !mask))
- goto acl_invalid;
- else
- return 0;
-acl_invalid:
- return EINVAL;
-}
-
-/*
- * Do ACL endian conversion.
- */
-STATIC void
-xfs_acl_get_endian(
- xfs_acl_t *aclp)
-{
- xfs_acl_entry_t *ace, *end;
-
- INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0]; ace < end; ace++) {
- INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm);
- }
-}
-
-/*
- * Get the ACL from the EA and do endian conversion.
- */
-STATIC void
-xfs_acl_get_attr(
- struct inode *vp,
- xfs_acl_t *aclp,
- int kind,
- int flags,
- int *error)
-{
- int len = sizeof(xfs_acl_t);
-
- ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
- flags |= ATTR_ROOT;
- *error = xfs_attr_get(XFS_I(vp),
- kind == _ACL_TYPE_ACCESS ?
- SGI_ACL_FILE : SGI_ACL_DEFAULT,
- (char *)aclp, &len, flags);
- if (*error || (flags & ATTR_KERNOVAL))
- return;
- xfs_acl_get_endian(aclp);
-}
-
-/*
- * Set the EA with the ACL and do endian conversion.
- */
-STATIC void
-xfs_acl_set_attr(
- struct inode *vp,
- xfs_acl_t *aclp,
- int kind,
- int *error)
-{
- xfs_acl_entry_t *ace, *newace, *end;
- xfs_acl_t *newacl;
- int len;
-
- if (!(_ACL_ALLOC(newacl))) {
- *error = ENOMEM;
- return;
- }
-
- len = sizeof(xfs_acl_t) -
- (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt));
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0];
- ace < end;
- ace++, newace++) {
- INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
- }
- INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- *error = xfs_attr_set(XFS_I(vp),
- kind == _ACL_TYPE_ACCESS ?
- SGI_ACL_FILE: SGI_ACL_DEFAULT,
- (char *)newacl, len, ATTR_ROOT);
- _ACL_FREE(newacl);
-}
-
-int
-xfs_acl_vtoacl(
- struct inode *vp,
- xfs_acl_t *access_acl,
- xfs_acl_t *default_acl)
-{
- int error = 0;
-
- if (access_acl) {
- /*
- * Get the Access ACL and the mode. If either cannot
- * be obtained for some reason, invalidate the access ACL.
- */
- xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
- if (error)
- access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- else /* We have a good ACL and the file mode, synchronize. */
- xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
- }
-
- if (default_acl) {
- xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error);
- if (error)
- default_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- }
- return error;
-}
-
-/*
- * This function retrieves the parent directory's acl, processes it
- * and lets the child inherit the acl(s) that it should.
- */
-int
-xfs_acl_inherit(
- struct inode *vp,
- mode_t mode,
- xfs_acl_t *pdaclp)
-{
- xfs_acl_t *cacl;
- int error = 0;
- int basicperms = 0;
-
- /*
- * If the parent does not have a default ACL, or it's an
- * invalid ACL, we're done.
- */
- if (!vp)
- return 0;
- if (!pdaclp || xfs_acl_invalid(pdaclp))
- return 0;
-
- /*
- * Copy the default ACL of the containing directory to
- * the access ACL of the new file and use the mode that
- * was passed in to set up the correct initial values for
- * the u::,g::[m::], and o:: entries. This is what makes
- * umask() "work" with ACL's.
- */
-
- if (!(_ACL_ALLOC(cacl)))
- return ENOMEM;
-
- memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
- xfs_acl_filter_mode(mode, cacl);
- error = xfs_acl_setmode(vp, cacl, &basicperms);
- if (error)
- goto out_error;
-
- /*
- * Set the Default and Access ACL on the file. The mode is already
- * set on the file, so we don't need to worry about that.
- *
- * If the new file is a directory, its default ACL is a copy of
- * the containing directory's default ACL.
- */
- if (S_ISDIR(vp->i_mode))
- xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
- if (!error && !basicperms)
- xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
-out_error:
- _ACL_FREE(cacl);
- return error;
-}
-
-/*
- * Set up the correct mode on the file based on the supplied ACL. This
- * makes sure that the mode on the file reflects the state of the
- * u::,g::[m::], and o:: entries in the ACL. Since the mode is where
- * the ACL is going to get the permissions for these entries, we must
- * synchronize the mode whenever we set the ACL on a file.
- */
-STATIC int
-xfs_acl_setmode(
- struct inode *vp,
- xfs_acl_t *acl,
- int *basicperms)
-{
- struct iattr iattr;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
- int i, nomask = 1;
-
- *basicperms = 1;
-
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT)
- return 0;
-
- /*
- * Copy the u::, g::, o::, and m:: bits from the ACL into the
- * mode. The m:: bits take precedence over the g:: bits.
- */
- iattr.ia_valid = ATTR_MODE;
- iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
- iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
- ap = acl->acl_entry;
- for (i = 0; i < acl->acl_cnt; ++i) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- iattr.ia_mode |= ap->ae_perm << 6;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK: /* more than just standard modes */
- nomask = 0;
- iattr.ia_mode |= ap->ae_perm << 3;
- *basicperms = 0;
- break;
- case ACL_OTHER:
- iattr.ia_mode |= ap->ae_perm;
- break;
- default: /* more than just standard modes */
- *basicperms = 0;
- break;
- }
- ap++;
- }
-
- /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- iattr.ia_mode |= gap->ae_perm << 3;
-
- return xfs_setattr(XFS_I(vp), &iattr, 0);
-}
-
-/*
- * The permissions for the special ACL entries (u::, g::[m::], o::) are
- * actually stored in the file mode (if there is both a group and a mask,
- * the group is stored in the ACL entry and the mask is stored on the file).
- * This allows the mode to remain automatically in sync with the ACL without
- * the need for a call-back to the ACL system at every point where the mode
- * could change. This function takes the permissions from the specified mode
- * and places it in the supplied ACL.
- *
- * This implementation draws its validity from the fact that, when the ACL
- * was assigned, the mode was copied from the ACL.
- * If the mode did not change, therefore, the mode remains exactly what was
- * taken from the special ACL entries at assignment.
- * If a subsequent chmod() was done, the POSIX spec says that the change in
- * mode must cause an update to the ACL seen at user level and used for
- * access checks. Before and after a mode change, therefore, the file mode
- * most accurately reflects what the special ACL entries should permit/deny.
- *
- * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly,
- * the existing mode bits will override whatever is in the
- * ACL. Similarly, if there is a pre-existing ACL that was
- * never in sync with its mode (owing to a bug in 6.5 and
- * before), it will now magically (or mystically) be
- * synchronized. This could cause slight astonishment, but
- * it is better than inconsistent permissions.
- *
- * The supplied ACL is a template that may contain any combination
- * of special entries. These are treated as place holders when we fill
- * out the ACL. This routine does not add or remove special entries, it
- * simply unites each special entry with its associated set of permissions.
- */
-STATIC void
-xfs_acl_sync_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
-
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be set instead of the GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm = (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm = (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm = mode & 0x7;
- break;
- default:
- break;
- }
- }
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm = (mode >> 3) & 0x7;
-}
-
-/*
- * When inheriting an Access ACL from a directory Default ACL,
- * the ACL bits are set to the intersection of the ACL default
- * permission bits and the file permission bits in mode. If there
- * are no permission bits on the file then we must not give them
- * the ACL. This is what what makes umask() work with ACLs.
- */
-STATIC void
-xfs_acl_filter_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
-
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be merged with GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm &= (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm &= (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm &= mode & 0x7;
- break;
- default:
- break;
- }
- }
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm &= (mode >> 3) & 0x7;
-}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 642f1db4def..63dc1f2efad 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -18,81 +18,48 @@
#ifndef __XFS_ACL_H__
#define __XFS_ACL_H__
-/*
- * Access Control Lists
- */
-typedef __uint16_t xfs_acl_perm_t;
-typedef __int32_t xfs_acl_tag_t;
-typedef __int32_t xfs_acl_id_t;
+struct inode;
+struct posix_acl;
+struct xfs_inode;
#define XFS_ACL_MAX_ENTRIES 25
#define XFS_ACL_NOT_PRESENT (-1)
-typedef struct xfs_acl_entry {
- xfs_acl_tag_t ae_tag;
- xfs_acl_id_t ae_id;
- xfs_acl_perm_t ae_perm;
-} xfs_acl_entry_t;
-
-typedef struct xfs_acl {
- __int32_t acl_cnt;
- xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES];
-} xfs_acl_t;
+/* On-disk XFS access control list structure */
+struct xfs_acl {
+ __be32 acl_cnt;
+ struct xfs_acl_entry {
+ __be32 ae_tag;
+ __be32 ae_id;
+ __be16 ae_perm;
+ } acl_entry[XFS_ACL_MAX_ENTRIES];
+};
/* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE "SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE "SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
-#define _ACL_TYPE_ACCESS 1
-#define _ACL_TYPE_DEFAULT 2
-
#ifdef CONFIG_XFS_POSIX_ACL
+extern int xfs_check_acl(struct inode *inode, int mask);
+extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
+extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
+extern int xfs_acl_chmod(struct inode *inode);
+extern void xfs_inode_init_acls(struct xfs_inode *ip);
+extern void xfs_inode_clear_acls(struct xfs_inode *ip);
+extern int posix_acl_access_exists(struct inode *inode);
+extern int posix_acl_default_exists(struct inode *inode);
-struct vattr;
-struct xfs_inode;
-
-extern struct kmem_zone *xfs_acl_zone;
-#define xfs_acl_zone_init(zone, name) \
- (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
-#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
-
-extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
-extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(struct inode *);
-extern int xfs_acl_vhasacl_default(struct inode *);
-extern int xfs_acl_vset(struct inode *, void *, size_t, int);
-extern int xfs_acl_vget(struct inode *, void *, size_t, int);
-extern int xfs_acl_vremove(struct inode *, int);
-
-#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
-
-#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
-#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0)
-#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0)
-#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access
-#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default
-
-#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
-#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
-
+extern struct xattr_handler xfs_xattr_system_handler;
#else
-#define xfs_acl_zone_init(zone,name)
-#define xfs_acl_zone_destroy(zone)
-#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vremove(v,t) (-EOPNOTSUPP)
-#define xfs_acl_vhasacl_access(v) (0)
-#define xfs_acl_vhasacl_default(v) (0)
-#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */
-#define _ACL_FREE(a) ((void)0)
-#define _ACL_INHERIT(c,m,d) (0)
-#define _ACL_GET_ACCESS(pv,pa) (0)
-#define _ACL_GET_DEFAULT(pv,pd) (0)
-#define _ACL_ACCESS_EXISTS (NULL)
-#define _ACL_DEFAULT_EXISTS (NULL)
-#endif
-
+# define xfs_check_acl NULL
+# define xfs_get_acl(inode, type) NULL
+# define xfs_inherit_acl(inode, default_acl) 0
+# define xfs_acl_chmod(inode) 0
+# define xfs_inode_init_acls(ip)
+# define xfs_inode_clear_acls(ip)
+# define posix_acl_access_exists(inode) 0
+# define posix_acl_default_exists(inode) 0
+#endif /* CONFIG_XFS_POSIX_ACL */
#endif /* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index c8641f713ca..f24b50b68d0 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -212,6 +212,8 @@ typedef struct xfs_perag
/*
* tags for inode radix tree
*/
+#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
+ in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 53d5e70d136..0902249354a 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
#endif /* __KERNEL__ */
-/* do we need conversion? */
-#define ARCH_NOCONVERT 1
-#ifdef XFS_NATIVE_HOST
-# define ARCH_CONVERT ARCH_NOCONVERT
-#else
-# define ARCH_CONVERT 0
-#endif
-
-/* generic swapping macros */
-
-#ifndef HAVE_SWABMACROS
-#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var))))
-#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var))))
-#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var))))
-#endif
-
-#define INT_SWAP(type, var) \
- ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \
- ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \
- ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \
- (var))))
-
/*
* get and set integers from potentially unaligned locations
*/
@@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
((__u8*)(pointer))[1] = (((value) ) & 0xff); \
}
-/* does not return a value */
-#define INT_SET(reference,arch,valueref) \
- (__builtin_constant_p(valueref) ? \
- (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \
- (void)( \
- ((reference) = (valueref)), \
- ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \
- ) \
- )
-
/*
* In directories inode numbers are stored as unaligned arrays of unsigned
* 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5fde1654b43..db15feb906f 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -45,7 +45,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
-#include "xfs_acl.h"
#include "xfs_rw.h"
#include "xfs_vnodeops.h"
@@ -249,8 +248,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
/*
* Attach the dquots to the inode.
*/
- if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
- return (error);
+ error = xfs_qm_dqattach(dp, 0);
+ if (error)
+ return error;
/*
* If the inode doesn't have an attribute fork, add one.
@@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
+ error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error) {
@@ -501,8 +501,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
/*
* Attach the dquots to the inode.
*/
- if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
- return (error);
+ error = xfs_qm_dqattach(dp, 0);
+ if (error)
+ return error;
/*
* Start our first transaction of the day.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ca7c6005a48..7928b9983c1 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc(
* Adjust the disk quota also. This was reserved
* earlier.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
} else {
@@ -2995,7 +2995,7 @@ xfs_bmap_btalloc(
* Adjust the disk quota also. This was reserved
* earlier.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
XFS_TRANS_DQ_BCOUNT,
(long) args.len);
@@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents(
return error;
xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
ip->i_d.di_nblocks--;
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
if (cur->bc_bufs[0] == cbp)
cur->bc_bufs[0] = NULL;
@@ -3386,7 +3386,7 @@ xfs_bmap_del_extent(
* Adjust quota data.
*/
if (qfield)
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks);
+ xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
/*
* Account for change in delayed indirect blocks.
@@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree(
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
/*
* Fill in the child block.
@@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents(
XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
XFS_IFORK_NEXT_SET(ip, whichfork, 1);
ip->i_d.di_nblocks = 1;
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
+ xfs_trans_mod_dquot_byino(tp, ip,
XFS_TRANS_DQ_BCOUNT, 1L);
flags |= xfs_ilog_fext(whichfork);
} else {
@@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork(
XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
goto error0;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ?
+ error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error) {
@@ -4983,10 +4983,11 @@ xfs_bmapi(
* adjusted later. We return if we haven't
* allocated blocks already inside this loop.
*/
- if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS(
- mp, NULL, ip, (long)alen, 0,
+ error = xfs_trans_reserve_quota_nblks(
+ NULL, ip, (long)alen, 0,
rt ? XFS_QMOPT_RES_RTBLKS :
- XFS_QMOPT_RES_REGBLKS))) {
+ XFS_QMOPT_RES_REGBLKS);
+ if (error) {
if (n == 0) {
*nmap = 0;
ASSERT(cur == NULL);
@@ -5035,8 +5036,8 @@ xfs_bmapi(
if (XFS_IS_QUOTA_ON(mp))
/* unreserve the blocks now */
(void)
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
- mp, NULL, ip,
+ xfs_trans_unreserve_quota_nblks(
+ NULL, ip,
(long)alen, 0, rt ?
XFS_QMOPT_RES_RTBLKS :
XFS_QMOPT_RES_REGBLKS);
@@ -5691,14 +5692,14 @@ xfs_bunmapi(
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
(int64_t)rtexts, rsvd);
- (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
- NULL, ip, -((long)del.br_blockcount), 0,
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
(int64_t)del.br_blockcount, rsvd);
- (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
- NULL, ip, -((long)del.br_blockcount), 0,
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
}
ip->i_delayed_blks -= del.br_blockcount;
@@ -6085,6 +6086,7 @@ xfs_getbmap(
break;
}
+ kmem_free(out);
return error;
}
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0760d352586..5c1ade06578 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -590,7 +590,7 @@ xfs_bmbt_alloc_block(
cur->bc_private.b.allocated++;
cur->bc_private.b.ip->i_d.di_nblocks++;
xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+ xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
XFS_TRANS_DQ_BCOUNT, 1L);
new->l = cpu_to_be64(args.fsbno);
@@ -618,7 +618,7 @@ xfs_bmbt_free_block(
ip->i_d.di_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, bp);
return 0;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 6c87c8f304e..edf8bdf4141 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -542,10 +542,8 @@ xfs_filestream_associate(
* waiting for the lock because someone else is waiting on the lock we
* hold and we cannot drop that as we are in a transaction here.
*
- * Lucky for us, this inversion is rarely a problem because it's a
- * directory inode that we are trying to lock here and that means the
- * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
- * used. i.e. freeze, remount-ro, quotasync or unmount.
+ * Lucky for us, this inversion is not a problem because it's a
+ * directory inode that we are trying to lock here.
*
* So, if we can't get the iolock without sleeping then just give up
*/
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index f7c06fac822..c4ea51b55dc 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks {
* Minimum and maximum sizes need for growth checks
*/
#define XFS_MIN_AG_BLOCKS 64
-#define XFS_MIN_LOG_BLOCKS 512
-#define XFS_MAX_LOG_BLOCKS (64 * 1024)
-#define XFS_MIN_LOG_BYTES (256 * 1024)
-#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024)
+#define XFS_MIN_LOG_BLOCKS 512ULL
+#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL)
+#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL)
+
+/* keep the maximum size under 2^31 by a small amount */
+#define XFS_MAX_LOG_BYTES \
+ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
/*
* Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 89b81eedce6..76c540f719e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -18,6 +18,7 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
+#include "xfs_acl.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
@@ -82,6 +83,7 @@ xfs_inode_alloc(
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
ip->i_size = 0;
ip->i_new_size = 0;
+ xfs_inode_init_acls(ip);
/*
* Initialize inode's trace buffers.
@@ -500,10 +502,7 @@ xfs_ireclaim(
* ilock one but will still hold the iolock.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- /*
- * Release dquots (and their references) if any.
- */
- XFS_QM_DQDETACH(ip->i_mount, ip);
+ xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
switch (ip->i_d.di_mode & S_IFMT) {
@@ -561,6 +560,7 @@ xfs_ireclaim(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
+ xfs_inode_clear_acls(ip);
kmem_zone_free(xfs_inode_zone, ip);
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 123b20c8cbf..1f22d65fed0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,7 +49,6 @@
#include "xfs_utils.h"
#include "xfs_dir2_trace.h"
#include "xfs_quota.h"
-#include "xfs_acl.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f879c1bc4b9..77016702938 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,6 +18,7 @@
#ifndef __XFS_INODE_H__
#define __XFS_INODE_H__
+struct posix_acl;
struct xfs_dinode;
struct xfs_inode;
@@ -272,6 +273,11 @@ typedef struct xfs_inode {
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
+#ifdef CONFIG_XFS_POSIX_ACL
+ struct posix_acl *i_acl;
+ struct posix_acl *i_default_acl;
+#endif
+
/* Trace buffers per inode. */
#ifdef XFS_INODE_TRACE
struct ktrace *i_trace; /* general inode trace */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5aaa2d7ec15..67ae5555a30 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -385,7 +384,7 @@ xfs_iomap_write_direct(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
- error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
+ error = xfs_qm_dqattach_locked(ip, 0);
if (error)
return XFS_ERROR(error);
@@ -444,8 +443,7 @@ xfs_iomap_write_direct(
if (error)
goto error_out;
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
- qblocks, 0, quota_flag);
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error)
goto error1;
@@ -495,7 +493,7 @@ xfs_iomap_write_direct(
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_bmap_cancel(&free_list);
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+ xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -582,7 +580,7 @@ xfs_iomap_write_delay(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
- error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+ error = xfs_qm_dqattach_locked(ip, 0);
if (error)
return XFS_ERROR(error);
@@ -684,7 +682,8 @@ xfs_iomap_write_allocate(
/*
* Make sure that the dquots are there.
*/
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return XFS_ERROR(error);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7ba450116d4..47da2fb4537 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer(
error = 0;
if (buf_f->blf_flags &
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+ if (item->ri_buf[i].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ goto next;
+ }
+ if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[i].i_len, __func__);
+ goto next;
+ }
error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
item->ri_buf[i].i_addr,
-1, 0, XFS_QMOPT_DOWARN,
"dquot_buf_recover");
+ if (error)
+ goto next;
}
- if (!error)
- memcpy(xfs_buf_offset(bp,
- (uint)bit << XFS_BLI_SHIFT), /* dest */
- item->ri_buf[i].i_addr, /* source */
- nbits<<XFS_BLI_SHIFT); /* length */
+
+ memcpy(xfs_buf_offset(bp,
+ (uint)bit << XFS_BLI_SHIFT), /* dest */
+ item->ri_buf[i].i_addr, /* source */
+ nbits<<XFS_BLI_SHIFT); /* length */
+ next:
i++;
bit += nbits;
}
@@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans(
return (0);
recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
- ASSERT(recddq);
+
+ if (item->ri_buf[1].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ return XFS_ERROR(EIO);
+ }
+ if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[1].i_len, __func__);
+ return XFS_ERROR(EIO);
+ }
+
/*
* This type of quotas was turned off, so ignore this record.
*/
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 65a99725d0c..5c6f092659c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -960,6 +960,53 @@ xfs_check_sizes(xfs_mount_t *mp)
}
/*
+ * Clear the quotaflags in memory and in the superblock.
+ */
+int
+xfs_mount_reset_sbqflags(
+ struct xfs_mount *mp)
+{
+ int error;
+ struct xfs_trans *tp;
+
+ mp->m_qflags = 0;
+
+ /*
+ * It is OK to look at sb_qflags here in mount path,
+ * without m_sb_lock.
+ */
+ if (mp->m_sb.sb_qflags == 0)
+ return 0;
+ spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_qflags = 0;
+ spin_unlock(&mp->m_sb_lock);
+
+ /*
+ * If the fs is readonly, let the incore superblock run
+ * with quotas off but don't flush the update out to disk
+ */
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return 0;
+
+#ifdef QUOTADEBUG
+ xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+ error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+ XFS_DEFAULT_LOG_COUNT);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ xfs_fs_cmn_err(CE_ALERT, mp,
+ "xfs_mount_reset_sbqflags: Superblock update failed!");
+ return error;
+ }
+
+ xfs_mod_sb(tp, XFS_SB_QFLAGS);
+ return xfs_trans_commit(tp, 0);
+}
+
+/*
* This function does the following on an initial mount of a file system:
* - reads the superblock from disk and init the mount struct
* - if we're a 32-bit kernel, do a size check on the superblock
@@ -976,7 +1023,8 @@ xfs_mountfs(
xfs_sb_t *sbp = &(mp->m_sb);
xfs_inode_t *rip;
__uint64_t resblks;
- uint quotamount, quotaflags;
+ uint quotamount = 0;
+ uint quotaflags = 0;
int error = 0;
xfs_mount_common(mp, sbp);
@@ -1210,9 +1258,28 @@ xfs_mountfs(
/*
* Initialise the XFS quota management subsystem for this mount
*/
- error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
- if (error)
- goto out_rtunmount;
+ if (XFS_IS_QUOTA_RUNNING(mp)) {
+ error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
+ if (error)
+ goto out_rtunmount;
+ } else {
+ ASSERT(!XFS_IS_QUOTA_ON(mp));
+
+ /*
+ * If a file system had quotas running earlier, but decided to
+ * mount without -o uquota/pquota/gquota options, revoke the
+ * quotachecked license.
+ */
+ if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
+ cmn_err(CE_NOTE,
+ "XFS: resetting qflags for filesystem %s",
+ mp->m_fsname);
+
+ error = xfs_mount_reset_sbqflags(mp);
+ if (error)
+ return error;
+ }
+ }
/*
* Finish recovering the file system. This part needed to be
@@ -1228,9 +1295,19 @@ xfs_mountfs(
/*
* Complete the quota initialisation, post-log-replay component.
*/
- error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
- if (error)
- goto out_rtunmount;
+ if (quotamount) {
+ ASSERT(mp->m_qflags == 0);
+ mp->m_qflags = quotaflags;
+
+ xfs_qm_mount_quotas(mp);
+ }
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+ if (XFS_IS_QUOTA_ON(mp))
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
+ else
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
+#endif
/*
* Now we are mounted, reserve a small amount of unused space for
@@ -1279,12 +1356,7 @@ xfs_unmountfs(
__uint64_t resblks;
int error;
- /*
- * Release dquot that rootinode, rbmino and rsumino might be holding,
- * and release the quota inodes.
- */
- XFS_QM_UNMOUNT(mp);
-
+ xfs_qm_unmount_quotas(mp);
xfs_rtunmount_inodes(mp);
IRELE(mp->m_rootip);
@@ -1299,12 +1371,9 @@ xfs_unmountfs(
* need to force the log first.
*/
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
-
- XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);
- if (mp->m_quotainfo)
- XFS_QM_DONE(mp);
+ xfs_qm_unmount(mp);
/*
* Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d6a64392f98..a5122382afd 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -64,6 +64,8 @@ struct xfs_swapext;
struct xfs_mru_cache;
struct xfs_nameops;
struct xfs_ail;
+struct xfs_quotainfo;
+
/*
* Prototypes and functions for the Data Migration subsystem.
@@ -107,86 +109,6 @@ typedef struct xfs_dmops {
(*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
-/*
- * Prototypes and functions for the Quota Management subsystem.
- */
-
-struct xfs_dquot;
-struct xfs_dqtrxops;
-struct xfs_quotainfo;
-
-typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
-typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
-typedef void (*xfs_qmunmount_t)(struct xfs_mount *);
-typedef void (*xfs_qmdone_t)(struct xfs_mount *);
-typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
-typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint);
-typedef void (*xfs_dqdetach_t)(struct xfs_inode *);
-typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
-typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *,
- struct xfs_inode *, uid_t, gid_t, prid_t, uint,
- struct xfs_dquot **, struct xfs_dquot **);
-typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot *, struct xfs_dquot *);
-typedef int (*xfs_dqvoprename_t)(struct xfs_inode **);
-typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
- struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot **, struct xfs_dquot *);
-typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot *, struct xfs_dquot *, uint);
-typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
-typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags);
-
-typedef struct xfs_qmops {
- xfs_qminit_t xfs_qminit;
- xfs_qmdone_t xfs_qmdone;
- xfs_qmmount_t xfs_qmmount;
- xfs_qmunmount_t xfs_qmunmount;
- xfs_dqrele_t xfs_dqrele;
- xfs_dqattach_t xfs_dqattach;
- xfs_dqdetach_t xfs_dqdetach;
- xfs_dqpurgeall_t xfs_dqpurgeall;
- xfs_dqvopalloc_t xfs_dqvopalloc;
- xfs_dqvopcreate_t xfs_dqvopcreate;
- xfs_dqvoprename_t xfs_dqvoprename;
- xfs_dqvopchown_t xfs_dqvopchown;
- xfs_dqvopchownresv_t xfs_dqvopchownresv;
- xfs_dqstatvfs_t xfs_dqstatvfs;
- xfs_dqsync_t xfs_dqsync;
- struct xfs_dqtrxops *xfs_dqtrxops;
-} xfs_qmops_t;
-
-#define XFS_QM_INIT(mp, mnt, fl) \
- (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
-#define XFS_QM_MOUNT(mp, mnt, fl) \
- (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
-#define XFS_QM_UNMOUNT(mp) \
- (*(mp)->m_qm_ops->xfs_qmunmount)(mp)
-#define XFS_QM_DONE(mp) \
- (*(mp)->m_qm_ops->xfs_qmdone)(mp)
-#define XFS_QM_DQRELE(mp, dq) \
- (*(mp)->m_qm_ops->xfs_dqrele)(dq)
-#define XFS_QM_DQATTACH(mp, ip, fl) \
- (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl)
-#define XFS_QM_DQDETACH(mp, ip) \
- (*(mp)->m_qm_ops->xfs_dqdetach)(ip)
-#define XFS_QM_DQPURGEALL(mp, fl) \
- (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl)
-#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
- (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
-#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
- (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2)
-#define XFS_QM_DQVOPRENAME(mp, ip) \
- (*(mp)->m_qm_ops->xfs_dqvoprename)(ip)
-#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \
- (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq)
-#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \
- (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl)
-#define XFS_QM_DQSTATVFS(ip, statp) \
- (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
-#define XFS_QM_DQSYNC(mp, flags) \
- (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
-
#ifdef HAVE_PERCPU_SB
/*
@@ -510,8 +432,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
extern int xfs_dmops_get(struct xfs_mount *);
extern void xfs_dmops_put(struct xfs_mount *);
-extern int xfs_qmops_get(struct xfs_mount *);
-extern void xfs_qmops_put(struct xfs_mount *);
extern struct xfs_dmops xfs_dmcore_xfs;
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
deleted file mode 100644
index e101790ea8e..00000000000
--- a/fs/xfs/xfs_qmops.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_error.h"
-
-
-STATIC struct xfs_dquot *
-xfs_dqvopchown_default(
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- struct xfs_dquot **dqp,
- struct xfs_dquot *dq)
-{
- return NULL;
-}
-
-/*
- * Clear the quotaflags in memory and in the superblock.
- */
-int
-xfs_mount_reset_sbqflags(xfs_mount_t *mp)
-{
- int error;
- xfs_trans_t *tp;
-
- mp->m_qflags = 0;
- /*
- * It is OK to look at sb_qflags here in mount path,
- * without m_sb_lock.
- */
- if (mp->m_sb.sb_qflags == 0)
- return 0;
- spin_lock(&mp->m_sb_lock);
- mp->m_sb.sb_qflags = 0;
- spin_unlock(&mp->m_sb_lock);
-
- /*
- * if the fs is readonly, let the incore superblock run
- * with quotas off but don't flush the update out to disk
- */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return 0;
-#ifdef QUOTADEBUG
- xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
- if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
- XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- xfs_fs_cmn_err(CE_ALERT, mp,
- "xfs_mount_reset_sbqflags: Superblock update failed!");
- return error;
- }
- xfs_mod_sb(tp, XFS_SB_QFLAGS);
- error = xfs_trans_commit(tp, 0);
- return error;
-}
-
-STATIC int
-xfs_noquota_init(
- xfs_mount_t *mp,
- uint *needquotamount,
- uint *quotaflags)
-{
- int error = 0;
-
- *quotaflags = 0;
- *needquotamount = B_FALSE;
-
- ASSERT(!XFS_IS_QUOTA_ON(mp));
-
- /*
- * If a file system had quotas running earlier, but decided to
- * mount without -o uquota/pquota/gquota options, revoke the
- * quotachecked license.
- */
- if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
- cmn_err(CE_NOTE,
- "XFS resetting qflags for filesystem %s",
- mp->m_fsname);
-
- error = xfs_mount_reset_sbqflags(mp);
- }
- return error;
-}
-
-static struct xfs_qmops xfs_qmcore_stub = {
- .xfs_qminit = (xfs_qminit_t) xfs_noquota_init,
- .xfs_qmdone = (xfs_qmdone_t) fs_noerr,
- .xfs_qmmount = (xfs_qmmount_t) fs_noerr,
- .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr,
- .xfs_dqrele = (xfs_dqrele_t) fs_noerr,
- .xfs_dqattach = (xfs_dqattach_t) fs_noerr,
- .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr,
- .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr,
- .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr,
- .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr,
- .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr,
- .xfs_dqvopchown = xfs_dqvopchown_default,
- .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr,
- .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval,
- .xfs_dqsync = (xfs_dqsync_t) fs_noerr,
-};
-
-int
-xfs_qmops_get(struct xfs_mount *mp)
-{
- if (XFS_IS_QUOTA_RUNNING(mp)) {
-#ifdef CONFIG_XFS_QUOTA
- mp->m_qm_ops = &xfs_qmcore_xfs;
-#else
- cmn_err(CE_WARN,
- "XFS: qouta support not available in this kernel.");
- return EINVAL;
-#endif
- } else {
- mp->m_qm_ops = &xfs_qmcore_stub;
- }
-
- return 0;
-}
-
-void
-xfs_qmops_put(struct xfs_mount *mp)
-{
-}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index f5d1202dde2..3ec91ac74c2 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat {
#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */
#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */
#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */
#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
@@ -302,69 +301,79 @@ typedef struct xfs_dqtrx {
long qt_delrtb_delta; /* delayed RT blk count changes */
} xfs_dqtrx_t;
-/*
- * Dquot transaction functions, used if quota is enabled.
- */
-typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *);
-typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *,
- struct xfs_inode *, uint, long);
-typedef void (*qo_free_dqinfo_t)(struct xfs_trans *);
-typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *);
-typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *);
-typedef int (*qo_reserve_quota_nblks_t)(
- struct xfs_trans *, struct xfs_mount *,
- struct xfs_inode *, long, long, uint);
-typedef int (*qo_reserve_quota_bydquots_t)(
- struct xfs_trans *, struct xfs_mount *,
- struct xfs_dquot *, struct xfs_dquot *,
- long, long, uint);
-typedef struct xfs_dqtrxops {
- qo_dup_dqinfo_t qo_dup_dqinfo;
- qo_free_dqinfo_t qo_free_dqinfo;
- qo_mod_dquot_byino_t qo_mod_dquot_byino;
- qo_apply_dquot_deltas_t qo_apply_dquot_deltas;
- qo_reserve_quota_nblks_t qo_reserve_quota_nblks;
- qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots;
- qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots;
-} xfs_dqtrxops_t;
-
-#define XFS_DQTRXOP(mp, tp, op, args...) \
- ((mp)->m_qm_ops->xfs_dqtrxops ? \
- ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0)
-
-#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \
- ((mp)->m_qm_ops->xfs_dqtrxops ? \
- ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0)
-
-#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \
- XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp)
-#define XFS_TRANS_FREE_DQINFO(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo)
-#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \
- XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta)
-#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas)
-#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \
- XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl)
-#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \
- XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl)
-#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots)
-
-#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \
- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags)
-#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
- XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
- f | XFS_QMOPT_RES_REGBLKS)
-#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
- XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
+extern void xfs_trans_free_dqinfo(struct xfs_trans *);
+extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
+ uint, long);
+extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
+extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,
+ struct xfs_inode *, long, long, uint);
+extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
+ struct xfs_mount *, struct xfs_dquot *,
+ struct xfs_dquot *, long, long, uint);
+
+extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
+ struct xfs_dquot **, struct xfs_dquot **);
+extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
+ struct xfs_dquot *, struct xfs_dquot *);
+extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
+extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,
+ struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);
+extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
+ struct xfs_dquot *, struct xfs_dquot *, uint);
+extern int xfs_qm_dqattach(struct xfs_inode *, uint);
+extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
+extern void xfs_qm_dqdetach(struct xfs_inode *);
+extern void xfs_qm_dqrele(struct xfs_dquot *);
+extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
+extern int xfs_qm_sync(struct xfs_mount *, int);
+extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
+extern void xfs_qm_mount_quotas(struct xfs_mount *);
+extern void xfs_qm_unmount(struct xfs_mount *);
+extern void xfs_qm_unmount_quotas(struct xfs_mount *);
+
+#else
+static inline int
+xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
+ uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
+{
+ *udqp = NULL;
+ *gdqp = NULL;
+ return 0;
+}
+#define xfs_trans_dup_dqinfo(tp, tp2)
+#define xfs_trans_free_dqinfo(tp)
+#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
+#define xfs_trans_apply_dquot_deltas(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0)
+#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0)
+#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
+#define xfs_qm_vop_rename_dqattach(it) (0)
+#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
+#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0)
+#define xfs_qm_dqattach(ip, fl) (0)
+#define xfs_qm_dqattach_locked(ip, fl) (0)
+#define xfs_qm_dqdetach(ip)
+#define xfs_qm_dqrele(d)
+#define xfs_qm_statvfs(ip, s)
+#define xfs_qm_sync(mp, fl) (0)
+#define xfs_qm_newmount(mp, a, b) (0)
+#define xfs_qm_mount_quotas(mp)
+#define xfs_qm_unmount(mp)
+#define xfs_qm_unmount_quotas(mp) (0)
+#endif /* CONFIG_XFS_QUOTA */
+
+#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
+ xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags)
+#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \
+ xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
f | XFS_QMOPT_RES_REGBLKS)
extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
-extern struct xfs_qmops xfs_qmcore_xfs;
-
#endif /* __KERNEL__ */
-
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 58f85e9cd11..b81deea0ce1 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -166,7 +166,8 @@ xfs_rename(
/*
* Attach the dquots to the inodes
*/
- if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
+ error = xfs_qm_vop_rename_dqattach(inodes);
+ if (error) {
xfs_trans_cancel(tp, cancel_flags);
goto std_return;
}
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 36f3a21c54d..fea68615ed2 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -41,7 +41,6 @@
#include "xfs_ialloc.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
-#include "xfs_acl.h"
#include "xfs_error.h"
#include "xfs_buf_item.h"
#include "xfs_rw.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index bcc39d358ad..66b849358e6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
tp->t_rtx_res = tp->t_rtx_res_used;
ntp->t_pflags = tp->t_pflags;
- XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
+ xfs_trans_dup_dqinfo(tp, ntp);
atomic_inc(&tp->t_mountp->m_active_trans);
return ntp;
@@ -829,7 +829,7 @@ shut_us_down:
* means is that we have some (non-persistent) quota
* reservations that need to be unreserved.
*/
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
commit_lsn = xfs_log_done(mp, tp->t_ticket,
NULL, log_flags);
@@ -848,10 +848,9 @@ shut_us_down:
/*
* If we need to update the superblock, then do it now.
*/
- if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+ if (tp->t_flags & XFS_TRANS_SB_DIRTY)
xfs_trans_apply_sb_deltas(tp);
- }
- XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
+ xfs_trans_apply_dquot_deltas(tp);
/*
* Ask each log item how many log_vector entries it will
@@ -1056,7 +1055,7 @@ xfs_trans_uncommit(
}
xfs_trans_unreserve_and_mod_sb(tp);
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
xfs_trans_free_items(tp, flags);
xfs_trans_free_busy(tp);
@@ -1181,7 +1180,7 @@ xfs_trans_cancel(
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1211,7 +1210,7 @@ xfs_trans_free(
xfs_trans_t *tp)
{
atomic_dec(&tp->t_mountp->m_active_trans);
- XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+ xfs_trans_free_dqinfo(tp);
kmem_zone_free(xfs_trans_zone, tp);
}
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 79b9e5ea535..4d88616bde9 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
xfs_buf_relse(ialloc_context);
if (dqinfo) {
tp->t_dqinfo = dqinfo;
- XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+ xfs_trans_free_dqinfo(tp);
}
*tpp = ntp;
*ipp = NULL;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 19cf90a9c76..c4eca5ed5da 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -42,6 +42,7 @@
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
+#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_rw.h"
#include "xfs_error.h"
@@ -118,7 +119,7 @@ xfs_setattr(
*/
ASSERT(udqp == NULL);
ASSERT(gdqp == NULL);
- code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+ code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
qflags, &udqp, &gdqp);
if (code)
return code;
@@ -180,10 +181,11 @@ xfs_setattr(
* Do a quota reservation only if uid/gid is actually
* going to change.
*/
- if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
- (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+ (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
ASSERT(tp);
- code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
@@ -217,7 +219,7 @@ xfs_setattr(
/*
* Make sure that the dquots are attached to the inode.
*/
- code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+ code = xfs_qm_dqattach_locked(ip, 0);
if (code)
goto error_return;
@@ -351,21 +353,21 @@ xfs_setattr(
* in the transaction.
*/
if (iuid != uid) {
- if (XFS_IS_UQUOTA_ON(mp)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(mask & ATTR_UID);
ASSERT(udqp);
- olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ olddquot1 = xfs_qm_vop_chown(tp, ip,
&ip->i_udquot, udqp);
}
ip->i_d.di_uid = uid;
inode->i_uid = uid;
}
if (igid != gid) {
- if (XFS_IS_GQUOTA_ON(mp)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
ASSERT(!XFS_IS_PQUOTA_ON(mp));
ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
- olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ olddquot2 = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_gid = gid;
@@ -461,13 +463,25 @@ xfs_setattr(
/*
* Release any dquot(s) the inode had kept before chown.
*/
- XFS_QM_DQRELE(mp, olddquot1);
- XFS_QM_DQRELE(mp, olddquot2);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(olddquot1);
+ xfs_qm_dqrele(olddquot2);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
- if (code) {
+ if (code)
return code;
+
+ /*
+ * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+ * update. We could avoid this with linked transactions
+ * and passing down the transaction pointer all the way
+ * to attr_set. No previous user of the generic
+ * Posix ACL code seems to care about this issue either.
+ */
+ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+ code = -xfs_acl_chmod(inode);
+ if (code)
+ return XFS_ERROR(code);
}
if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
@@ -482,8 +496,8 @@ xfs_setattr(
commit_flags |= XFS_TRANS_ABORT;
/* FALLTHROUGH */
error_return:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (tp) {
xfs_trans_cancel(tp, commit_flags);
}
@@ -739,7 +753,8 @@ xfs_free_eofblocks(
/*
* Attach the dquots to the inode up front.
*/
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
/*
@@ -1181,7 +1196,8 @@ xfs_inactive(
ASSERT(ip->i_d.di_nlink == 0);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return VN_INACTIVE_CACHE;
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1307,7 +1323,7 @@ xfs_inactive(
/*
* Credit the quota account(s). The inode is gone.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
/*
* Just ignore errors at this point. There is nothing we can
@@ -1323,11 +1339,11 @@ xfs_inactive(
xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
"xfs_trans_commit() returned error %d", error);
}
+
/*
* Release the dquots held by inode, if any.
*/
- XFS_QM_DQDETACH(mp, ip);
-
+ xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
out:
@@ -1427,8 +1443,7 @@ xfs_create(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(), current_fsgid(), prid,
+ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -1489,7 +1504,7 @@ xfs_create(
/*
* Reserve disk quota and the inode.
*/
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
if (error)
goto out_trans_cancel;
@@ -1561,7 +1576,7 @@ xfs_create(
* These ids of the inode couldn't have changed since the new
* inode has been locked ever since it was created.
*/
- XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
/*
* xfs_trans_commit normally decrements the vnode ref count
@@ -1580,8 +1595,8 @@ xfs_create(
goto out_dqrele;
}
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
*ipp = ip;
@@ -1602,8 +1617,8 @@ xfs_create(
out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags);
out_dqrele:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1837,11 +1852,11 @@ xfs_remove(
return error;
}
- error = XFS_QM_DQATTACH(mp, dp, 0);
+ error = xfs_qm_dqattach(dp, 0);
if (error)
goto std_return;
- error = XFS_QM_DQATTACH(mp, ip, 0);
+ error = xfs_qm_dqattach(ip, 0);
if (error)
goto std_return;
@@ -2028,11 +2043,11 @@ xfs_link(
/* Return through std_return after this point. */
- error = XFS_QM_DQATTACH(mp, sip, 0);
+ error = xfs_qm_dqattach(sip, 0);
if (error)
goto std_return;
- error = XFS_QM_DQATTACH(mp, tdp, 0);
+ error = xfs_qm_dqattach(tdp, 0);
if (error)
goto std_return;
@@ -2205,8 +2220,7 @@ xfs_symlink(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(), current_fsgid(), prid,
+ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -2248,7 +2262,7 @@ xfs_symlink(
/*
* Reserve disk quota : blocks and inode.
*/
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
if (error)
goto error_return;
@@ -2288,7 +2302,7 @@ xfs_symlink(
/*
* Also attach the dquot(s) to it, if applicable.
*/
- XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
if (resblks)
resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2376,8 +2390,8 @@ xfs_symlink(
goto error2;
}
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
/* Fall through to std_return with error = 0 or errno from
* xfs_trans_commit */
@@ -2401,8 +2415,8 @@ std_return:
cancel_flags |= XFS_TRANS_ABORT;
error_return:
xfs_trans_cancel(tp, cancel_flags);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2541,7 +2555,8 @@ xfs_alloc_file_space(
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
if (len <= 0)
@@ -2628,8 +2643,8 @@ retry:
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
- qblocks, 0, quota_flag);
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
+ 0, quota_flag);
if (error)
goto error1;
@@ -2688,7 +2703,7 @@ dmapi_enospc_check:
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_bmap_cancel(&free_list);
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+ xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2827,7 +2842,8 @@ xfs_free_file_space(
xfs_itrace_entry(ip);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
error = 0;
@@ -2953,9 +2969,9 @@ xfs_free_file_space(
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
- ip->i_udquot, ip->i_gdquot, resblks, 0,
- XFS_QMOPT_RES_REGBLKS);
+ error = xfs_trans_reserve_quota(tp, mp,
+ ip->i_udquot, ip->i_gdquot,
+ resblks, 0, XFS_QMOPT_RES_REGBLKS);
if (error)
goto error1;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 04373c6c61f..a9e102de71a 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
+#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_fsync(struct xfs_inode *ip);
diff --git a/kernel/timer.c b/kernel/timer.c
index c01e568935e..faf2db897de 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -757,6 +757,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
wake_up_idle_cpu(cpu);
spin_unlock_irqrestore(&base->lock, flags);
}
+EXPORT_SYMBOL_GPL(add_timer_on);
/**
* del_timer - deactive a timer.