summaryrefslogtreecommitdiff
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/linux-2.6
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/kmem.c134
-rw-r--r--fs/xfs/linux-2.6/kmem.h157
-rw-r--r--fs/xfs/linux-2.6/mrlock.h106
-rw-r--r--fs/xfs/linux-2.6/mutex.h53
-rw-r--r--fs/xfs/linux-2.6/sema.h67
-rw-r--r--fs/xfs/linux-2.6/spin.h56
-rw-r--r--fs/xfs/linux-2.6/sv.h89
-rw-r--r--fs/xfs/linux-2.6/time.h51
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1275
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c1980
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h591
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h50
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c205
-rw-r--r--fs/xfs/linux-2.6/xfs_export.h122
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c573
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c124
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h48
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c74
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h44
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c1336
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c163
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h34
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c680
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h51
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h374
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c1082
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h116
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c132
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h166
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c912
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h138
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c174
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h114
-rw-r--r--fs/xfs/linux-2.6/xfs_version.h44
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.c330
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h223
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c455
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h666
38 files changed, 12989 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
new file mode 100644
index 00000000000..364ea8c386b
--- /dev/null
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/swap.h>
+#include <linux/blkdev.h>
+
+#include "time.h"
+#include "kmem.h"
+
+#define MAX_VMALLOCS 6
+#define MAX_SLAB_SIZE 0x20000
+
+
+void *
+kmem_alloc(size_t size, int flags)
+{
+ int retries = 0;
+ int lflags = kmem_flags_convert(flags);
+ void *ptr;
+
+ do {
+ if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
+ ptr = kmalloc(size, lflags);
+ else
+ ptr = __vmalloc(size, lflags, PAGE_KERNEL);
+ if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ return ptr;
+ if (!(++retries % 100))
+ printk(KERN_ERR "XFS: possible memory allocation "
+ "deadlock in %s (mode:0x%x)\n",
+ __FUNCTION__, lflags);
+ blk_congestion_wait(WRITE, HZ/50);
+ } while (1);
+}
+
+void *
+kmem_zalloc(size_t size, int flags)
+{
+ void *ptr;
+
+ ptr = kmem_alloc(size, flags);
+ if (ptr)
+ memset((char *)ptr, 0, (int)size);
+ return ptr;
+}
+
+void
+kmem_free(void *ptr, size_t size)
+{
+ if (((unsigned long)ptr < VMALLOC_START) ||
+ ((unsigned long)ptr >= VMALLOC_END)) {
+ kfree(ptr);
+ } else {
+ vfree(ptr);
+ }
+}
+
+void *
+kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
+{
+ void *new;
+
+ new = kmem_alloc(newsize, flags);
+ if (ptr) {
+ if (new)
+ memcpy(new, ptr,
+ ((oldsize < newsize) ? oldsize : newsize));
+ kmem_free(ptr, oldsize);
+ }
+ return new;
+}
+
+void *
+kmem_zone_alloc(kmem_zone_t *zone, int flags)
+{
+ int retries = 0;
+ int lflags = kmem_flags_convert(flags);
+ void *ptr;
+
+ do {
+ ptr = kmem_cache_alloc(zone, lflags);
+ if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ return ptr;
+ if (!(++retries % 100))
+ printk(KERN_ERR "XFS: possible memory allocation "
+ "deadlock in %s (mode:0x%x)\n",
+ __FUNCTION__, lflags);
+ blk_congestion_wait(WRITE, HZ/50);
+ } while (1);
+}
+
+void *
+kmem_zone_zalloc(kmem_zone_t *zone, int flags)
+{
+ void *ptr;
+
+ ptr = kmem_zone_alloc(zone, flags);
+ if (ptr)
+ memset((char *)ptr, 0, kmem_cache_size(zone));
+ return ptr;
+}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
new file mode 100644
index 00000000000..1397b669b05
--- /dev/null
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_KMEM_H__
+#define __XFS_SUPPORT_KMEM_H__
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+/*
+ * memory management routines
+ */
+#define KM_SLEEP 0x0001
+#define KM_NOSLEEP 0x0002
+#define KM_NOFS 0x0004
+#define KM_MAYFAIL 0x0008
+
+#define kmem_zone kmem_cache_s
+#define kmem_zone_t kmem_cache_t
+
+typedef unsigned long xfs_pflags_t;
+
+#define PFLAGS_TEST_NOIO() (current->flags & PF_NOIO)
+#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS)
+
+#define PFLAGS_SET_NOIO() do { \
+ current->flags |= PF_NOIO; \
+} while (0)
+
+#define PFLAGS_CLEAR_NOIO() do { \
+ current->flags &= ~PF_NOIO; \
+} while (0)
+
+/* these could be nested, so we save state */
+#define PFLAGS_SET_FSTRANS(STATEP) do { \
+ *(STATEP) = current->flags; \
+ current->flags |= PF_FSTRANS; \
+} while (0)
+
+#define PFLAGS_CLEAR_FSTRANS(STATEP) do { \
+ *(STATEP) = current->flags; \
+ current->flags &= ~PF_FSTRANS; \
+} while (0)
+
+/* Restore the PF_FSTRANS state to what was saved in STATEP */
+#define PFLAGS_RESTORE_FSTRANS(STATEP) do { \
+ current->flags = ((current->flags & ~PF_FSTRANS) | \
+ (*(STATEP) & PF_FSTRANS)); \
+} while (0)
+
+#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \
+ *(NSTATEP) = *(OSTATEP); \
+} while (0)
+
+static __inline unsigned int kmem_flags_convert(int flags)
+{
+ int lflags = __GFP_NOWARN; /* we'll report problems, if need be */
+
+#ifdef DEBUG
+ if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
+ printk(KERN_WARNING
+ "XFS: memory allocation with wrong flags (%x)\n", flags);
+ BUG();
+ }
+#endif
+
+ if (flags & KM_NOSLEEP) {
+ lflags |= GFP_ATOMIC;
+ } else {
+ lflags |= GFP_KERNEL;
+
+ /* avoid recusive callbacks to filesystem during transactions */
+ if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS))
+ lflags &= ~__GFP_FS;
+ }
+
+ return lflags;
+}
+
+static __inline kmem_zone_t *
+kmem_zone_init(int size, char *zone_name)
+{
+ return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL);
+}
+
+static __inline void
+kmem_zone_free(kmem_zone_t *zone, void *ptr)
+{
+ kmem_cache_free(zone, ptr);
+}
+
+static __inline void
+kmem_zone_destroy(kmem_zone_t *zone)
+{
+ if (zone && kmem_cache_destroy(zone))
+ BUG();
+}
+
+extern void *kmem_zone_zalloc(kmem_zone_t *, int);
+extern void *kmem_zone_alloc(kmem_zone_t *, int);
+
+extern void *kmem_alloc(size_t, int);
+extern void *kmem_realloc(void *, size_t, size_t, int);
+extern void *kmem_zalloc(size_t, int);
+extern void kmem_free(void *, size_t);
+
+typedef struct shrinker *kmem_shaker_t;
+typedef int (*kmem_shake_func_t)(int, unsigned int);
+
+static __inline kmem_shaker_t
+kmem_shake_register(kmem_shake_func_t sfunc)
+{
+ return set_shrinker(DEFAULT_SEEKS, sfunc);
+}
+
+static __inline void
+kmem_shake_deregister(kmem_shaker_t shrinker)
+{
+ remove_shrinker(shrinker);
+}
+
+static __inline int
+kmem_shake_allow(unsigned int gfp_mask)
+{
+ return (gfp_mask & __GFP_WAIT);
+}
+
+#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
new file mode 100644
index 00000000000..d2c11a098ff
--- /dev/null
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_MRLOCK_H__
+#define __XFS_SUPPORT_MRLOCK_H__
+
+#include <linux/rwsem.h>
+
+enum { MR_NONE, MR_ACCESS, MR_UPDATE };
+
+typedef struct {
+ struct rw_semaphore mr_lock;
+ int mr_writer;
+} mrlock_t;
+
+#define mrinit(mrp, name) \
+ ( (mrp)->mr_writer = 0, init_rwsem(&(mrp)->mr_lock) )
+#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
+#define mrfree(mrp) do { } while (0)
+#define mraccess(mrp) mraccessf(mrp, 0)
+#define mrupdate(mrp) mrupdatef(mrp, 0)
+
+static inline void mraccessf(mrlock_t *mrp, int flags)
+{
+ down_read(&mrp->mr_lock);
+}
+
+static inline void mrupdatef(mrlock_t *mrp, int flags)
+{
+ down_write(&mrp->mr_lock);
+ mrp->mr_writer = 1;
+}
+
+static inline int mrtryaccess(mrlock_t *mrp)
+{
+ return down_read_trylock(&mrp->mr_lock);
+}
+
+static inline int mrtryupdate(mrlock_t *mrp)
+{
+ if (!down_write_trylock(&mrp->mr_lock))
+ return 0;
+ mrp->mr_writer = 1;
+ return 1;
+}
+
+static inline void mrunlock(mrlock_t *mrp)
+{
+ if (mrp->mr_writer) {
+ mrp->mr_writer = 0;
+ up_write(&mrp->mr_lock);
+ } else {
+ up_read(&mrp->mr_lock);
+ }
+}
+
+static inline void mrdemote(mrlock_t *mrp)
+{
+ mrp->mr_writer = 0;
+ downgrade_write(&mrp->mr_lock);
+}
+
+#ifdef DEBUG
+/*
+ * Debug-only routine, without some platform-specific asm code, we can
+ * now only answer requests regarding whether we hold the lock for write
+ * (reader state is outside our visibility, we only track writer state).
+ * Note: means !ismrlocked would give false positivies, so don't do that.
+ */
+static inline int ismrlocked(mrlock_t *mrp, int type)
+{
+ if (mrp && type == MR_UPDATE)
+ return mrp->mr_writer;
+ return 1;
+}
+#endif
+
+#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/mutex.h b/fs/xfs/linux-2.6/mutex.h
new file mode 100644
index 00000000000..0b296bb944c
--- /dev/null
+++ b/fs/xfs/linux-2.6/mutex.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_MUTEX_H__
+#define __XFS_SUPPORT_MUTEX_H__
+
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+
+/*
+ * Map the mutex'es from IRIX to Linux semaphores.
+ *
+ * Destroy just simply initializes to -99 which should block all other
+ * callers.
+ */
+#define MUTEX_DEFAULT 0x0
+typedef struct semaphore mutex_t;
+
+#define mutex_init(lock, type, name) sema_init(lock, 1)
+#define mutex_destroy(lock) sema_init(lock, -99)
+#define mutex_lock(lock, num) down(lock)
+#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1)
+#define mutex_unlock(lock) up(lock)
+
+#endif /* __XFS_SUPPORT_MUTEX_H__ */
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
new file mode 100644
index 00000000000..30b67b4e1cb
--- /dev/null
+++ b/fs/xfs/linux-2.6/sema.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_SEMA_H__
+#define __XFS_SUPPORT_SEMA_H__
+
+#include <linux/time.h>
+#include <linux/wait.h>
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+/*
+ * sema_t structure just maps to struct semaphore in Linux kernel.
+ */
+
+typedef struct semaphore sema_t;
+
+#define init_sema(sp, val, c, d) sema_init(sp, val)
+#define initsema(sp, val) sema_init(sp, val)
+#define initnsema(sp, val, name) sema_init(sp, val)
+#define psema(sp, b) down(sp)
+#define vsema(sp) up(sp)
+#define valusema(sp) (atomic_read(&(sp)->count))
+#define freesema(sema)
+
+/*
+ * Map cpsema (try to get the sema) to down_trylock. We need to switch
+ * the return values since cpsema returns 1 (acquired) 0 (failed) and
+ * down_trylock returns the reverse 0 (acquired) 1 (failed).
+ */
+
+#define cpsema(sp) (down_trylock(sp) ? 0 : 1)
+
+/*
+ * Didn't do cvsema(sp). Not sure how to map this to up/down/...
+ * It does a vsema if the values is < 0 other wise nothing.
+ */
+
+#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/spin.h b/fs/xfs/linux-2.6/spin.h
new file mode 100644
index 00000000000..bcf60a0b8df
--- /dev/null
+++ b/fs/xfs/linux-2.6/spin.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_SPIN_H__
+#define __XFS_SUPPORT_SPIN_H__
+
+#include <linux/sched.h> /* preempt needs this */
+#include <linux/spinlock.h>
+
+/*
+ * Map lock_t from IRIX to Linux spinlocks.
+ *
+ * We do not make use of lock_t from interrupt context, so we do not
+ * have to worry about disabling interrupts at all (unlike IRIX).
+ */
+
+typedef spinlock_t lock_t;
+
+#define SPLDECL(s) unsigned long s
+
+#define spinlock_init(lock, name) spin_lock_init(lock)
+#define spinlock_destroy(lock)
+#define mutex_spinlock(lock) ({ spin_lock(lock); 0; })
+#define mutex_spinunlock(lock, s) do { spin_unlock(lock); (void)s; } while (0)
+#define nested_spinlock(lock) spin_lock(lock)
+#define nested_spinunlock(lock) spin_unlock(lock)
+
+#endif /* __XFS_SUPPORT_SPIN_H__ */
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
new file mode 100644
index 00000000000..821d3167e05
--- /dev/null
+++ b/fs/xfs/linux-2.6/sv.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_SV_H__
+#define __XFS_SUPPORT_SV_H__
+
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+/*
+ * Synchronisation variables.
+ *
+ * (Parameters "pri", "svf" and "rts" are not implemented)
+ */
+
+typedef struct sv_s {
+ wait_queue_head_t waiters;
+} sv_t;
+
+#define SV_FIFO 0x0 /* sv_t is FIFO type */
+#define SV_LIFO 0x2 /* sv_t is LIFO type */
+#define SV_PRIO 0x4 /* sv_t is PRIO type */
+#define SV_KEYED 0x6 /* sv_t is KEYED type */
+#define SV_DEFAULT SV_FIFO
+
+
+static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
+ unsigned long timeout)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&sv->waiters, &wait);
+ __set_current_state(state);
+ spin_unlock(lock);
+
+ schedule_timeout(timeout);
+
+ remove_wait_queue(&sv->waiters, &wait);
+}
+
+#define init_sv(sv,type,name,flag) \
+ init_waitqueue_head(&(sv)->waiters)
+#define sv_init(sv,flag,name) \
+ init_waitqueue_head(&(sv)->waiters)
+#define sv_destroy(sv) \
+ /*NOTHING*/
+#define sv_wait(sv, pri, lock, s) \
+ _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
+#define sv_wait_sig(sv, pri, lock, s) \
+ _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
+#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
+ _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
+#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
+ _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
+#define sv_signal(sv) \
+ wake_up(&(sv)->waiters)
+#define sv_broadcast(sv) \
+ wake_up_all(&(sv)->waiters)
+
+#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
new file mode 100644
index 00000000000..6c6fd0faa8e
--- /dev/null
+++ b/fs/xfs/linux-2.6/time.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPPORT_TIME_H__
+#define __XFS_SUPPORT_TIME_H__
+
+#include <linux/sched.h>
+#include <linux/time.h>
+
+typedef struct timespec timespec_t;
+
+static inline void delay(long ticks)
+{
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(ticks);
+}
+
+static inline void nanotime(struct timespec *tvp)
+{
+ *tvp = CURRENT_TIME;
+}
+
+#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
new file mode 100644
index 00000000000..76a84758073
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -0,0 +1,1275 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_trans.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+#include "xfs_iomap.h"
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+
+STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
+STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
+ struct writeback_control *wbc, void *, int, int);
+
+#if defined(XFS_RW_TRACE)
+void
+xfs_page_trace(
+ int tag,
+ struct inode *inode,
+ struct page *page,
+ int mask)
+{
+ xfs_inode_t *ip;
+ bhv_desc_t *bdp;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ loff_t isize = i_size_read(inode);
+ loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ int delalloc = -1, unmapped = -1, unwritten = -1;
+
+ if (page_has_buffers(page))
+ xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+
+ bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
+ ip = XFS_BHVTOI(bdp);
+ if (!ip->i_rwtrace)
+ return;
+
+ ktrace_enter(ip->i_rwtrace,
+ (void *)((unsigned long)tag),
+ (void *)ip,
+ (void *)inode,
+ (void *)page,
+ (void *)((unsigned long)mask),
+ (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
+ (void *)((unsigned long)((isize >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(isize & 0xffffffff)),
+ (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(offset & 0xffffffff)),
+ (void *)((unsigned long)delalloc),
+ (void *)((unsigned long)unmapped),
+ (void *)((unsigned long)unwritten),
+ (void *)NULL,
+ (void *)NULL);
+}
+#else
+#define xfs_page_trace(tag, inode, page, mask)
+#endif
+
+void
+linvfs_unwritten_done(
+ struct buffer_head *bh,
+ int uptodate)
+{
+ xfs_buf_t *pb = (xfs_buf_t *)bh->b_private;
+
+ ASSERT(buffer_unwritten(bh));
+ bh->b_end_io = NULL;
+ clear_buffer_unwritten(bh);
+ if (!uptodate)
+ pagebuf_ioerror(pb, EIO);
+ if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+ pagebuf_iodone(pb, 1, 1);
+ }
+ end_buffer_async_write(bh, uptodate);
+}
+
+/*
+ * Issue transactions to convert a buffer range from unwritten
+ * to written extents (buffered IO).
+ */
+STATIC void
+linvfs_unwritten_convert(
+ xfs_buf_t *bp)
+{
+ vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
+ int error;
+
+ BUG_ON(atomic_read(&bp->pb_hold) < 1);
+ VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
+ BMAPI_UNWRITTEN, NULL, NULL, error);
+ XFS_BUF_SET_FSPRIVATE(bp, NULL);
+ XFS_BUF_CLR_IODONE_FUNC(bp);
+ XFS_BUF_UNDATAIO(bp);
+ iput(LINVFS_GET_IP(vp));
+ pagebuf_iodone(bp, 0, 0);
+}
+
+/*
+ * Issue transactions to convert a buffer range from unwritten
+ * to written extents (direct IO).
+ */
+STATIC void
+linvfs_unwritten_convert_direct(
+ struct inode *inode,
+ loff_t offset,
+ ssize_t size,
+ void *private)
+{
+ ASSERT(!private || inode == (struct inode *)private);
+
+ /* private indicates an unwritten extent lay beneath this IO */
+ if (private && size > 0) {
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error;
+
+ VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+ }
+}
+
+STATIC int
+xfs_map_blocks(
+ struct inode *inode,
+ loff_t offset,
+ ssize_t count,
+ xfs_iomap_t *mapp,
+ int flags)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error, nmaps = 1;
+
+ VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
+ if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
+ VMODIFY(vp);
+ return -error;
+}
+
+/*
+ * Finds the corresponding mapping in block @map array of the
+ * given @offset within a @page.
+ */
+STATIC xfs_iomap_t *
+xfs_offset_to_map(
+ struct page *page,
+ xfs_iomap_t *iomapp,
+ unsigned long offset)
+{
+ loff_t full_offset; /* offset from start of file */
+
+ ASSERT(offset < PAGE_CACHE_SIZE);
+
+ full_offset = page->index; /* NB: using 64bit number */
+ full_offset <<= PAGE_CACHE_SHIFT; /* offset from file start */
+ full_offset += offset; /* offset from page start */
+
+ if (full_offset < iomapp->iomap_offset)
+ return NULL;
+ if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
+ return iomapp;
+ return NULL;
+}
+
+STATIC void
+xfs_map_at_offset(
+ struct page *page,
+ struct buffer_head *bh,
+ unsigned long offset,
+ int block_bits,
+ xfs_iomap_t *iomapp)
+{
+ xfs_daddr_t bn;
+ loff_t delta;
+ int sector_shift;
+
+ ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
+ ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
+ ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
+
+ delta = page->index;
+ delta <<= PAGE_CACHE_SHIFT;
+ delta += offset;
+ delta -= iomapp->iomap_offset;
+ delta >>= block_bits;
+
+ sector_shift = block_bits - BBSHIFT;
+ bn = iomapp->iomap_bn >> sector_shift;
+ bn += delta;
+ BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
+ ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
+
+ lock_buffer(bh);
+ bh->b_blocknr = bn;
+ bh->b_bdev = iomapp->iomap_target->pbr_bdev;
+ set_buffer_mapped(bh);
+ clear_buffer_delay(bh);
+}
+
+/*
+ * Look for a page at index which is unlocked and contains our
+ * unwritten extent flagged buffers at its head. Returns page
+ * locked and with an extra reference count, and length of the
+ * unwritten extent component on this page that we can write,
+ * in units of filesystem blocks.
+ */
+STATIC struct page *
+xfs_probe_unwritten_page(
+ struct address_space *mapping,
+ pgoff_t index,
+ xfs_iomap_t *iomapp,
+ xfs_buf_t *pb,
+ unsigned long max_offset,
+ unsigned long *fsbs,
+ unsigned int bbits)
+{
+ struct page *page;
+
+ page = find_trylock_page(mapping, index);
+ if (!page)
+ return NULL;
+ if (PageWriteback(page))
+ goto out;
+
+ if (page->mapping && page_has_buffers(page)) {
+ struct buffer_head *bh, *head;
+ unsigned long p_offset = 0;
+
+ *fsbs = 0;
+ bh = head = page_buffers(page);
+ do {
+ if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
+ break;
+ if (!xfs_offset_to_map(page, iomapp, p_offset))
+ break;
+ if (p_offset >= max_offset)
+ break;
+ xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
+ set_buffer_unwritten_io(bh);
+ bh->b_private = pb;
+ p_offset += bh->b_size;
+ (*fsbs)++;
+ } while ((bh = bh->b_this_page) != head);
+
+ if (p_offset)
+ return page;
+ }
+
+out:
+ unlock_page(page);
+ return NULL;
+}
+
+/*
+ * Look for a page at index which is unlocked and not mapped
+ * yet - clustering for mmap write case.
+ */
+STATIC unsigned int
+xfs_probe_unmapped_page(
+ struct address_space *mapping,
+ pgoff_t index,
+ unsigned int pg_offset)
+{
+ struct page *page;
+ int ret = 0;
+
+ page = find_trylock_page(mapping, index);
+ if (!page)
+ return 0;
+ if (PageWriteback(page))
+ goto out;
+
+ if (page->mapping && PageDirty(page)) {
+ if (page_has_buffers(page)) {
+ struct buffer_head *bh, *head;
+
+ bh = head = page_buffers(page);
+ do {
+ if (buffer_mapped(bh) || !buffer_uptodate(bh))
+ break;
+ ret += bh->b_size;
+ if (ret >= pg_offset)
+ break;
+ } while ((bh = bh->b_this_page) != head);
+ } else
+ ret = PAGE_CACHE_SIZE;
+ }
+
+out:
+ unlock_page(page);
+ return ret;
+}
+
+STATIC unsigned int
+xfs_probe_unmapped_cluster(
+ struct inode *inode,
+ struct page *startpage,
+ struct buffer_head *bh,
+ struct buffer_head *head)
+{
+ pgoff_t tindex, tlast, tloff;
+ unsigned int pg_offset, len, total = 0;
+ struct address_space *mapping = inode->i_mapping;
+
+ /* First sum forwards in this page */
+ do {
+ if (buffer_mapped(bh))
+ break;
+ total += bh->b_size;
+ } while ((bh = bh->b_this_page) != head);
+
+ /* If we reached the end of the page, sum forwards in
+ * following pages.
+ */
+ if (bh == head) {
+ tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+ /* Prune this back to avoid pathological behavior */
+ tloff = min(tlast, startpage->index + 64);
+ for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
+ len = xfs_probe_unmapped_page(mapping, tindex,
+ PAGE_CACHE_SIZE);
+ if (!len)
+ return total;
+ total += len;
+ }
+ if (tindex == tlast &&
+ (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+ total += xfs_probe_unmapped_page(mapping,
+ tindex, pg_offset);
+ }
+ }
+ return total;
+}
+
+/*
+ * Probe for a given page (index) in the inode and test if it is delayed
+ * and without unwritten buffers. Returns page locked and with an extra
+ * reference count.
+ */
+STATIC struct page *
+xfs_probe_delalloc_page(
+ struct inode *inode,
+ pgoff_t index)
+{
+ struct page *page;
+
+ page = find_trylock_page(inode->i_mapping, index);
+ if (!page)
+ return NULL;
+ if (PageWriteback(page))
+ goto out;
+
+ if (page->mapping && page_has_buffers(page)) {
+ struct buffer_head *bh, *head;
+ int acceptable = 0;
+
+ bh = head = page_buffers(page);
+ do {
+ if (buffer_unwritten(bh)) {
+ acceptable = 0;
+ break;
+ } else if (buffer_delay(bh)) {
+ acceptable = 1;
+ }
+ } while ((bh = bh->b_this_page) != head);
+
+ if (acceptable)
+ return page;
+ }
+
+out:
+ unlock_page(page);
+ return NULL;
+}
+
+STATIC int
+xfs_map_unwritten(
+ struct inode *inode,
+ struct page *start_page,
+ struct buffer_head *head,
+ struct buffer_head *curr,
+ unsigned long p_offset,
+ int block_bits,
+ xfs_iomap_t *iomapp,
+ struct writeback_control *wbc,
+ int startio,
+ int all_bh)
+{
+ struct buffer_head *bh = curr;
+ xfs_iomap_t *tmp;
+ xfs_buf_t *pb;
+ loff_t offset, size;
+ unsigned long nblocks = 0;
+
+ offset = start_page->index;
+ offset <<= PAGE_CACHE_SHIFT;
+ offset += p_offset;
+
+ /* get an "empty" pagebuf to manage IO completion
+ * Proper values will be set before returning */
+ pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
+ if (!pb)
+ return -EAGAIN;
+
+ /* Take a reference to the inode to prevent it from
+ * being reclaimed while we have outstanding unwritten
+ * extent IO on it.
+ */
+ if ((igrab(inode)) != inode) {
+ pagebuf_free(pb);
+ return -EAGAIN;
+ }
+
+ /* Set the count to 1 initially, this will stop an I/O
+ * completion callout which happens before we have started
+ * all the I/O from calling pagebuf_iodone too early.
+ */
+ atomic_set(&pb->pb_io_remaining, 1);
+
+ /* First map forwards in the page consecutive buffers
+ * covering this unwritten extent
+ */
+ do {
+ if (!buffer_unwritten(bh))
+ break;
+ tmp = xfs_offset_to_map(start_page, iomapp, p_offset);
+ if (!tmp)
+ break;
+ xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
+ set_buffer_unwritten_io(bh);
+ bh->b_private = pb;
+ p_offset += bh->b_size;
+ nblocks++;
+ } while ((bh = bh->b_this_page) != head);
+
+ atomic_add(nblocks, &pb->pb_io_remaining);
+
+ /* If we reached the end of the page, map forwards in any
+ * following pages which are also covered by this extent.
+ */
+ if (bh == head) {
+ struct address_space *mapping = inode->i_mapping;
+ pgoff_t tindex, tloff, tlast;
+ unsigned long bs;
+ unsigned int pg_offset, bbits = inode->i_blkbits;
+ struct page *page;
+
+ tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+ tloff = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT;
+ tloff = min(tlast, tloff);
+ for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
+ page = xfs_probe_unwritten_page(mapping,
+ tindex, iomapp, pb,
+ PAGE_CACHE_SIZE, &bs, bbits);
+ if (!page)
+ break;
+ nblocks += bs;
+ atomic_add(bs, &pb->pb_io_remaining);
+ xfs_convert_page(inode, page, iomapp, wbc, pb,
+ startio, all_bh);
+ /* stop if converting the next page might add
+ * enough blocks that the corresponding byte
+ * count won't fit in our ulong page buf length */
+ if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
+ goto enough;
+ }
+
+ if (tindex == tlast &&
+ (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
+ page = xfs_probe_unwritten_page(mapping,
+ tindex, iomapp, pb,
+ pg_offset, &bs, bbits);
+ if (page) {
+ nblocks += bs;
+ atomic_add(bs, &pb->pb_io_remaining);
+ xfs_convert_page(inode, page, iomapp, wbc, pb,
+ startio, all_bh);
+ if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
+ goto enough;
+ }
+ }
+ }
+
+enough:
+ size = nblocks; /* NB: using 64bit number here */
+ size <<= block_bits; /* convert fsb's to byte range */
+
+ XFS_BUF_DATAIO(pb);
+ XFS_BUF_ASYNC(pb);
+ XFS_BUF_SET_SIZE(pb, size);
+ XFS_BUF_SET_COUNT(pb, size);
+ XFS_BUF_SET_OFFSET(pb, offset);
+ XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
+ XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
+
+ if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+ pagebuf_iodone(pb, 1, 1);
+ }
+
+ return 0;
+}
+
+STATIC void
+xfs_submit_page(
+ struct page *page,
+ struct writeback_control *wbc,
+ struct buffer_head *bh_arr[],
+ int bh_count,
+ int probed_page,
+ int clear_dirty)
+{
+ struct buffer_head *bh;
+ int i;
+
+ BUG_ON(PageWriteback(page));
+ set_page_writeback(page);
+ if (clear_dirty)
+ clear_page_dirty(page);
+ unlock_page(page);
+
+ if (bh_count) {
+ for (i = 0; i < bh_count; i++) {
+ bh = bh_arr[i];
+ mark_buffer_async_write(bh);
+ if (buffer_unwritten(bh))
+ set_buffer_unwritten_io(bh);
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+ }
+
+ for (i = 0; i < bh_count; i++)
+ submit_bh(WRITE, bh_arr[i]);
+
+ if (probed_page && clear_dirty)
+ wbc->nr_to_write--; /* Wrote an "extra" page */
+ } else {
+ end_page_writeback(page);
+ wbc->pages_skipped++; /* We didn't write this page */
+ }
+}
+
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * except for the original page of a writepage, this is called on
+ * delalloc/unwritten pages only, for the original page it is possible
+ * that the page has no mapping at all.
+ */
+STATIC void
+xfs_convert_page(
+ struct inode *inode,
+ struct page *page,
+ xfs_iomap_t *iomapp,
+ struct writeback_control *wbc,
+ void *private,
+ int startio,
+ int all_bh)
+{
+ struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+ xfs_iomap_t *mp = iomapp, *tmp;
+ unsigned long end, offset;
+ pgoff_t end_index;
+ int i = 0, index = 0;
+ int bbits = inode->i_blkbits;
+
+ end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+ if (page->index < end_index) {
+ end = PAGE_CACHE_SIZE;
+ } else {
+ end = i_size_read(inode) & (PAGE_CACHE_SIZE-1);
+ }
+ bh = head = page_buffers(page);
+ do {
+ offset = i << bbits;
+ if (offset >= end)
+ break;
+ if (!(PageUptodate(page) || buffer_uptodate(bh)))
+ continue;
+ if (buffer_mapped(bh) && all_bh &&
+ !(buffer_unwritten(bh) || buffer_delay(bh))) {
+ if (startio) {
+ lock_buffer(bh);
+ bh_arr[index++] = bh;
+ }
+ continue;
+ }
+ tmp = xfs_offset_to_map(page, mp, offset);
+ if (!tmp)
+ continue;
+ ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
+ ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
+
+ /* If this is a new unwritten extent buffer (i.e. one
+ * that we haven't passed in private data for, we must
+ * now map this buffer too.
+ */
+ if (buffer_unwritten(bh) && !bh->b_end_io) {
+ ASSERT(tmp->iomap_flags & IOMAP_UNWRITTEN);
+ xfs_map_unwritten(inode, page, head, bh, offset,
+ bbits, tmp, wbc, startio, all_bh);
+ } else if (! (buffer_unwritten(bh) && buffer_locked(bh))) {
+ xfs_map_at_offset(page, bh, offset, bbits, tmp);
+ if (buffer_unwritten(bh)) {
+ set_buffer_unwritten_io(bh);
+ bh->b_private = private;
+ ASSERT(private);
+ }
+ }
+ if (startio) {
+ bh_arr[index++] = bh;
+ } else {
+ set_buffer_dirty(bh);
+ unlock_buffer(bh);
+ mark_buffer_dirty(bh);
+ }
+ } while (i++, (bh = bh->b_this_page) != head);
+
+ if (startio) {
+ xfs_submit_page(page, wbc, bh_arr, index, 1, index == i);
+ } else {
+ unlock_page(page);
+ }
+}
+
+/*
+ * Convert & write out a cluster of pages in the same extent as defined
+ * by mp and following the start page.
+ */
+STATIC void
+xfs_cluster_write(
+ struct inode *inode,
+ pgoff_t tindex,
+ xfs_iomap_t *iomapp,
+ struct writeback_control *wbc,
+ int startio,
+ int all_bh,
+ pgoff_t tlast)
+{
+ struct page *page;
+
+ for (; tindex <= tlast; tindex++) {
+ page = xfs_probe_delalloc_page(inode, tindex);
+ if (!page)
+ break;
+ xfs_convert_page(inode, page, iomapp, wbc, NULL,
+ startio, all_bh);
+ }
+}
+
+/*
+ * Calling this without startio set means we are being asked to make a dirty
+ * page ready for freeing it's buffers. When called with startio set then
+ * we are coming from writepage.
+ *
+ * When called with startio set it is important that we write the WHOLE
+ * page if possible.
+ * The bh->b_state's cannot know if any of the blocks or which block for
+ * that matter are dirty due to mmap writes, and therefore bh uptodate is
+ * only vaild if the page itself isn't completely uptodate. Some layers
+ * may clear the page dirty flag prior to calling write page, under the
+ * assumption the entire page will be written out; by not writing out the
+ * whole page the page can be reused before all valid dirty data is
+ * written out. Note: in the case of a page that has been dirty'd by
+ * mapwrite and but partially setup by block_prepare_write the
+ * bh->b_states's will not agree and only ones setup by BPW/BCW will have
+ * valid state, thus the whole page must be written out thing.
+ */
+
+STATIC int
+xfs_page_state_convert(
+ struct inode *inode,
+ struct page *page,
+ struct writeback_control *wbc,
+ int startio,
+ int unmapped) /* also implies page uptodate */
+{
+ struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
+ xfs_iomap_t *iomp, iomap;
+ loff_t offset;
+ unsigned long p_offset = 0;
+ __uint64_t end_offset;
+ pgoff_t end_index, last_index, tlast;
+ int len, err, i, cnt = 0, uptodate = 1;
+ int flags = startio ? 0 : BMAPI_TRYLOCK;
+ int page_dirty, delalloc = 0;
+
+ /* Is this page beyond the end of the file? */
+ offset = i_size_read(inode);
+ end_index = offset >> PAGE_CACHE_SHIFT;
+ last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
+ if (page->index >= end_index) {
+ if ((page->index >= end_index + 1) ||
+ !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+ err = -EIO;
+ goto error;
+ }
+ }
+
+ offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ end_offset = min_t(unsigned long long,
+ offset + PAGE_CACHE_SIZE, i_size_read(inode));
+
+ bh = head = page_buffers(page);
+ iomp = NULL;
+
+ /*
+ * page_dirty is initially a count of buffers on the page and
+ * is decrememted as we move each into a cleanable state.
+ */
+ len = bh->b_size;
+ page_dirty = PAGE_CACHE_SIZE / len;
+
+ do {
+ if (offset >= end_offset)
+ break;
+ if (!buffer_uptodate(bh))
+ uptodate = 0;
+ if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio)
+ continue;
+
+ if (iomp) {
+ iomp = xfs_offset_to_map(page, &iomap, p_offset);
+ }
+
+ /*
+ * First case, map an unwritten extent and prepare for
+ * extent state conversion transaction on completion.
+ */
+ if (buffer_unwritten(bh)) {
+ if (!startio)
+ continue;
+ if (!iomp) {
+ err = xfs_map_blocks(inode, offset, len, &iomap,
+ BMAPI_READ|BMAPI_IGNSTATE);
+ if (err) {
+ goto error;
+ }
+ iomp = xfs_offset_to_map(page, &iomap,
+ p_offset);
+ }
+ if (iomp) {
+ if (!bh->b_end_io) {
+ err = xfs_map_unwritten(inode, page,
+ head, bh, p_offset,
+ inode->i_blkbits, iomp,
+ wbc, startio, unmapped);
+ if (err) {
+ goto error;
+ }
+ } else {
+ set_bit(BH_Lock, &bh->b_state);
+ }
+ BUG_ON(!buffer_locked(bh));
+ bh_arr[cnt++] = bh;
+ page_dirty--;
+ }
+ /*
+ * Second case, allocate space for a delalloc buffer.
+ * We can return EAGAIN here in the release page case.
+ */
+ } else if (buffer_delay(bh)) {
+ if (!iomp) {
+ delalloc = 1;
+ err = xfs_map_blocks(inode, offset, len, &iomap,
+ BMAPI_ALLOCATE | flags);
+ if (err) {
+ goto error;
+ }
+ iomp = xfs_offset_to_map(page, &iomap,
+ p_offset);
+ }
+ if (iomp) {
+ xfs_map_at_offset(page, bh, p_offset,
+ inode->i_blkbits, iomp);
+ if (startio) {
+ bh_arr[cnt++] = bh;
+ } else {
+ set_buffer_dirty(bh);
+ unlock_buffer(bh);
+ mark_buffer_dirty(bh);
+ }
+ page_dirty--;
+ }
+ } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+ (unmapped || startio)) {
+
+ if (!buffer_mapped(bh)) {
+ int size;
+
+ /*
+ * Getting here implies an unmapped buffer
+ * was found, and we are in a path where we
+ * need to write the whole page out.
+ */
+ if (!iomp) {
+ size = xfs_probe_unmapped_cluster(
+ inode, page, bh, head);
+ err = xfs_map_blocks(inode, offset,
+ size, &iomap,
+ BMAPI_WRITE|BMAPI_MMAP);
+ if (err) {
+ goto error;
+ }
+ iomp = xfs_offset_to_map(page, &iomap,
+ p_offset);
+ }
+ if (iomp) {
+ xfs_map_at_offset(page,
+ bh, p_offset,
+ inode->i_blkbits, iomp);
+ if (startio) {
+ bh_arr[cnt++] = bh;
+ } else {
+ set_buffer_dirty(bh);
+ unlock_buffer(bh);
+ mark_buffer_dirty(bh);
+ }
+ page_dirty--;
+ }
+ } else if (startio) {
+ if (buffer_uptodate(bh) &&
+ !test_and_set_bit(BH_Lock, &bh->b_state)) {
+ bh_arr[cnt++] = bh;
+ page_dirty--;
+ }
+ }
+ }
+ } while (offset += len, p_offset += len,
+ ((bh = bh->b_this_page) != head));
+
+ if (uptodate && bh == head)
+ SetPageUptodate(page);
+
+ if (startio)
+ xfs_submit_page(page, wbc, bh_arr, cnt, 0, 1);
+
+ if (iomp) {
+ tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+ PAGE_CACHE_SHIFT;
+ if (delalloc && (tlast > last_index))
+ tlast = last_index;
+ xfs_cluster_write(inode, page->index + 1, iomp, wbc,
+ startio, unmapped, tlast);
+ }
+
+ return page_dirty;
+
+error:
+ for (i = 0; i < cnt; i++) {
+ unlock_buffer(bh_arr[i]);
+ }
+
+ /*
+ * If it's delalloc and we have nowhere to put it,
+ * throw it away, unless the lower layers told
+ * us to try again.
+ */
+ if (err != -EAGAIN) {
+ if (!unmapped) {
+ block_invalidatepage(page, 0);
+ }
+ ClearPageUptodate(page);
+ }
+ return err;
+}
+
+STATIC int
+__linvfs_get_block(
+ struct inode *inode,
+ sector_t iblock,
+ unsigned long blocks,
+ struct buffer_head *bh_result,
+ int create,
+ int direct,
+ bmapi_flags_t flags)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ xfs_iomap_t iomap;
+ int retpbbm = 1;
+ int error;
+ ssize_t size;
+ loff_t offset = (loff_t)iblock << inode->i_blkbits;
+
+ if (blocks)
+ size = blocks << inode->i_blkbits;
+ else
+ size = 1 << inode->i_blkbits;
+
+ VOP_BMAP(vp, offset, size,
+ create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
+ if (error)
+ return -error;
+
+ if (retpbbm == 0)
+ return 0;
+
+ if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
+ xfs_daddr_t bn;
+ loff_t delta;
+
+ /* For unwritten extents do not report a disk address on
+ * the read case (treat as if we're reading into a hole).
+ */
+ if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+ delta = offset - iomap.iomap_offset;
+ delta >>= inode->i_blkbits;
+
+ bn = iomap.iomap_bn >> (inode->i_blkbits - BBSHIFT);
+ bn += delta;
+ BUG_ON(!bn && !(iomap.iomap_flags & IOMAP_REALTIME));
+ bh_result->b_blocknr = bn;
+ set_buffer_mapped(bh_result);
+ }
+ if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+ if (direct)
+ bh_result->b_private = inode;
+ set_buffer_unwritten(bh_result);
+ set_buffer_delay(bh_result);
+ }
+ }
+
+ /* If this is a realtime file, data might be on a new device */
+ bh_result->b_bdev = iomap.iomap_target->pbr_bdev;
+
+ /* If we previously allocated a block out beyond eof and
+ * we are now coming back to use it then we will need to
+ * flag it as new even if it has a disk address.
+ */
+ if (create &&
+ ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+ (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW))) {
+ set_buffer_new(bh_result);
+ }
+
+ if (iomap.iomap_flags & IOMAP_DELAY) {
+ BUG_ON(direct);
+ if (create) {
+ set_buffer_uptodate(bh_result);
+ set_buffer_mapped(bh_result);
+ set_buffer_delay(bh_result);
+ }
+ }
+
+ if (blocks) {
+ bh_result->b_size = (ssize_t)min(
+ (loff_t)(iomap.iomap_bsize - iomap.iomap_delta),
+ (loff_t)(blocks << inode->i_blkbits));
+ }
+
+ return 0;
+}
+
+int
+linvfs_get_block(
+ struct inode *inode,
+ sector_t iblock,
+ struct buffer_head *bh_result,
+ int create)
+{
+ return __linvfs_get_block(inode, iblock, 0, bh_result,
+ create, 0, BMAPI_WRITE);
+}
+
+STATIC int
+linvfs_get_blocks_direct(
+ struct inode *inode,
+ sector_t iblock,
+ unsigned long max_blocks,
+ struct buffer_head *bh_result,
+ int create)
+{
+ return __linvfs_get_block(inode, iblock, max_blocks, bh_result,
+ create, 1, BMAPI_WRITE|BMAPI_DIRECT);
+}
+
+STATIC ssize_t
+linvfs_direct_IO(
+ int rw,
+ struct kiocb *iocb,
+ const struct iovec *iov,
+ loff_t offset,
+ unsigned long nr_segs)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ xfs_iomap_t iomap;
+ int maps = 1;
+ int error;
+
+ VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
+ if (error)
+ return -error;
+
+ return blockdev_direct_IO_own_locking(rw, iocb, inode,
+ iomap.iomap_target->pbr_bdev,
+ iov, offset, nr_segs,
+ linvfs_get_blocks_direct,
+ linvfs_unwritten_convert_direct);
+}
+
+
+STATIC sector_t
+linvfs_bmap(
+ struct address_space *mapping,
+ sector_t block)
+{
+ struct inode *inode = (struct inode *)mapping->host;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error;
+
+ vn_trace_entry(vp, "linvfs_bmap", (inst_t *)__return_address);
+
+ VOP_RWLOCK(vp, VRWLOCK_READ);
+ VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
+ VOP_RWUNLOCK(vp, VRWLOCK_READ);
+ return generic_block_bmap(mapping, block, linvfs_get_block);
+}
+
+STATIC int
+linvfs_readpage(
+ struct file *unused,
+ struct page *page)
+{
+ return mpage_readpage(page, linvfs_get_block);
+}
+
+STATIC int
+linvfs_readpages(
+ struct file *unused,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned nr_pages)
+{
+ return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
+}
+
+STATIC void
+xfs_count_page_state(
+ struct page *page,
+ int *delalloc,
+ int *unmapped,
+ int *unwritten)
+{
+ struct buffer_head *bh, *head;
+
+ *delalloc = *unmapped = *unwritten = 0;
+
+ bh = head = page_buffers(page);
+ do {
+ if (buffer_uptodate(bh) && !buffer_mapped(bh))
+ (*unmapped) = 1;
+ else if (buffer_unwritten(bh) && !buffer_delay(bh))
+ clear_buffer_unwritten(bh);
+ else if (buffer_unwritten(bh))
+ (*unwritten) = 1;
+ else if (buffer_delay(bh))
+ (*delalloc) = 1;
+ } while ((bh = bh->b_this_page) != head);
+}
+
+
+/*
+ * writepage: Called from one of two places:
+ *
+ * 1. we are flushing a delalloc buffer head.
+ *
+ * 2. we are writing out a dirty page. Typically the page dirty
+ * state is cleared before we get here. In this case is it
+ * conceivable we have no buffer heads.
+ *
+ * For delalloc space on the page we need to allocate space and
+ * flush it. For unmapped buffer heads on the page we should
+ * allocate space if the page is uptodate. For any other dirty
+ * buffer heads on the page we should flush them.
+ *
+ * If we detect that a transaction would be required to flush
+ * the page, we have to check the process flags first, if we
+ * are already in a transaction or disk I/O during allocations
+ * is off, we need to fail the writepage and redirty the page.
+ */
+
+STATIC int
+linvfs_writepage(
+ struct page *page,
+ struct writeback_control *wbc)
+{
+ int error;
+ int need_trans;
+ int delalloc, unmapped, unwritten;
+ struct inode *inode = page->mapping->host;
+
+ xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);
+
+ /*
+ * We need a transaction if:
+ * 1. There are delalloc buffers on the page
+ * 2. The page is uptodate and we have unmapped buffers
+ * 3. The page is uptodate and we have no buffers
+ * 4. There are unwritten buffers on the page
+ */
+
+ if (!page_has_buffers(page)) {
+ unmapped = 1;
+ need_trans = 1;
+ } else {
+ xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+ if (!PageUptodate(page))
+ unmapped = 0;
+ need_trans = delalloc + unmapped + unwritten;
+ }
+
+ /*
+ * If we need a transaction and the process flags say
+ * we are already in a transaction, or no IO is allowed
+ * then mark the page dirty again and leave the page
+ * as is.
+ */
+ if (PFLAGS_TEST_FSTRANS() && need_trans)
+ goto out_fail;
+
+ /*
+ * Delay hooking up buffer heads until we have
+ * made our go/no-go decision.
+ */
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+
+ /*
+ * Convert delayed allocate, unwritten or unmapped space
+ * to real space and flush out to disk.
+ */
+ error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
+ if (error == -EAGAIN)
+ goto out_fail;
+ if (unlikely(error < 0))
+ goto out_unlock;
+
+ return 0;
+
+out_fail:
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+out_unlock:
+ unlock_page(page);
+ return error;
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. Possibly the page is already clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 0 if the page is ok to release, 1 otherwise.
+ *
+ * Possible scenarios are:
+ *
+ * 1. We are being called to release a page which has been written
+ * to via regular I/O. buffer heads will be dirty and possibly
+ * delalloc. If no delalloc buffer heads in this case then we
+ * can just return zero.
+ *
+ * 2. We are called to release a page which has been written via
+ * mmap, all we need to do is ensure there is no delalloc
+ * state in the buffer heads, if not we can let the caller
+ * free them and we should come back later via writepage.
+ */
+STATIC int
+linvfs_release_page(
+ struct page *page,
+ int gfp_mask)
+{
+ struct inode *inode = page->mapping->host;
+ int dirty, delalloc, unmapped, unwritten;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 1,
+ };
+
+ xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);
+
+ xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
+ if (!delalloc && !unwritten)
+ goto free_buffers;
+
+ if (!(gfp_mask & __GFP_FS))
+ return 0;
+
+ /* If we are already inside a transaction or the thread cannot
+ * do I/O, we cannot release this page.
+ */
+ if (PFLAGS_TEST_FSTRANS())
+ return 0;
+
+ /*
+ * Convert delalloc space to real space, do not flush the
+ * data out to disk, that will be done by the caller.
+ * Never need to allocate space here - we will always
+ * come back to writepage in that case.
+ */
+ dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
+ if (dirty == 0 && !unwritten)
+ goto free_buffers;
+ return 0;
+
+free_buffers:
+ return try_to_free_buffers(page);
+}
+
+STATIC int
+linvfs_prepare_write(
+ struct file *file,
+ struct page *page,
+ unsigned int from,
+ unsigned int to)
+{
+ return block_prepare_write(page, from, to, linvfs_get_block);
+}
+
+struct address_space_operations linvfs_aops = {
+ .readpage = linvfs_readpage,
+ .readpages = linvfs_readpages,
+ .writepage = linvfs_writepage,
+ .sync_page = block_sync_page,
+ .releasepage = linvfs_release_page,
+ .prepare_write = linvfs_prepare_write,
+ .commit_write = generic_commit_write,
+ .bmap = linvfs_bmap,
+ .direct_IO = linvfs_direct_IO,
+};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
new file mode 100644
index 00000000000..23e0eb67fc2
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -0,0 +1,1980 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * The xfs_buf.c code provides an abstract buffer cache model on top
+ * of the Linux page cache. Cached metadata blocks for a file system
+ * are hashed to the inode for the block device. xfs_buf.c assembles
+ * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
+ *
+ * Written by Steve Lord, Jim Mostek, Russell Cattelan
+ * and Rajagopal Ananthanarayanan ("ananth") at SGI.
+ *
+ */
+
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/bio.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/percpu.h>
+#include <linux/blkdev.h>
+#include <linux/hash.h>
+
+#include "xfs_linux.h"
+
+/*
+ * File wide globals
+ */
+
+STATIC kmem_cache_t *pagebuf_cache;
+STATIC kmem_shaker_t pagebuf_shake;
+STATIC int pagebuf_daemon_wakeup(int, unsigned int);
+STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
+STATIC struct workqueue_struct *pagebuf_logio_workqueue;
+STATIC struct workqueue_struct *pagebuf_dataio_workqueue;
+
+/*
+ * Pagebuf debugging
+ */
+
+#ifdef PAGEBUF_TRACE
+void
+pagebuf_trace(
+ xfs_buf_t *pb,
+ char *id,
+ void *data,
+ void *ra)
+{
+ ktrace_enter(pagebuf_trace_buf,
+ pb, id,
+ (void *)(unsigned long)pb->pb_flags,
+ (void *)(unsigned long)pb->pb_hold.counter,
+ (void *)(unsigned long)pb->pb_sema.count.counter,
+ (void *)current,
+ data, ra,
+ (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff),
+ (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff),
+ (void *)(unsigned long)pb->pb_buffer_length,
+ NULL, NULL, NULL, NULL, NULL);
+}
+ktrace_t *pagebuf_trace_buf;
+#define PAGEBUF_TRACE_SIZE 4096
+#define PB_TRACE(pb, id, data) \
+ pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0))
+#else
+#define PB_TRACE(pb, id, data) do { } while (0)
+#endif
+
+#ifdef PAGEBUF_LOCK_TRACKING
+# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid)
+# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1)
+# define PB_GET_OWNER(pb) ((pb)->pb_last_holder)
+#else
+# define PB_SET_OWNER(pb) do { } while (0)
+# define PB_CLEAR_OWNER(pb) do { } while (0)
+# define PB_GET_OWNER(pb) do { } while (0)
+#endif
+
+/*
+ * Pagebuf allocation / freeing.
+ */
+
+#define pb_to_gfp(flags) \
+ ((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
+ ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+
+#define pb_to_km(flags) \
+ (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+
+
+#define pagebuf_allocate(flags) \
+ kmem_zone_alloc(pagebuf_cache, pb_to_km(flags))
+#define pagebuf_deallocate(pb) \
+ kmem_zone_free(pagebuf_cache, (pb));
+
+/*
+ * Page Region interfaces.
+ *
+ * For pages in filesystems where the blocksize is smaller than the
+ * pagesize, we use the page->private field (long) to hold a bitmap
+ * of uptodate regions within the page.
+ *
+ * Each such region is "bytes per page / bits per long" bytes long.
+ *
+ * NBPPR == number-of-bytes-per-page-region
+ * BTOPR == bytes-to-page-region (rounded up)
+ * BTOPRT == bytes-to-page-region-truncated (rounded down)
+ */
+#if (BITS_PER_LONG == 32)
+#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
+#elif (BITS_PER_LONG == 64)
+#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
+#else
+#error BITS_PER_LONG must be 32 or 64
+#endif
+#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
+#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
+#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
+
+STATIC unsigned long
+page_region_mask(
+ size_t offset,
+ size_t length)
+{
+ unsigned long mask;
+ int first, final;
+
+ first = BTOPR(offset);
+ final = BTOPRT(offset + length - 1);
+ first = min(first, final);
+
+ mask = ~0UL;
+ mask <<= BITS_PER_LONG - (final - first);
+ mask >>= BITS_PER_LONG - (final);
+
+ ASSERT(offset + length <= PAGE_CACHE_SIZE);
+ ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
+
+ return mask;
+}
+
+STATIC inline void
+set_page_region(
+ struct page *page,
+ size_t offset,
+ size_t length)
+{
+ page->private |= page_region_mask(offset, length);
+ if (page->private == ~0UL)
+ SetPageUptodate(page);
+}
+
+STATIC inline int
+test_page_region(
+ struct page *page,
+ size_t offset,
+ size_t length)
+{
+ unsigned long mask = page_region_mask(offset, length);
+
+ return (mask && (page->private & mask) == mask);
+}
+
+/*
+ * Mapping of multi-page buffers into contiguous virtual space
+ */
+
+typedef struct a_list {
+ void *vm_addr;
+ struct a_list *next;
+} a_list_t;
+
+STATIC a_list_t *as_free_head;
+STATIC int as_list_len;
+STATIC DEFINE_SPINLOCK(as_lock);
+
+/*
+ * Try to batch vunmaps because they are costly.
+ */
+STATIC void
+free_address(
+ void *addr)
+{
+ a_list_t *aentry;
+
+ aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
+ if (likely(aentry)) {
+ spin_lock(&as_lock);
+ aentry->next = as_free_head;
+ aentry->vm_addr = addr;
+ as_free_head = aentry;
+ as_list_len++;
+ spin_unlock(&as_lock);
+ } else {
+ vunmap(addr);
+ }
+}
+
+STATIC void
+purge_addresses(void)
+{
+ a_list_t *aentry, *old;
+
+ if (as_free_head == NULL)
+ return;
+
+ spin_lock(&as_lock);
+ aentry = as_free_head;
+ as_free_head = NULL;
+ as_list_len = 0;
+ spin_unlock(&as_lock);
+
+ while ((old = aentry) != NULL) {
+ vunmap(aentry->vm_addr);
+ aentry = aentry->next;
+ kfree(old);
+ }
+}
+
+/*
+ * Internal pagebuf object manipulation
+ */
+
+STATIC void
+_pagebuf_initialize(
+ xfs_buf_t *pb,
+ xfs_buftarg_t *target,
+ loff_t range_base,
+ size_t range_length,
+ page_buf_flags_t flags)
+{
+ /*
+ * We don't want certain flags to appear in pb->pb_flags.
+ */
+ flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD);
+
+ memset(pb, 0, sizeof(xfs_buf_t));
+ atomic_set(&pb->pb_hold, 1);
+ init_MUTEX_LOCKED(&pb->pb_iodonesema);
+ INIT_LIST_HEAD(&pb->pb_list);
+ INIT_LIST_HEAD(&pb->pb_hash_list);
+ init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */
+ PB_SET_OWNER(pb);
+ pb->pb_target = target;
+ pb->pb_file_offset = range_base;
+ /*
+ * Set buffer_length and count_desired to the same value initially.
+ * I/O routines should use count_desired, which will be the same in
+ * most cases but may be reset (e.g. XFS recovery).
+ */
+ pb->pb_buffer_length = pb->pb_count_desired = range_length;
+ pb->pb_flags = flags | PBF_NONE;
+ pb->pb_bn = XFS_BUF_DADDR_NULL;
+ atomic_set(&pb->pb_pin_count, 0);
+ init_waitqueue_head(&pb->pb_waiters);
+
+ XFS_STATS_INC(pb_create);
+ PB_TRACE(pb, "initialize", target);
+}
+
+/*
+ * Allocate a page array capable of holding a specified number
+ * of pages, and point the page buf at it.
+ */
+STATIC int
+_pagebuf_get_pages(
+ xfs_buf_t *pb,
+ int page_count,
+ page_buf_flags_t flags)
+{
+ /* Make sure that we have a page list */
+ if (pb->pb_pages == NULL) {
+ pb->pb_offset = page_buf_poff(pb->pb_file_offset);
+ pb->pb_page_count = page_count;
+ if (page_count <= PB_PAGES) {
+ pb->pb_pages = pb->pb_page_array;
+ } else {
+ pb->pb_pages = kmem_alloc(sizeof(struct page *) *
+ page_count, pb_to_km(flags));
+ if (pb->pb_pages == NULL)
+ return -ENOMEM;
+ }
+ memset(pb->pb_pages, 0, sizeof(struct page *) * page_count);
+ }
+ return 0;
+}
+
+/*
+ * Frees pb_pages if it was malloced.
+ */
+STATIC void
+_pagebuf_free_pages(
+ xfs_buf_t *bp)
+{
+ if (bp->pb_pages != bp->pb_page_array) {
+ kmem_free(bp->pb_pages,
+ bp->pb_page_count * sizeof(struct page *));
+ }
+}
+
+/*
+ * Releases the specified buffer.
+ *
+ * The modification state of any associated pages is left unchanged.
+ * The buffer most not be on any hash - use pagebuf_rele instead for
+ * hashed and refcounted buffers
+ */
+void
+pagebuf_free(
+ xfs_buf_t *bp)
+{
+ PB_TRACE(bp, "free", 0);
+
+ ASSERT(list_empty(&bp->pb_hash_list));
+
+ if (bp->pb_flags & _PBF_PAGE_CACHE) {
+ uint i;
+
+ if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1))
+ free_address(bp->pb_addr - bp->pb_offset);
+
+ for (i = 0; i < bp->pb_page_count; i++)
+ page_cache_release(bp->pb_pages[i]);
+ _pagebuf_free_pages(bp);
+ } else if (bp->pb_flags & _PBF_KMEM_ALLOC) {
+ /*
+ * XXX(hch): bp->pb_count_desired might be incorrect (see
+ * pagebuf_associate_memory for details), but fortunately
+ * the Linux version of kmem_free ignores the len argument..
+ */
+ kmem_free(bp->pb_addr, bp->pb_count_desired);
+ _pagebuf_free_pages(bp);
+ }
+
+ pagebuf_deallocate(bp);
+}
+
+/*
+ * Finds all pages for buffer in question and builds it's page list.
+ */
+STATIC int
+_pagebuf_lookup_pages(
+ xfs_buf_t *bp,
+ uint flags)
+{
+ struct address_space *mapping = bp->pb_target->pbr_mapping;
+ size_t blocksize = bp->pb_target->pbr_bsize;
+ size_t size = bp->pb_count_desired;
+ size_t nbytes, offset;
+ int gfp_mask = pb_to_gfp(flags);
+ unsigned short page_count, i;
+ pgoff_t first;
+ loff_t end;
+ int error;
+
+ end = bp->pb_file_offset + bp->pb_buffer_length;
+ page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset);
+
+ error = _pagebuf_get_pages(bp, page_count, flags);
+ if (unlikely(error))
+ return error;
+ bp->pb_flags |= _PBF_PAGE_CACHE;
+
+ offset = bp->pb_offset;
+ first = bp->pb_file_offset >> PAGE_CACHE_SHIFT;
+
+ for (i = 0; i < bp->pb_page_count; i++) {
+ struct page *page;
+ uint retries = 0;
+
+ retry:
+ page = find_or_create_page(mapping, first + i, gfp_mask);
+ if (unlikely(page == NULL)) {
+ if (flags & PBF_READ_AHEAD) {
+ bp->pb_page_count = i;
+ for (i = 0; i < bp->pb_page_count; i++)
+ unlock_page(bp->pb_pages[i]);
+ return -ENOMEM;
+ }
+
+ /*
+ * This could deadlock.
+ *
+ * But until all the XFS lowlevel code is revamped to
+ * handle buffer allocation failures we can't do much.
+ */
+ if (!(++retries % 100))
+ printk(KERN_ERR
+ "XFS: possible memory allocation "
+ "deadlock in %s (mode:0x%x)\n",
+ __FUNCTION__, gfp_mask);
+
+ XFS_STATS_INC(pb_page_retries);
+ pagebuf_daemon_wakeup(0, gfp_mask);
+ blk_congestion_wait(WRITE, HZ/50);
+ goto retry;
+ }
+
+ XFS_STATS_INC(pb_page_found);
+
+ nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+ size -= nbytes;
+
+ if (!PageUptodate(page)) {
+ page_count--;
+ if (blocksize >= PAGE_CACHE_SIZE) {
+ if (flags & PBF_READ)
+ bp->pb_locked = 1;
+ } else if (!PagePrivate(page)) {
+ if (test_page_region(page, offset, nbytes))
+ page_count++;
+ }
+ }
+
+ bp->pb_pages[i] = page;
+ offset = 0;
+ }
+
+ if (!bp->pb_locked) {
+ for (i = 0; i < bp->pb_page_count; i++)
+ unlock_page(bp->pb_pages[i]);
+ }
+
+ if (page_count) {
+ /* if we have any uptodate pages, mark that in the buffer */
+ bp->pb_flags &= ~PBF_NONE;
+
+ /* if some pages aren't uptodate, mark that in the buffer */
+ if (page_count != bp->pb_page_count)
+ bp->pb_flags |= PBF_PARTIAL;
+ }
+
+ PB_TRACE(bp, "lookup_pages", (long)page_count);
+ return error;
+}
+
+/*
+ * Map buffer into kernel address-space if nessecary.
+ */
+STATIC int
+_pagebuf_map_pages(
+ xfs_buf_t *bp,
+ uint flags)
+{
+ /* A single page buffer is always mappable */
+ if (bp->pb_page_count == 1) {
+ bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset;
+ bp->pb_flags |= PBF_MAPPED;
+ } else if (flags & PBF_MAPPED) {
+ if (as_list_len > 64)
+ purge_addresses();
+ bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count,
+ VM_MAP, PAGE_KERNEL);
+ if (unlikely(bp->pb_addr == NULL))
+ return -ENOMEM;
+ bp->pb_addr += bp->pb_offset;
+ bp->pb_flags |= PBF_MAPPED;
+ }
+
+ return 0;
+}
+
+/*
+ * Finding and Reading Buffers
+ */
+
+/*
+ * _pagebuf_find
+ *
+ * Looks up, and creates if absent, a lockable buffer for
+ * a given range of an inode. The buffer is returned
+ * locked. If other overlapping buffers exist, they are
+ * released before the new buffer is created and locked,
+ * which may imply that this call will block until those buffers
+ * are unlocked. No I/O is implied by this call.
+ */
+xfs_buf_t *
+_pagebuf_find(
+ xfs_buftarg_t *btp, /* block device target */
+ loff_t ioff, /* starting offset of range */
+ size_t isize, /* length of range */
+ page_buf_flags_t flags, /* PBF_TRYLOCK */
+ xfs_buf_t *new_pb)/* newly allocated buffer */
+{
+ loff_t range_base;
+ size_t range_length;
+ xfs_bufhash_t *hash;
+ xfs_buf_t *pb, *n;
+
+ range_base = (ioff << BBSHIFT);
+ range_length = (isize << BBSHIFT);
+
+ /* Check for IOs smaller than the sector size / not sector aligned */
+ ASSERT(!(range_length < (1 << btp->pbr_sshift)));
+ ASSERT(!(range_base & (loff_t)btp->pbr_smask));
+
+ hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
+
+ spin_lock(&hash->bh_lock);
+
+ list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
+ ASSERT(btp == pb->pb_target);
+ if (pb->pb_file_offset == range_base &&
+ pb->pb_buffer_length == range_length) {
+ /*
+ * If we look at something bring it to the
+ * front of the list for next time.
+ */
+ atomic_inc(&pb->pb_hold);
+ list_move(&pb->pb_hash_list, &hash->bh_list);
+ goto found;
+ }
+ }
+
+ /* No match found */
+ if (new_pb) {
+ _pagebuf_initialize(new_pb, btp, range_base,
+ range_length, flags);
+ new_pb->pb_hash = hash;
+ list_add(&new_pb->pb_hash_list, &hash->bh_list);
+ } else {
+ XFS_STATS_INC(pb_miss_locked);
+ }
+
+ spin_unlock(&hash->bh_lock);
+ return new_pb;
+
+found:
+ spin_unlock(&hash->bh_lock);
+
+ /* Attempt to get the semaphore without sleeping,
+ * if this does not work then we need to drop the
+ * spinlock and do a hard attempt on the semaphore.
+ */
+ if (down_trylock(&pb->pb_sema)) {
+ if (!(flags & PBF_TRYLOCK)) {
+ /* wait for buffer ownership */
+ PB_TRACE(pb, "get_lock", 0);
+ pagebuf_lock(pb);
+ XFS_STATS_INC(pb_get_locked_waited);
+ } else {
+ /* We asked for a trylock and failed, no need
+ * to look at file offset and length here, we
+ * know that this pagebuf at least overlaps our
+ * pagebuf and is locked, therefore our buffer
+ * either does not exist, or is this buffer
+ */
+
+ pagebuf_rele(pb);
+ XFS_STATS_INC(pb_busy_locked);
+ return (NULL);
+ }
+ } else {
+ /* trylock worked */
+ PB_SET_OWNER(pb);
+ }
+
+ if (pb->pb_flags & PBF_STALE)
+ pb->pb_flags &= PBF_MAPPED;
+ PB_TRACE(pb, "got_lock", 0);
+ XFS_STATS_INC(pb_get_locked);
+ return (pb);
+}
+
+/*
+ * xfs_buf_get_flags assembles a buffer covering the specified range.
+ *
+ * Storage in memory for all portions of the buffer will be allocated,
+ * although backing storage may not be.
+ */
+xfs_buf_t *
+xfs_buf_get_flags( /* allocate a buffer */
+ xfs_buftarg_t *target,/* target for buffer */
+ loff_t ioff, /* starting offset of range */
+ size_t isize, /* length of range */
+ page_buf_flags_t flags) /* PBF_TRYLOCK */
+{
+ xfs_buf_t *pb, *new_pb;
+ int error = 0, i;
+
+ new_pb = pagebuf_allocate(flags);
+ if (unlikely(!new_pb))
+ return NULL;
+
+ pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
+ if (pb == new_pb) {
+ error = _pagebuf_lookup_pages(pb, flags);
+ if (error)
+ goto no_buffer;
+ } else {
+ pagebuf_deallocate(new_pb);
+ if (unlikely(pb == NULL))
+ return NULL;
+ }
+
+ for (i = 0; i < pb->pb_page_count; i++)
+ mark_page_accessed(pb->pb_pages[i]);
+
+ if (!(pb->pb_flags & PBF_MAPPED)) {
+ error = _pagebuf_map_pages(pb, flags);
+ if (unlikely(error)) {
+ printk(KERN_WARNING "%s: failed to map pages\n",
+ __FUNCTION__);
+ goto no_buffer;
+ }
+ }
+
+ XFS_STATS_INC(pb_get);
+
+ /*
+ * Always fill in the block number now, the mapped cases can do
+ * their own overlay of this later.
+ */
+ pb->pb_bn = ioff;
+ pb->pb_count_desired = pb->pb_buffer_length;
+
+ PB_TRACE(pb, "get", (unsigned long)flags);
+ return pb;
+
+ no_buffer:
+ if (flags & (PBF_LOCK | PBF_TRYLOCK))
+ pagebuf_unlock(pb);
+ pagebuf_rele(pb);
+ return NULL;
+}
+
+xfs_buf_t *
+xfs_buf_read_flags(
+ xfs_buftarg_t *target,
+ loff_t ioff,
+ size_t isize,
+ page_buf_flags_t flags)
+{
+ xfs_buf_t *pb;
+
+ flags |= PBF_READ;
+
+ pb = xfs_buf_get_flags(target, ioff, isize, flags);
+ if (pb) {
+ if (PBF_NOT_DONE(pb)) {
+ PB_TRACE(pb, "read", (unsigned long)flags);
+ XFS_STATS_INC(pb_get_read);
+ pagebuf_iostart(pb, flags);
+ } else if (flags & PBF_ASYNC) {
+ PB_TRACE(pb, "read_async", (unsigned long)flags);
+ /*
+ * Read ahead call which is already satisfied,
+ * drop the buffer
+ */
+ goto no_buffer;
+ } else {
+ PB_TRACE(pb, "read_done", (unsigned long)flags);
+ /* We do not want read in the flags */
+ pb->pb_flags &= ~PBF_READ;
+ }
+ }
+
+ return pb;
+
+ no_buffer:
+ if (flags & (PBF_LOCK | PBF_TRYLOCK))
+ pagebuf_unlock(pb);
+ pagebuf_rele(pb);
+ return NULL;
+}
+
+/*
+ * Create a skeletal pagebuf (no pages associated with it).
+ */
+xfs_buf_t *
+pagebuf_lookup(
+ xfs_buftarg_t *target,
+ loff_t ioff,
+ size_t isize,
+ page_buf_flags_t flags)
+{
+ xfs_buf_t *pb;
+
+ pb = pagebuf_allocate(flags);
+ if (pb) {
+ _pagebuf_initialize(pb, target, ioff, isize, flags);
+ }
+ return pb;
+}
+
+/*
+ * If we are not low on memory then do the readahead in a deadlock
+ * safe manner.
+ */
+void
+pagebuf_readahead(
+ xfs_buftarg_t *target,
+ loff_t ioff,
+ size_t isize,
+ page_buf_flags_t flags)
+{
+ struct backing_dev_info *bdi;
+
+ bdi = target->pbr_mapping->backing_dev_info;
+ if (bdi_read_congested(bdi))
+ return;
+
+ flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
+ xfs_buf_read_flags(target, ioff, isize, flags);
+}
+
+xfs_buf_t *
+pagebuf_get_empty(
+ size_t len,
+ xfs_buftarg_t *target)
+{
+ xfs_buf_t *pb;
+
+ pb = pagebuf_allocate(0);
+ if (pb)
+ _pagebuf_initialize(pb, target, 0, len, 0);
+ return pb;
+}
+
+static inline struct page *
+mem_to_page(
+ void *addr)
+{
+ if (((unsigned long)addr < VMALLOC_START) ||
+ ((unsigned long)addr >= VMALLOC_END)) {
+ return virt_to_page(addr);
+ } else {
+ return vmalloc_to_page(addr);
+ }
+}
+
+int
+pagebuf_associate_memory(
+ xfs_buf_t *pb,
+ void *mem,
+ size_t len)
+{
+ int rval;
+ int i = 0;
+ size_t ptr;
+ size_t end, end_cur;
+ off_t offset;
+ int page_count;
+
+ page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+ offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK);
+ if (offset && (len > PAGE_CACHE_SIZE))
+ page_count++;
+
+ /* Free any previous set of page pointers */
+ if (pb->pb_pages)
+ _pagebuf_free_pages(pb);
+
+ pb->pb_pages = NULL;
+ pb->pb_addr = mem;
+
+ rval = _pagebuf_get_pages(pb, page_count, 0);
+ if (rval)
+ return rval;
+
+ pb->pb_offset = offset;
+ ptr = (size_t) mem & PAGE_CACHE_MASK;
+ end = PAGE_CACHE_ALIGN((size_t) mem + len);
+ end_cur = end;
+ /* set up first page */
+ pb->pb_pages[0] = mem_to_page(mem);
+
+ ptr += PAGE_CACHE_SIZE;
+ pb->pb_page_count = ++i;
+ while (ptr < end) {
+ pb->pb_pages[i] = mem_to_page((void *)ptr);
+ pb->pb_page_count = ++i;
+ ptr += PAGE_CACHE_SIZE;
+ }
+ pb->pb_locked = 0;
+
+ pb->pb_count_desired = pb->pb_buffer_length = len;
+ pb->pb_flags |= PBF_MAPPED;
+
+ return 0;
+}
+
+xfs_buf_t *
+pagebuf_get_no_daddr(
+ size_t len,
+ xfs_buftarg_t *target)
+{
+ size_t malloc_len = len;
+ xfs_buf_t *bp;
+ void *data;
+ int error;
+
+ bp = pagebuf_allocate(0);
+ if (unlikely(bp == NULL))
+ goto fail;
+ _pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO);
+
+ try_again:
+ data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL);
+ if (unlikely(data == NULL))
+ goto fail_free_buf;
+
+ /* check whether alignment matches.. */
+ if ((__psunsigned_t)data !=
+ ((__psunsigned_t)data & ~target->pbr_smask)) {
+ /* .. else double the size and try again */
+ kmem_free(data, malloc_len);
+ malloc_len <<= 1;
+ goto try_again;
+ }
+
+ error = pagebuf_associate_memory(bp, data, len);
+ if (error)
+ goto fail_free_mem;
+ bp->pb_flags |= _PBF_KMEM_ALLOC;
+
+ pagebuf_unlock(bp);
+
+ PB_TRACE(bp, "no_daddr", data);
+ return bp;
+ fail_free_mem:
+ kmem_free(data, malloc_len);
+ fail_free_buf:
+ pagebuf_free(bp);
+ fail:
+ return NULL;
+}
+
+/*
+ * pagebuf_hold
+ *
+ * Increment reference count on buffer, to hold the buffer concurrently
+ * with another thread which may release (free) the buffer asynchronously.
+ *
+ * Must hold the buffer already to call this function.
+ */
+void
+pagebuf_hold(
+ xfs_buf_t *pb)
+{
+ atomic_inc(&pb->pb_hold);
+ PB_TRACE(pb, "hold", 0);
+}
+
+/*
+ * pagebuf_rele
+ *
+ * pagebuf_rele releases a hold on the specified buffer. If the
+ * the hold count is 1, pagebuf_rele calls pagebuf_free.
+ */
+void
+pagebuf_rele(
+ xfs_buf_t *pb)
+{
+ xfs_bufhash_t *hash = pb->pb_hash;
+
+ PB_TRACE(pb, "rele", pb->pb_relse);
+
+ /*
+ * pagebuf_lookup buffers are not hashed, not delayed write,
+ * and don't have their own release routines. Special case.
+ */
+ if (unlikely(!hash)) {
+ ASSERT(!pb->pb_relse);
+ if (atomic_dec_and_test(&pb->pb_hold))
+ xfs_buf_free(pb);
+ return;
+ }
+
+ if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
+ int do_free = 1;
+
+ if (pb->pb_relse) {
+ atomic_inc(&pb->pb_hold);
+ spin_unlock(&hash->bh_lock);
+ (*(pb->pb_relse)) (pb);
+ spin_lock(&hash->bh_lock);
+ do_free = 0;
+ }
+
+ if (pb->pb_flags & PBF_DELWRI) {
+ pb->pb_flags |= PBF_ASYNC;
+ atomic_inc(&pb->pb_hold);
+ pagebuf_delwri_queue(pb, 0);
+ do_free = 0;
+ } else if (pb->pb_flags & PBF_FS_MANAGED) {
+ do_free = 0;
+ }
+
+ if (do_free) {
+ list_del_init(&pb->pb_hash_list);
+ spin_unlock(&hash->bh_lock);
+ pagebuf_free(pb);
+ } else {
+ spin_unlock(&hash->bh_lock);
+ }
+ }
+}
+
+
+/*
+ * Mutual exclusion on buffers. Locking model:
+ *
+ * Buffers associated with inodes for which buffer locking
+ * is not enabled are not protected by semaphores, and are
+ * assumed to be exclusively owned by the caller. There is a
+ * spinlock in the buffer, used by the caller when concurrent
+ * access is possible.
+ */
+
+/*
+ * pagebuf_cond_lock
+ *
+ * pagebuf_cond_lock locks a buffer object, if it is not already locked.
+ * Note that this in no way
+ * locks the underlying pages, so it is only useful for synchronizing
+ * concurrent use of page buffer objects, not for synchronizing independent
+ * access to the underlying pages.
+ */
+int
+pagebuf_cond_lock( /* lock buffer, if not locked */
+ /* returns -EBUSY if locked) */
+ xfs_buf_t *pb)
+{
+ int locked;
+
+ locked = down_trylock(&pb->pb_sema) == 0;
+ if (locked) {
+ PB_SET_OWNER(pb);
+ }
+ PB_TRACE(pb, "cond_lock", (long)locked);
+ return(locked ? 0 : -EBUSY);
+}
+
+#if defined(DEBUG) || defined(XFS_BLI_TRACE)
+/*
+ * pagebuf_lock_value
+ *
+ * Return lock value for a pagebuf
+ */
+int
+pagebuf_lock_value(
+ xfs_buf_t *pb)
+{
+ return(atomic_read(&pb->pb_sema.count));
+}
+#endif
+
+/*
+ * pagebuf_lock
+ *
+ * pagebuf_lock locks a buffer object. Note that this in no way
+ * locks the underlying pages, so it is only useful for synchronizing
+ * concurrent use of page buffer objects, not for synchronizing independent
+ * access to the underlying pages.
+ */
+int
+pagebuf_lock(
+ xfs_buf_t *pb)
+{
+ PB_TRACE(pb, "lock", 0);
+ if (atomic_read(&pb->pb_io_remaining))
+ blk_run_address_space(pb->pb_target->pbr_mapping);
+ down(&pb->pb_sema);
+ PB_SET_OWNER(pb);
+ PB_TRACE(pb, "locked", 0);
+ return 0;
+}
+
+/*
+ * pagebuf_unlock
+ *
+ * pagebuf_unlock releases the lock on the buffer object created by
+ * pagebuf_lock or pagebuf_cond_lock (not any
+ * pinning of underlying pages created by pagebuf_pin).
+ */
+void
+pagebuf_unlock( /* unlock buffer */
+ xfs_buf_t *pb) /* buffer to unlock */
+{
+ PB_CLEAR_OWNER(pb);
+ up(&pb->pb_sema);
+ PB_TRACE(pb, "unlock", 0);
+}
+
+
+/*
+ * Pinning Buffer Storage in Memory
+ */
+
+/*
+ * pagebuf_pin
+ *
+ * pagebuf_pin locks all of the memory represented by a buffer in
+ * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
+ * the same or different buffers affecting a given page, will
+ * properly count the number of outstanding "pin" requests. The
+ * buffer may be released after the pagebuf_pin and a different
+ * buffer used when calling pagebuf_unpin, if desired.
+ * pagebuf_pin should be used by the file system when it wants be
+ * assured that no attempt will be made to force the affected
+ * memory to disk. It does not assure that a given logical page
+ * will not be moved to a different physical page.
+ */
+void
+pagebuf_pin(
+ xfs_buf_t *pb)
+{
+ atomic_inc(&pb->pb_pin_count);
+ PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
+}
+
+/*
+ * pagebuf_unpin
+ *
+ * pagebuf_unpin reverses the locking of memory performed by
+ * pagebuf_pin. Note that both functions affected the logical
+ * pages associated with the buffer, not the buffer itself.
+ */
+void
+pagebuf_unpin(
+ xfs_buf_t *pb)
+{
+ if (atomic_dec_and_test(&pb->pb_pin_count)) {
+ wake_up_all(&pb->pb_waiters);
+ }
+ PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
+}
+
+int
+pagebuf_ispin(
+ xfs_buf_t *pb)
+{
+ return atomic_read(&pb->pb_pin_count);
+}
+
+/*
+ * pagebuf_wait_unpin
+ *
+ * pagebuf_wait_unpin waits until all of the memory associated
+ * with the buffer is not longer locked in memory. It returns
+ * immediately if none of the affected pages are locked.
+ */
+static inline void
+_pagebuf_wait_unpin(
+ xfs_buf_t *pb)
+{
+ DECLARE_WAITQUEUE (wait, current);
+
+ if (atomic_read(&pb->pb_pin_count) == 0)
+ return;
+
+ add_wait_queue(&pb->pb_waiters, &wait);
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&pb->pb_pin_count) == 0)
+ break;
+ if (atomic_read(&pb->pb_io_remaining))
+ blk_run_address_space(pb->pb_target->pbr_mapping);
+ schedule();
+ }
+ remove_wait_queue(&pb->pb_waiters, &wait);
+ set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Buffer Utility Routines
+ */
+
+/*
+ * pagebuf_iodone
+ *
+ * pagebuf_iodone marks a buffer for which I/O is in progress
+ * done with respect to that I/O. The pb_iodone routine, if
+ * present, will be called as a side-effect.
+ */
+STATIC void
+pagebuf_iodone_work(
+ void *v)
+{
+ xfs_buf_t *bp = (xfs_buf_t *)v;
+
+ if (bp->pb_iodone)
+ (*(bp->pb_iodone))(bp);
+ else if (bp->pb_flags & PBF_ASYNC)
+ xfs_buf_relse(bp);
+}
+
+void
+pagebuf_iodone(
+ xfs_buf_t *pb,
+ int dataio,
+ int schedule)
+{
+ pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
+ if (pb->pb_error == 0) {
+ pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE);
+ }
+
+ PB_TRACE(pb, "iodone", pb->pb_iodone);
+
+ if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
+ if (schedule) {
+ INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
+ queue_work(dataio ? pagebuf_dataio_workqueue :
+ pagebuf_logio_workqueue, &pb->pb_iodone_work);
+ } else {
+ pagebuf_iodone_work(pb);
+ }
+ } else {
+ up(&pb->pb_iodonesema);
+ }
+}
+
+/*
+ * pagebuf_ioerror
+ *
+ * pagebuf_ioerror sets the error code for a buffer.
+ */
+void
+pagebuf_ioerror( /* mark/clear buffer error flag */
+ xfs_buf_t *pb, /* buffer to mark */
+ int error) /* error to store (0 if none) */
+{
+ ASSERT(error >= 0 && error <= 0xffff);
+ pb->pb_error = (unsigned short)error;
+ PB_TRACE(pb, "ioerror", (unsigned long)error);
+}
+
+/*
+ * pagebuf_iostart
+ *
+ * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
+ * If necessary, it will arrange for any disk space allocation required,
+ * and it will break up the request if the block mappings require it.
+ * The pb_iodone routine in the buffer supplied will only be called
+ * when all of the subsidiary I/O requests, if any, have been completed.
+ * pagebuf_iostart calls the pagebuf_ioinitiate routine or
+ * pagebuf_iorequest, if the former routine is not defined, to start
+ * the I/O on a given low-level request.
+ */
+int
+pagebuf_iostart( /* start I/O on a buffer */
+ xfs_buf_t *pb, /* buffer to start */
+ page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */
+ /* PBF_WRITE, PBF_DELWRI, */
+ /* PBF_DONT_BLOCK */
+{
+ int status = 0;
+
+ PB_TRACE(pb, "iostart", (unsigned long)flags);
+
+ if (flags & PBF_DELWRI) {
+ pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
+ pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
+ pagebuf_delwri_queue(pb, 1);
+ return status;
+ }
+
+ pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
+ PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+ pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
+ PBF_READ_AHEAD | _PBF_RUN_QUEUES);
+
+ BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);
+
+ /* For writes allow an alternate strategy routine to precede
+ * the actual I/O request (which may not be issued at all in
+ * a shutdown situation, for example).
+ */
+ status = (flags & PBF_WRITE) ?
+ pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);
+
+ /* Wait for I/O if we are not an async request.
+ * Note: async I/O request completion will release the buffer,
+ * and that can already be done by this point. So using the
+ * buffer pointer from here on, after async I/O, is invalid.
+ */
+ if (!status && !(flags & PBF_ASYNC))
+ status = pagebuf_iowait(pb);
+
+ return status;
+}
+
+/*
+ * Helper routine for pagebuf_iorequest
+ */
+
+STATIC __inline__ int
+_pagebuf_iolocked(
+ xfs_buf_t *pb)
+{
+ ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
+ if (pb->pb_flags & PBF_READ)
+ return pb->pb_locked;
+ return 0;
+}
+
+STATIC __inline__ void
+_pagebuf_iodone(
+ xfs_buf_t *pb,
+ int schedule)
+{
+ if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
+ pb->pb_locked = 0;
+ pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule);
+ }
+}
+
+STATIC int
+bio_end_io_pagebuf(
+ struct bio *bio,
+ unsigned int bytes_done,
+ int error)
+{
+ xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private;
+ unsigned int i, blocksize = pb->pb_target->pbr_bsize;
+ struct bio_vec *bvec = bio->bi_io_vec;
+
+ if (bio->bi_size)
+ return 1;
+
+ if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ pb->pb_error = EIO;
+
+ for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
+ struct page *page = bvec->bv_page;
+
+ if (pb->pb_error) {
+ SetPageError(page);
+ } else if (blocksize == PAGE_CACHE_SIZE) {
+ SetPageUptodate(page);
+ } else if (!PagePrivate(page) &&
+ (pb->pb_flags & _PBF_PAGE_CACHE)) {
+ set_page_region(page, bvec->bv_offset, bvec->bv_len);
+ }
+
+ if (_pagebuf_iolocked(pb)) {
+ unlock_page(page);
+ }
+ }
+
+ _pagebuf_iodone(pb, 1);
+ bio_put(bio);
+ return 0;
+}
+
+STATIC void
+_pagebuf_ioapply(
+ xfs_buf_t *pb)
+{
+ int i, rw, map_i, total_nr_pages, nr_pages;
+ struct bio *bio;
+ int offset = pb->pb_offset;
+ int size = pb->pb_count_desired;
+ sector_t sector = pb->pb_bn;
+ unsigned int blocksize = pb->pb_target->pbr_bsize;
+ int locking = _pagebuf_iolocked(pb);
+
+ total_nr_pages = pb->pb_page_count;
+ map_i = 0;
+
+ if (pb->pb_flags & _PBF_RUN_QUEUES) {
+ pb->pb_flags &= ~_PBF_RUN_QUEUES;
+ rw = (pb->pb_flags & PBF_READ) ? READ_SYNC : WRITE_SYNC;
+ } else {
+ rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
+ }
+
+ /* Special code path for reading a sub page size pagebuf in --
+ * we populate up the whole page, and hence the other metadata
+ * in the same page. This optimization is only valid when the
+ * filesystem block size and the page size are equal.
+ */
+ if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
+ (pb->pb_flags & PBF_READ) && locking &&
+ (blocksize == PAGE_CACHE_SIZE)) {
+ bio = bio_alloc(GFP_NOIO, 1);
+
+ bio->bi_bdev = pb->pb_target->pbr_bdev;
+ bio->bi_sector = sector - (offset >> BBSHIFT);
+ bio->bi_end_io = bio_end_io_pagebuf;
+ bio->bi_private = pb;
+
+ bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
+ size = 0;
+
+ atomic_inc(&pb->pb_io_remaining);
+
+ goto submit_io;
+ }
+
+ /* Lock down the pages which we need to for the request */
+ if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
+ for (i = 0; size; i++) {
+ int nbytes = PAGE_CACHE_SIZE - offset;
+ struct page *page = pb->pb_pages[i];
+
+ if (nbytes > size)
+ nbytes = size;
+
+ lock_page(page);
+
+ size -= nbytes;
+ offset = 0;
+ }
+ offset = pb->pb_offset;
+ size = pb->pb_count_desired;
+ }
+
+next_chunk:
+ atomic_inc(&pb->pb_io_remaining);
+ nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
+ if (nr_pages > total_nr_pages)
+ nr_pages = total_nr_pages;
+
+ bio = bio_alloc(GFP_NOIO, nr_pages);
+ bio->bi_bdev = pb->pb_target->pbr_bdev;
+ bio->bi_sector = sector;
+ bio->bi_end_io = bio_end_io_pagebuf;
+ bio->bi_private = pb;
+
+ for (; size && nr_pages; nr_pages--, map_i++) {
+ int nbytes = PAGE_CACHE_SIZE - offset;
+
+ if (nbytes > size)
+ nbytes = size;
+
+ if (bio_add_page(bio, pb->pb_pages[map_i],
+ nbytes, offset) < nbytes)
+ break;
+
+ offset = 0;
+ sector += nbytes >> BBSHIFT;
+ size -= nbytes;
+ total_nr_pages--;
+ }
+
+submit_io:
+ if (likely(bio->bi_size)) {
+ submit_bio(rw, bio);
+ if (size)
+ goto next_chunk;
+ } else {
+ bio_put(bio);
+ pagebuf_ioerror(pb, EIO);
+ }
+}
+
+/*
+ * pagebuf_iorequest -- the core I/O request routine.
+ */
+int
+pagebuf_iorequest( /* start real I/O */
+ xfs_buf_t *pb) /* buffer to convey to device */
+{
+ PB_TRACE(pb, "iorequest", 0);
+
+ if (pb->pb_flags & PBF_DELWRI) {
+ pagebuf_delwri_queue(pb, 1);
+ return 0;
+ }
+
+ if (pb->pb_flags & PBF_WRITE) {
+ _pagebuf_wait_unpin(pb);
+ }
+
+ pagebuf_hold(pb);
+
+ /* Set the count to 1 initially, this will stop an I/O
+ * completion callout which happens before we have started
+ * all the I/O from calling pagebuf_iodone too early.
+ */
+ atomic_set(&pb->pb_io_remaining, 1);
+ _pagebuf_ioapply(pb);
+ _pagebuf_iodone(pb, 0);
+
+ pagebuf_rele(pb);
+ return 0;
+}
+
+/*
+ * pagebuf_iowait
+ *
+ * pagebuf_iowait waits for I/O to complete on the buffer supplied.
+ * It returns immediately if no I/O is pending. In any case, it returns
+ * the error code, if any, or 0 if there is no error.
+ */
+int
+pagebuf_iowait(
+ xfs_buf_t *pb)
+{
+ PB_TRACE(pb, "iowait", 0);
+ if (atomic_read(&pb->pb_io_remaining))
+ blk_run_address_space(pb->pb_target->pbr_mapping);
+ down(&pb->pb_iodonesema);
+ PB_TRACE(pb, "iowaited", (long)pb->pb_error);
+ return pb->pb_error;
+}
+
+caddr_t
+pagebuf_offset(
+ xfs_buf_t *pb,
+ size_t offset)
+{
+ struct page *page;
+
+ offset += pb->pb_offset;
+
+ page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
+ return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
+}
+
+/*
+ * pagebuf_iomove
+ *
+ * Move data into or out of a buffer.
+ */
+void
+pagebuf_iomove(
+ xfs_buf_t *pb, /* buffer to process */
+ size_t boff, /* starting buffer offset */
+ size_t bsize, /* length to copy */
+ caddr_t data, /* data address */
+ page_buf_rw_t mode) /* read/write flag */
+{
+ size_t bend, cpoff, csize;
+ struct page *page;
+
+ bend = boff + bsize;
+ while (boff < bend) {
+ page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
+ cpoff = page_buf_poff(boff + pb->pb_offset);
+ csize = min_t(size_t,
+ PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);
+
+ ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
+
+ switch (mode) {
+ case PBRW_ZERO:
+ memset(page_address(page) + cpoff, 0, csize);
+ break;
+ case PBRW_READ:
+ memcpy(data, page_address(page) + cpoff, csize);
+ break;
+ case PBRW_WRITE:
+ memcpy(page_address(page) + cpoff, data, csize);
+ }
+
+ boff += csize;
+ data += csize;
+ }
+}
+
+/*
+ * Handling of buftargs.
+ */
+
+/*
+ * Wait for any bufs with callbacks that have been submitted but
+ * have not yet returned... walk the hash list for the target.
+ */
+void
+xfs_wait_buftarg(
+ xfs_buftarg_t *btp)
+{
+ xfs_buf_t *bp, *n;
+ xfs_bufhash_t *hash;
+ uint i;
+
+ for (i = 0; i < (1 << btp->bt_hashshift); i++) {
+ hash = &btp->bt_hash[i];
+again:
+ spin_lock(&hash->bh_lock);
+ list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
+ ASSERT(btp == bp->pb_target);
+ if (!(bp->pb_flags & PBF_FS_MANAGED)) {
+ spin_unlock(&hash->bh_lock);
+ delay(100);
+ goto again;
+ }
+ }
+ spin_unlock(&hash->bh_lock);
+ }
+}
+
+/*
+ * Allocate buffer hash table for a given target.
+ * For devices containing metadata (i.e. not the log/realtime devices)
+ * we need to allocate a much larger hash table.
+ */
+STATIC void
+xfs_alloc_bufhash(
+ xfs_buftarg_t *btp,
+ int external)
+{
+ unsigned int i;
+
+ btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
+ btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
+ btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
+ sizeof(xfs_bufhash_t), KM_SLEEP);
+ for (i = 0; i < (1 << btp->bt_hashshift); i++) {
+ spin_lock_init(&btp->bt_hash[i].bh_lock);
+ INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+ }
+}
+
+STATIC void
+xfs_free_bufhash(
+ xfs_buftarg_t *btp)
+{
+ kmem_free(btp->bt_hash,
+ (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+ btp->bt_hash = NULL;
+}
+
+void
+xfs_free_buftarg(
+ xfs_buftarg_t *btp,
+ int external)
+{
+ xfs_flush_buftarg(btp, 1);
+ if (external)
+ xfs_blkdev_put(btp->pbr_bdev);
+ xfs_free_bufhash(btp);
+ iput(btp->pbr_mapping->host);
+ kmem_free(btp, sizeof(*btp));
+}
+
+void
+xfs_incore_relse(
+ xfs_buftarg_t *btp,
+ int delwri_only,
+ int wait)
+{
+ invalidate_bdev(btp->pbr_bdev, 1);
+ truncate_inode_pages(btp->pbr_mapping, 0LL);
+}
+
+STATIC int
+xfs_setsize_buftarg_flags(
+ xfs_buftarg_t *btp,
+ unsigned int blocksize,
+ unsigned int sectorsize,
+ int verbose)
+{
+ btp->pbr_bsize = blocksize;
+ btp->pbr_sshift = ffs(sectorsize) - 1;
+ btp->pbr_smask = sectorsize - 1;
+
+ if (set_blocksize(btp->pbr_bdev, sectorsize)) {
+ printk(KERN_WARNING
+ "XFS: Cannot set_blocksize to %u on device %s\n",
+ sectorsize, XFS_BUFTARG_NAME(btp));
+ return EINVAL;
+ }
+
+ if (verbose &&
+ (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
+ printk(KERN_WARNING
+ "XFS: %u byte sectors in use on device %s. "
+ "This is suboptimal; %u or greater is ideal.\n",
+ sectorsize, XFS_BUFTARG_NAME(btp),
+ (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
+ }
+
+ return 0;
+}
+
+/*
+* When allocating the initial buffer target we have not yet
+* read in the superblock, so don't know what sized sectors
+* are being used is at this early stage. Play safe.
+*/
+STATIC int
+xfs_setsize_buftarg_early(
+ xfs_buftarg_t *btp,
+ struct block_device *bdev)
+{
+ return xfs_setsize_buftarg_flags(btp,
+ PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
+}
+
+int
+xfs_setsize_buftarg(
+ xfs_buftarg_t *btp,
+ unsigned int blocksize,
+ unsigned int sectorsize)
+{
+ return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+}
+
+STATIC int
+xfs_mapping_buftarg(
+ xfs_buftarg_t *btp,
+ struct block_device *bdev)
+{
+ struct backing_dev_info *bdi;
+ struct inode *inode;
+ struct address_space *mapping;
+ static struct address_space_operations mapping_aops = {
+ .sync_page = block_sync_page,
+ };
+
+ inode = new_inode(bdev->bd_inode->i_sb);
+ if (!inode) {
+ printk(KERN_WARNING
+ "XFS: Cannot allocate mapping inode for device %s\n",
+ XFS_BUFTARG_NAME(btp));
+ return ENOMEM;
+ }
+ inode->i_mode = S_IFBLK;
+ inode->i_bdev = bdev;
+ inode->i_rdev = bdev->bd_dev;
+ bdi = blk_get_backing_dev_info(bdev);
+ if (!bdi)
+ bdi = &default_backing_dev_info;
+ mapping = &inode->i_data;
+ mapping->a_ops = &mapping_aops;
+ mapping->backing_dev_info = bdi;
+ mapping_set_gfp_mask(mapping, GFP_NOFS);
+ btp->pbr_mapping = mapping;
+ return 0;
+}
+
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+ struct block_device *bdev,
+ int external)
+{
+ xfs_buftarg_t *btp;
+
+ btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+ btp->pbr_dev = bdev->bd_dev;
+ btp->pbr_bdev = bdev;
+ if (xfs_setsize_buftarg_early(btp, bdev))
+ goto error;
+ if (xfs_mapping_buftarg(btp, bdev))
+ goto error;
+ xfs_alloc_bufhash(btp, external);
+ return btp;
+
+error:
+ kmem_free(btp, sizeof(*btp));
+ return NULL;
+}
+
+
+/*
+ * Pagebuf delayed write buffer handling
+ */
+
+STATIC LIST_HEAD(pbd_delwrite_queue);
+STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
+
+STATIC void
+pagebuf_delwri_queue(
+ xfs_buf_t *pb,
+ int unlock)
+{
+ PB_TRACE(pb, "delwri_q", (long)unlock);
+ ASSERT(pb->pb_flags & PBF_DELWRI);
+
+ spin_lock(&pbd_delwrite_lock);
+ /* If already in the queue, dequeue and place at tail */
+ if (!list_empty(&pb->pb_list)) {
+ if (unlock) {
+ atomic_dec(&pb->pb_hold);
+ }
+ list_del(&pb->pb_list);
+ }
+
+ list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
+ pb->pb_queuetime = jiffies;
+ spin_unlock(&pbd_delwrite_lock);
+
+ if (unlock)
+ pagebuf_unlock(pb);
+}
+
+void
+pagebuf_delwri_dequeue(
+ xfs_buf_t *pb)
+{
+ int dequeued = 0;
+
+ spin_lock(&pbd_delwrite_lock);
+ if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
+ list_del_init(&pb->pb_list);
+ dequeued = 1;
+ }
+ pb->pb_flags &= ~PBF_DELWRI;
+ spin_unlock(&pbd_delwrite_lock);
+
+ if (dequeued)
+ pagebuf_rele(pb);
+
+ PB_TRACE(pb, "delwri_dq", (long)dequeued);
+}
+
+STATIC void
+pagebuf_runall_queues(
+ struct workqueue_struct *queue)
+{
+ flush_workqueue(queue);
+}
+
+/* Defines for pagebuf daemon */
+STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
+STATIC struct task_struct *pagebuf_daemon_task;
+STATIC int pagebuf_daemon_active;
+STATIC int force_flush;
+
+
+STATIC int
+pagebuf_daemon_wakeup(
+ int priority,
+ unsigned int mask)
+{
+ force_flush = 1;
+ barrier();
+ wake_up_process(pagebuf_daemon_task);
+ return 0;
+}
+
+STATIC int
+pagebuf_daemon(
+ void *data)
+{
+ struct list_head tmp;
+ unsigned long age;
+ xfs_buftarg_t *target;
+ xfs_buf_t *pb, *n;
+
+ /* Set up the thread */
+ daemonize("xfsbufd");
+ current->flags |= PF_MEMALLOC;
+
+ pagebuf_daemon_task = current;
+ pagebuf_daemon_active = 1;
+ barrier();
+
+ INIT_LIST_HEAD(&tmp);
+ do {
+ try_to_freeze(PF_FREEZE);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
+
+ age = (xfs_buf_age_centisecs * HZ) / 100;
+ spin_lock(&pbd_delwrite_lock);
+ list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+ PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
+ ASSERT(pb->pb_flags & PBF_DELWRI);
+
+ if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
+ if (!force_flush &&
+ time_before(jiffies,
+ pb->pb_queuetime + age)) {
+ pagebuf_unlock(pb);
+ break;
+ }
+
+ pb->pb_flags &= ~PBF_DELWRI;
+ pb->pb_flags |= PBF_WRITE;
+ list_move(&pb->pb_list, &tmp);
+ }
+ }
+ spin_unlock(&pbd_delwrite_lock);
+
+ while (!list_empty(&tmp)) {
+ pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+ target = pb->pb_target;
+
+ list_del_init(&pb->pb_list);
+ pagebuf_iostrategy(pb);
+
+ blk_run_address_space(target->pbr_mapping);
+ }
+
+ if (as_list_len > 0)
+ purge_addresses();
+
+ force_flush = 0;
+ } while (pagebuf_daemon_active);
+
+ complete_and_exit(&pagebuf_daemon_done, 0);
+}
+
+/*
+ * Go through all incore buffers, and release buffers if they belong to
+ * the given device. This is used in filesystem error handling to
+ * preserve the consistency of its metadata.
+ */
+int
+xfs_flush_buftarg(
+ xfs_buftarg_t *target,
+ int wait)
+{
+ struct list_head tmp;
+ xfs_buf_t *pb, *n;
+ int pincount = 0;
+
+ pagebuf_runall_queues(pagebuf_dataio_workqueue);
+ pagebuf_runall_queues(pagebuf_logio_workqueue);
+
+ INIT_LIST_HEAD(&tmp);
+ spin_lock(&pbd_delwrite_lock);
+ list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
+
+ if (pb->pb_target != target)
+ continue;
+
+ ASSERT(pb->pb_flags & PBF_DELWRI);
+ PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
+ if (pagebuf_ispin(pb)) {
+ pincount++;
+ continue;
+ }
+
+ pb->pb_flags &= ~PBF_DELWRI;
+ pb->pb_flags |= PBF_WRITE;
+ list_move(&pb->pb_list, &tmp);
+ }
+ spin_unlock(&pbd_delwrite_lock);
+
+ /*
+ * Dropped the delayed write list lock, now walk the temporary list
+ */
+ list_for_each_entry_safe(pb, n, &tmp, pb_list) {
+ if (wait)
+ pb->pb_flags &= ~PBF_ASYNC;
+ else
+ list_del_init(&pb->pb_list);
+
+ pagebuf_lock(pb);
+ pagebuf_iostrategy(pb);
+ }
+
+ /*
+ * Remaining list items must be flushed before returning
+ */
+ while (!list_empty(&tmp)) {
+ pb = list_entry(tmp.next, xfs_buf_t, pb_list);
+
+ list_del_init(&pb->pb_list);
+ xfs_iowait(pb);
+ xfs_buf_relse(pb);
+ }
+
+ if (wait)
+ blk_run_address_space(target->pbr_mapping);
+
+ return pincount;
+}
+
+STATIC int
+pagebuf_daemon_start(void)
+{
+ int rval;
+
+ pagebuf_logio_workqueue = create_workqueue("xfslogd");
+ if (!pagebuf_logio_workqueue)
+ return -ENOMEM;
+
+ pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
+ if (!pagebuf_dataio_workqueue) {
+ destroy_workqueue(pagebuf_logio_workqueue);
+ return -ENOMEM;
+ }
+
+ rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
+ if (rval < 0) {
+ destroy_workqueue(pagebuf_logio_workqueue);
+ destroy_workqueue(pagebuf_dataio_workqueue);
+ }
+
+ return rval;
+}
+
+/*
+ * pagebuf_daemon_stop
+ *
+ * Note: do not mark as __exit, it is called from pagebuf_terminate.
+ */
+STATIC void
+pagebuf_daemon_stop(void)
+{
+ pagebuf_daemon_active = 0;
+ barrier();
+ wait_for_completion(&pagebuf_daemon_done);
+
+ destroy_workqueue(pagebuf_logio_workqueue);
+ destroy_workqueue(pagebuf_dataio_workqueue);
+}
+
+/*
+ * Initialization and Termination
+ */
+
+int __init
+pagebuf_init(void)
+{
+ pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (pagebuf_cache == NULL) {
+ printk("XFS: couldn't init xfs_buf_t cache\n");
+ pagebuf_terminate();
+ return -ENOMEM;
+ }
+
+#ifdef PAGEBUF_TRACE
+ pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
+#endif
+
+ pagebuf_daemon_start();
+
+ pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
+ if (pagebuf_shake == NULL) {
+ pagebuf_terminate();
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+
+/*
+ * pagebuf_terminate.
+ *
+ * Note: do not mark as __exit, this is also called from the __init code.
+ */
+void
+pagebuf_terminate(void)
+{
+ pagebuf_daemon_stop();
+
+#ifdef PAGEBUF_TRACE
+ ktrace_free(pagebuf_trace_buf);
+#endif
+
+ kmem_zone_destroy(pagebuf_cache);
+ kmem_shake_deregister(pagebuf_shake);
+}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
new file mode 100644
index 00000000000..74deed8e6d9
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI
+ */
+
+#ifndef __XFS_BUF_H__
+#define __XFS_BUF_H__
+
+#include <linux/config.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/uio.h>
+
+/*
+ * Base types
+ */
+
+#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
+
+#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
+#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
+#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
+#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum page_buf_rw_e {
+ PBRW_READ = 1, /* transfer into target memory */
+ PBRW_WRITE = 2, /* transfer from target memory */
+ PBRW_ZERO = 3 /* Zero target memory */
+} page_buf_rw_t;
+
+
+typedef enum page_buf_flags_e { /* pb_flags values */
+ PBF_READ = (1 << 0), /* buffer intended for reading from device */
+ PBF_WRITE = (1 << 1), /* buffer intended for writing to device */
+ PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */
+ PBF_PARTIAL = (1 << 3), /* buffer partially read */
+ PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
+ PBF_NONE = (1 << 5), /* buffer not read at all */
+ PBF_DELWRI = (1 << 6), /* buffer has dirty pages */
+ PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
+ PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
+ PBF_FS_DATAIOD = (1 << 9), /* schedule IO completion on fs datad */
+ PBF_FORCEIO = (1 << 10), /* ignore any cache state */
+ PBF_FLUSH = (1 << 11), /* flush disk write cache */
+ PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
+
+ /* flags used only as arguments to access routines */
+ PBF_LOCK = (1 << 14), /* lock requested */
+ PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
+ PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
+
+ /* flags used only internally */
+ _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
+ _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */
+ _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
+} page_buf_flags_t;
+
+#define PBF_UPDATE (PBF_READ | PBF_WRITE)
+#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
+#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
+
+typedef struct xfs_bufhash {
+ struct list_head bh_list;
+ spinlock_t bh_lock;
+} xfs_bufhash_t;
+
+typedef struct xfs_buftarg {
+ dev_t pbr_dev;
+ struct block_device *pbr_bdev;
+ struct address_space *pbr_mapping;
+ unsigned int pbr_bsize;
+ unsigned int pbr_sshift;
+ size_t pbr_smask;
+
+ /* per-device buffer hash table */
+ uint bt_hashmask;
+ uint bt_hashshift;
+ xfs_bufhash_t *bt_hash;
+} xfs_buftarg_t;
+
+/*
+ * xfs_buf_t: Buffer structure for page cache-based buffers
+ *
+ * This buffer structure is used by the page cache buffer management routines
+ * to refer to an assembly of pages forming a logical buffer. The actual I/O
+ * is performed with buffer_head structures, as required by drivers.
+ *
+ * The buffer structure is used on temporary basis only, and discarded when
+ * released. The real data storage is recorded in the page cache. Metadata is
+ * hashed to the block device on which the file system resides.
+ */
+
+struct xfs_buf;
+
+/* call-back function on I/O completion */
+typedef void (*page_buf_iodone_t)(struct xfs_buf *);
+/* call-back function on I/O completion */
+typedef void (*page_buf_relse_t)(struct xfs_buf *);
+/* pre-write function */
+typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
+
+#define PB_PAGES 2
+
+typedef struct xfs_buf {
+ struct semaphore pb_sema; /* semaphore for lockables */
+ unsigned long pb_queuetime; /* time buffer was queued */
+ atomic_t pb_pin_count; /* pin count */
+ wait_queue_head_t pb_waiters; /* unpin waiters */
+ struct list_head pb_list;
+ page_buf_flags_t pb_flags; /* status flags */
+ struct list_head pb_hash_list; /* hash table list */
+ xfs_bufhash_t *pb_hash; /* hash table list start */
+ xfs_buftarg_t *pb_target; /* buffer target (device) */
+ atomic_t pb_hold; /* reference count */
+ xfs_daddr_t pb_bn; /* block number for I/O */
+ loff_t pb_file_offset; /* offset in file */
+ size_t pb_buffer_length; /* size of buffer in bytes */
+ size_t pb_count_desired; /* desired transfer size */
+ void *pb_addr; /* virtual address of buffer */
+ struct work_struct pb_iodone_work;
+ atomic_t pb_io_remaining;/* #outstanding I/O requests */
+ page_buf_iodone_t pb_iodone; /* I/O completion function */
+ page_buf_relse_t pb_relse; /* releasing function */
+ page_buf_bdstrat_t pb_strat; /* pre-write function */
+ struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */
+ void *pb_fspriv;
+ void *pb_fspriv2;
+ void *pb_fspriv3;
+ unsigned short pb_error; /* error code on I/O */
+ unsigned short pb_locked; /* page array is locked */
+ unsigned int pb_page_count; /* size of page array */
+ unsigned int pb_offset; /* page offset in first page */
+ struct page **pb_pages; /* array of page pointers */
+ struct page *pb_page_array[PB_PAGES]; /* inline pages */
+#ifdef PAGEBUF_LOCK_TRACKING
+ int pb_last_holder;
+#endif
+} xfs_buf_t;
+
+
+/* Finding and Reading Buffers */
+
+extern xfs_buf_t *_pagebuf_find( /* find buffer for block if */
+ /* the block is in memory */
+ xfs_buftarg_t *, /* inode for block */
+ loff_t, /* starting offset of range */
+ size_t, /* length of range */
+ page_buf_flags_t, /* PBF_LOCK */
+ xfs_buf_t *); /* newly allocated buffer */
+
+#define xfs_incore(buftarg,blkno,len,lockit) \
+ _pagebuf_find(buftarg, blkno ,len, lockit, NULL)
+
+extern xfs_buf_t *xfs_buf_get_flags( /* allocate a buffer */
+ xfs_buftarg_t *, /* inode for buffer */
+ loff_t, /* starting offset of range */
+ size_t, /* length of range */
+ page_buf_flags_t); /* PBF_LOCK, PBF_READ, */
+ /* PBF_ASYNC */
+
+#define xfs_buf_get(target, blkno, len, flags) \
+ xfs_buf_get_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
+
+extern xfs_buf_t *xfs_buf_read_flags( /* allocate and read a buffer */
+ xfs_buftarg_t *, /* inode for buffer */
+ loff_t, /* starting offset of range */
+ size_t, /* length of range */
+ page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC */
+
+#define xfs_buf_read(target, blkno, len, flags) \
+ xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
+
+extern xfs_buf_t *pagebuf_lookup(
+ xfs_buftarg_t *,
+ loff_t, /* starting offset of range */
+ size_t, /* length of range */
+ page_buf_flags_t); /* PBF_READ, PBF_WRITE, */
+ /* PBF_FORCEIO, */
+
+extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */
+ /* no memory or disk address */
+ size_t len,
+ xfs_buftarg_t *); /* mount point "fake" inode */
+
+extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
+ /* without disk address */
+ size_t len,
+ xfs_buftarg_t *); /* mount point "fake" inode */
+
+extern int pagebuf_associate_memory(
+ xfs_buf_t *,
+ void *,
+ size_t);
+
+extern void pagebuf_hold( /* increment reference count */
+ xfs_buf_t *); /* buffer to hold */
+
+extern void pagebuf_readahead( /* read ahead into cache */
+ xfs_buftarg_t *, /* target for buffer (or NULL) */
+ loff_t, /* starting offset of range */
+ size_t, /* length of range */
+ page_buf_flags_t); /* additional read flags */
+
+/* Releasing Buffers */
+
+extern void pagebuf_free( /* deallocate a buffer */
+ xfs_buf_t *); /* buffer to deallocate */
+
+extern void pagebuf_rele( /* release hold on a buffer */
+ xfs_buf_t *); /* buffer to release */
+
+/* Locking and Unlocking Buffers */
+
+extern int pagebuf_cond_lock( /* lock buffer, if not locked */
+ /* (returns -EBUSY if locked) */
+ xfs_buf_t *); /* buffer to lock */
+
+extern int pagebuf_lock_value( /* return count on lock */
+ xfs_buf_t *); /* buffer to check */
+
+extern int pagebuf_lock( /* lock buffer */
+ xfs_buf_t *); /* buffer to lock */
+
+extern void pagebuf_unlock( /* unlock buffer */
+ xfs_buf_t *); /* buffer to unlock */
+
+/* Buffer Read and Write Routines */
+
+extern void pagebuf_iodone( /* mark buffer I/O complete */
+ xfs_buf_t *, /* buffer to mark */
+ int, /* use data/log helper thread. */
+ int); /* run completion locally, or in
+ * a helper thread. */
+
+extern void pagebuf_ioerror( /* mark buffer in error (or not) */
+ xfs_buf_t *, /* buffer to mark */
+ int); /* error to store (0 if none) */
+
+extern int pagebuf_iostart( /* start I/O on a buffer */
+ xfs_buf_t *, /* buffer to start */
+ page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */
+ /* PBF_READ, PBF_WRITE, */
+ /* PBF_DELWRI */
+
+extern int pagebuf_iorequest( /* start real I/O */
+ xfs_buf_t *); /* buffer to convey to device */
+
+extern int pagebuf_iowait( /* wait for buffer I/O done */
+ xfs_buf_t *); /* buffer to wait on */
+
+extern void pagebuf_iomove( /* move data in/out of pagebuf */
+ xfs_buf_t *, /* buffer to manipulate */
+ size_t, /* starting buffer offset */
+ size_t, /* length in buffer */
+ caddr_t, /* data pointer */
+ page_buf_rw_t); /* direction */
+
+static inline int pagebuf_iostrategy(xfs_buf_t *pb)
+{
+ return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
+}
+
+static inline int pagebuf_geterror(xfs_buf_t *pb)
+{
+ return pb ? pb->pb_error : ENOMEM;
+}
+
+/* Buffer Utility Routines */
+
+extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
+ xfs_buf_t *, /* buffer to offset into */
+ size_t); /* offset */
+
+/* Pinning Buffer Storage in Memory */
+
+extern void pagebuf_pin( /* pin buffer in memory */
+ xfs_buf_t *); /* buffer to pin */
+
+extern void pagebuf_unpin( /* unpin buffered data */
+ xfs_buf_t *); /* buffer to unpin */
+
+extern int pagebuf_ispin( /* check if buffer is pinned */
+ xfs_buf_t *); /* buffer to check */
+
+/* Delayed Write Buffer Routines */
+
+extern void pagebuf_delwri_dequeue(xfs_buf_t *);
+
+/* Buffer Daemon Setup Routines */
+
+extern int pagebuf_init(void);
+extern void pagebuf_terminate(void);
+
+
+#ifdef PAGEBUF_TRACE
+extern ktrace_t *pagebuf_trace_buf;
+extern void pagebuf_trace(
+ xfs_buf_t *, /* buffer being traced */
+ char *, /* description of operation */
+ void *, /* arbitrary diagnostic value */
+ void *); /* return address */
+#else
+# define pagebuf_trace(pb, id, ptr, ra) do { } while (0)
+#endif
+
+#define pagebuf_target_name(target) \
+ ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; })
+
+
+
+
+
+/* These are just for xfs_syncsub... it sets an internal variable
+ * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
+ */
+#define XFS_B_ASYNC PBF_ASYNC
+#define XFS_B_DELWRI PBF_DELWRI
+#define XFS_B_READ PBF_READ
+#define XFS_B_WRITE PBF_WRITE
+#define XFS_B_STALE PBF_STALE
+
+#define XFS_BUF_TRYLOCK PBF_TRYLOCK
+#define XFS_INCORE_TRYLOCK PBF_TRYLOCK
+#define XFS_BUF_LOCK PBF_LOCK
+#define XFS_BUF_MAPPED PBF_MAPPED
+
+#define BUF_BUSY PBF_DONT_BLOCK
+
+#define XFS_BUF_BFLAGS(x) ((x)->pb_flags)
+#define XFS_BUF_ZEROFLAGS(x) \
+ ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI))
+
+#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE)
+#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE)
+#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE)
+#define XFS_BUF_SUPER_STALE(x) do { \
+ XFS_BUF_STALE(x); \
+ pagebuf_delwri_dequeue(x); \
+ XFS_BUF_DONE(x); \
+ } while (0)
+
+#define XFS_BUF_MANAGE PBF_FS_MANAGED
+#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED)
+
+#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(x) pagebuf_delwri_dequeue(x)
+#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI)
+
+#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no)
+#define XFS_BUF_GETERROR(x) pagebuf_geterror(x)
+#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0)
+
+#define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE))
+#define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE)
+#define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x)))
+
+#define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO)
+#define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO)
+#define XFS_BUF_ISBUSY(x) (1)
+
+#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC)
+#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
+#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
+
+#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH)
+#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH)
+#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH)
+
+#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
+#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
+#define XFS_BUF_ISSHUT(x) (0)
+
+#define XFS_BUF_HOLD(x) pagebuf_hold(x)
+#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ)
+#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ)
+#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ)
+
+#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE)
+#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE)
+#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE)
+
+#define XFS_BUF_ISUNINITIAL(x) (0)
+#define XFS_BUF_UNUNINITIAL(x) (0)
+
+#define XFS_BUF_BP_ISMAPPED(bp) 1
+
+#define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD)
+#define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD)
+
+#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone
+#define XFS_BUF_SET_IODONE_FUNC(buf, func) \
+ (buf)->pb_iodone = (func)
+#define XFS_BUF_CLR_IODONE_FUNC(buf) \
+ (buf)->pb_iodone = NULL
+#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \
+ (buf)->pb_strat = (func)
+#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \
+ (buf)->pb_strat = NULL
+
+#define XFS_BUF_FSPRIVATE(buf, type) \
+ ((type)(buf)->pb_fspriv)
+#define XFS_BUF_SET_FSPRIVATE(buf, value) \
+ (buf)->pb_fspriv = (void *)(value)
+#define XFS_BUF_FSPRIVATE2(buf, type) \
+ ((type)(buf)->pb_fspriv2)
+#define XFS_BUF_SET_FSPRIVATE2(buf, value) \
+ (buf)->pb_fspriv2 = (void *)(value)
+#define XFS_BUF_FSPRIVATE3(buf, type) \
+ ((type)(buf)->pb_fspriv3)
+#define XFS_BUF_SET_FSPRIVATE3(buf, value) \
+ (buf)->pb_fspriv3 = (void *)(value)
+#define XFS_BUF_SET_START(buf)
+
+#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \
+ (buf)->pb_relse = (value)
+
+#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr)
+
+extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
+{
+ if (bp->pb_flags & PBF_MAPPED)
+ return XFS_BUF_PTR(bp) + offset;
+ return (xfs_caddr_t) pagebuf_offset(bp, offset);
+}
+
+#define XFS_BUF_SET_PTR(bp, val, count) \
+ pagebuf_associate_memory(bp, val, count)
+#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)
+#define XFS_BUF_SET_ADDR(bp, blk) \
+ ((bp)->pb_bn = (xfs_daddr_t)(blk))
+#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off) \
+ ((bp)->pb_file_offset = (off))
+#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt) \
+ ((bp)->pb_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt) \
+ ((bp)->pb_buffer_length = (cnt))
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_SET_REF(bp, ref)
+
+#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp)
+
+#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp)
+#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0)
+#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp)
+#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp)
+#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema);
+
+/* setup the buffer target from a buftarg structure */
+#define XFS_BUF_SET_TARGET(bp, target) \
+ (bp)->pb_target = (target)
+#define XFS_BUF_TARGET(bp) ((bp)->pb_target)
+#define XFS_BUFTARG_NAME(target) \
+ pagebuf_target_name(target)
+
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)
+#define XFS_BUF_SET_REF(bp, ref)
+
+static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
+{
+ bp->pb_fspriv3 = mp;
+ bp->pb_strat = xfs_bdstrat_cb;
+ pagebuf_delwri_dequeue(bp);
+ return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES);
+}
+
+static inline void xfs_buf_relse(xfs_buf_t *bp)
+{
+ if (!bp->pb_relse)
+ pagebuf_unlock(bp);
+ pagebuf_rele(bp);
+}
+
+#define xfs_bpin(bp) pagebuf_pin(bp)
+#define xfs_bunpin(bp) pagebuf_unpin(bp)
+
+#define xfs_buftrace(id, bp) \
+ pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0))
+
+#define xfs_biodone(pb) \
+ pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0)
+
+#define xfs_biomove(pb, off, len, data, rw) \
+ pagebuf_iomove((pb), (off), (len), (data), \
+ ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ)
+
+#define xfs_biozero(pb, off, len) \
+ pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO)
+
+
+static inline int XFS_bwrite(xfs_buf_t *pb)
+{
+ int iowait = (pb->pb_flags & PBF_ASYNC) == 0;
+ int error = 0;
+
+ if (!iowait)
+ pb->pb_flags |= _PBF_RUN_QUEUES;
+
+ pagebuf_delwri_dequeue(pb);
+ pagebuf_iostrategy(pb);
+ if (iowait) {
+ error = pagebuf_iowait(pb);
+ xfs_buf_relse(pb);
+ }
+ return error;
+}
+
+#define XFS_bdwrite(pb) \
+ pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC)
+
+static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
+{
+ bp->pb_strat = xfs_bdstrat_cb;
+ bp->pb_fspriv3 = mp;
+
+ return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC);
+}
+
+#define XFS_bdstrat(bp) pagebuf_iorequest(bp)
+
+#define xfs_iowait(pb) pagebuf_iowait(pb)
+
+#define xfs_baread(target, rablkno, ralen) \
+ pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK)
+
+#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target))
+#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
+#define xfs_buf_free(bp) pagebuf_free(bp)
+
+
+/*
+ * Handling of buftargs.
+ */
+
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
+extern void xfs_free_buftarg(xfs_buftarg_t *, int);
+extern void xfs_wait_buftarg(xfs_buftarg_t *);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern void xfs_incore_relse(xfs_buftarg_t *, int, int);
+extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
+
+#define xfs_getsize_buftarg(buftarg) \
+ block_size((buftarg)->pbr_bdev)
+#define xfs_readonly_buftarg(buftarg) \
+ bdev_read_only((buftarg)->pbr_bdev)
+#define xfs_binval(buftarg) \
+ xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg) \
+ xfs_flush_buftarg(buftarg, 1)
+
+#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
new file mode 100644
index 00000000000..00c45849d41
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_CRED_H__
+#define __XFS_CRED_H__
+
+/*
+ * Credentials
+ */
+typedef struct cred {
+ /* EMPTY */
+} cred_t;
+
+extern struct cred *sys_cred;
+
+/* this is a hack.. (assums sys_cred is the only cred_t in the system) */
+static __inline int capable_cred(cred_t *cr, int cid)
+{
+ return (cr == sys_cred) ? 1 : capable(cid);
+}
+
+#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
new file mode 100644
index 00000000000..f372a1a5e16
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_dmapi.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_mount.h"
+#include "xfs_export.h"
+
+/*
+ * XFS encode and decodes the fileid portion of NFS filehandles
+ * itself instead of letting the generic NFS code do it. This
+ * allows filesystems with 64 bit inode numbers to be exported.
+ *
+ * Note that a side effect is that xfs_vget() won't be passed a
+ * zero inode/generation pair under normal circumstances. As
+ * however a malicious client could send us such data, the check
+ * remains in that code.
+ */
+
+
+STATIC struct dentry *
+linvfs_decode_fh(
+ struct super_block *sb,
+ __u32 *fh,
+ int fh_len,
+ int fileid_type,
+ int (*acceptable)(
+ void *context,
+ struct dentry *de),
+ void *context)
+{
+ xfs_fid2_t ifid;
+ xfs_fid2_t pfid;
+ void *parent = NULL;
+ int is64 = 0;
+ __u32 *p = fh;
+
+#if XFS_BIG_INUMS
+ is64 = (fileid_type & XFS_FILEID_TYPE_64FLAG);
+ fileid_type &= ~XFS_FILEID_TYPE_64FLAG;
+#endif
+
+ /*
+ * Note that we only accept fileids which are long enough
+ * rather than allow the parent generation number to default
+ * to zero. XFS considers zero a valid generation number not
+ * an invalid/wildcard value. There's little point printk'ing
+ * a warning here as we don't have the client information
+ * which would make such a warning useful.
+ */
+ if (fileid_type > 2 ||
+ fh_len < xfs_fileid_length((fileid_type == 2), is64))
+ return NULL;
+
+ p = xfs_fileid_decode_fid2(p, &ifid, is64);
+
+ if (fileid_type == 2) {
+ p = xfs_fileid_decode_fid2(p, &pfid, is64);
+ parent = &pfid;
+ }
+
+ fh = (__u32 *)&ifid;
+ return find_exported_dentry(sb, fh, parent, acceptable, context);
+}
+
+
+STATIC int
+linvfs_encode_fh(
+ struct dentry *dentry,
+ __u32 *fh,
+ int *max_len,
+ int connectable)
+{
+ struct inode *inode = dentry->d_inode;
+ int type = 1;
+ __u32 *p = fh;
+ int len;
+ int is64 = 0;
+#if XFS_BIG_INUMS
+ vfs_t *vfs = LINVFS_GET_VFS(inode->i_sb);
+ xfs_mount_t *mp = XFS_VFSTOM(vfs);
+
+ if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT)) {
+ /* filesystem may contain 64bit inode numbers */
+ is64 = XFS_FILEID_TYPE_64FLAG;
+ }
+#endif
+
+ /* Directories don't need their parent encoded, they have ".." */
+ if (S_ISDIR(inode->i_mode))
+ connectable = 0;
+
+ /*
+ * Only encode if there is enough space given. In practice
+ * this means we can't export a filesystem with 64bit inodes
+ * over NFSv2 with the subtree_check export option; the other
+ * seven combinations work. The real answer is "don't use v2".
+ */
+ len = xfs_fileid_length(connectable, is64);
+ if (*max_len < len)
+ return 255;
+ *max_len = len;
+
+ p = xfs_fileid_encode_inode(p, inode, is64);
+ if (connectable) {
+ spin_lock(&dentry->d_lock);
+ p = xfs_fileid_encode_inode(p, dentry->d_parent->d_inode, is64);
+ spin_unlock(&dentry->d_lock);
+ type = 2;
+ }
+ BUG_ON((p - fh) != len);
+ return type | is64;
+}
+
+STATIC struct dentry *
+linvfs_get_dentry(
+ struct super_block *sb,
+ void *data)
+{
+ vnode_t *vp;
+ struct inode *inode;
+ struct dentry *result;
+ vfs_t *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+
+ VFS_VGET(vfsp, &vp, (fid_t *)data, error);
+ if (error || vp == NULL)
+ return ERR_PTR(-ESTALE) ;
+
+ inode = LINVFS_GET_IP(vp);
+ result = d_alloc_anon(inode);
+ if (!result) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ return result;
+}
+
+STATIC struct dentry *
+linvfs_get_parent(
+ struct dentry *child)
+{
+ int error;
+ vnode_t *vp, *cvp;
+ struct dentry *parent;
+ struct dentry dotdot;
+
+ dotdot.d_name.name = "..";
+ dotdot.d_name.len = 2;
+ dotdot.d_inode = NULL;
+
+ cvp = NULL;
+ vp = LINVFS_GET_VP(child->d_inode);
+ VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
+ if (unlikely(error))
+ return ERR_PTR(-error);
+
+ parent = d_alloc_anon(LINVFS_GET_IP(cvp));
+ if (unlikely(!parent)) {
+ VN_RELE(cvp);
+ return ERR_PTR(-ENOMEM);
+ }
+ return parent;
+}
+
+struct export_operations linvfs_export_ops = {
+ .decode_fh = linvfs_decode_fh,
+ .encode_fh = linvfs_encode_fh,
+ .get_parent = linvfs_get_parent,
+ .get_dentry = linvfs_get_dentry,
+};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
new file mode 100644
index 00000000000..60b2abac1c1
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_EXPORT_H__
+#define __XFS_EXPORT_H__
+
+/*
+ * Common defines for code related to exporting XFS filesystems over NFS.
+ *
+ * The NFS fileid goes out on the wire as an array of
+ * 32bit unsigned ints in host order. There are 5 possible
+ * formats.
+ *
+ * (1) fileid_type=0x00
+ * (no fileid data; handled by the generic code)
+ *
+ * (2) fileid_type=0x01
+ * inode-num
+ * generation
+ *
+ * (3) fileid_type=0x02
+ * inode-num
+ * generation
+ * parent-inode-num
+ * parent-generation
+ *
+ * (4) fileid_type=0x81
+ * inode-num-lo32
+ * inode-num-hi32
+ * generation
+ *
+ * (5) fileid_type=0x82
+ * inode-num-lo32
+ * inode-num-hi32
+ * generation
+ * parent-inode-num-lo32
+ * parent-inode-num-hi32
+ * parent-generation
+ *
+ * Note, the NFS filehandle also includes an fsid portion which
+ * may have an inode number in it. That number is hardcoded to
+ * 32bits and there is no way for XFS to intercept it. In
+ * practice this means when exporting an XFS filesytem with 64bit
+ * inodes you should either export the mountpoint (rather than
+ * a subdirectory) or use the "fsid" export option.
+ */
+
+/* This flag goes on the wire. Don't play with it. */
+#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */
+
+/* Calculate the length in u32 units of the fileid data */
+static inline int
+xfs_fileid_length(int hasparent, int is64)
+{
+ return hasparent ? (is64 ? 6 : 4) : (is64 ? 3 : 2);
+}
+
+/*
+ * Decode encoded inode information (either for the inode itself
+ * or the parent) into an xfs_fid2_t structure. Advances and
+ * returns the new data pointer
+ */
+static inline __u32 *
+xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64)
+{
+ fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len);
+ fid->fid_pad = 0;
+ fid->fid_ino = *p++;
+#if XFS_BIG_INUMS
+ if (is64)
+ fid->fid_ino |= (((__u64)(*p++)) << 32);
+#endif
+ fid->fid_gen = *p++;
+ return p;
+}
+
+/*
+ * Encode inode information (either for the inode itself or the
+ * parent) into a fileid buffer. Advances and returns the new
+ * data pointer.
+ */
+static inline __u32 *
+xfs_fileid_encode_inode(__u32 *p, struct inode *inode, int is64)
+{
+ *p++ = (__u32)inode->i_ino;
+#if XFS_BIG_INUMS
+ if (is64)
+ *p++ = (__u32)(inode->i_ino >> 32);
+#endif
+ *p++ = inode->i_generation;
+ return p;
+}
+
+#endif /* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
new file mode 100644
index 00000000000..9f057a4a5b0
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_trans.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+#include "xfs_ioctl32.h"
+
+#include <linux/dcache.h>
+#include <linux/smp_lock.h>
+
+static struct vm_operations_struct linvfs_file_vm_ops;
+
+
+STATIC inline ssize_t
+__linvfs_read(
+ struct kiocb *iocb,
+ char __user *buf,
+ int ioflags,
+ size_t count,
+ loff_t pos)
+{
+ struct iovec iov = {buf, count};
+ struct file *file = iocb->ki_filp;
+ vnode_t *vp = LINVFS_GET_VP(file->f_dentry->d_inode);
+ ssize_t rval;
+
+ BUG_ON(iocb->ki_pos != pos);
+
+ if (unlikely(file->f_flags & O_DIRECT))
+ ioflags |= IO_ISDIRECT;
+ VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ return rval;
+}
+
+
+STATIC ssize_t
+linvfs_aio_read(
+ struct kiocb *iocb,
+ char __user *buf,
+ size_t count,
+ loff_t pos)
+{
+ return __linvfs_read(iocb, buf, IO_ISAIO, count, pos);
+}
+
+STATIC ssize_t
+linvfs_aio_read_invis(
+ struct kiocb *iocb,
+ char __user *buf,
+ size_t count,
+ loff_t pos)
+{
+ return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+}
+
+
+STATIC inline ssize_t
+__linvfs_write(
+ struct kiocb *iocb,
+ const char __user *buf,
+ int ioflags,
+ size_t count,
+ loff_t pos)
+{
+ struct iovec iov = {(void __user *)buf, count};
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ ssize_t rval;
+
+ BUG_ON(iocb->ki_pos != pos);
+ if (unlikely(file->f_flags & O_DIRECT))
+ ioflags |= IO_ISDIRECT;
+
+ VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
+ return rval;
+}
+
+
+STATIC ssize_t
+linvfs_aio_write(
+ struct kiocb *iocb,
+ const char __user *buf,
+ size_t count,
+ loff_t pos)
+{
+ return __linvfs_write(iocb, buf, IO_ISAIO, count, pos);
+}
+
+STATIC ssize_t
+linvfs_aio_write_invis(
+ struct kiocb *iocb,
+ const char __user *buf,
+ size_t count,
+ loff_t pos)
+{
+ return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
+}
+
+
+STATIC inline ssize_t
+__linvfs_readv(
+ struct file *file,
+ const struct iovec *iov,
+ int ioflags,
+ unsigned long nr_segs,
+ loff_t *ppos)
+{
+ struct inode *inode = file->f_mapping->host;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ struct kiocb kiocb;
+ ssize_t rval;
+
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
+
+ if (unlikely(file->f_flags & O_DIRECT))
+ ioflags |= IO_ISDIRECT;
+ VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
+
+ *ppos = kiocb.ki_pos;
+ return rval;
+}
+
+STATIC ssize_t
+linvfs_readv(
+ struct file *file,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ loff_t *ppos)
+{
+ return __linvfs_readv(file, iov, 0, nr_segs, ppos);
+}
+
+STATIC ssize_t
+linvfs_readv_invis(
+ struct file *file,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ loff_t *ppos)
+{
+ return __linvfs_readv(file, iov, IO_INVIS, nr_segs, ppos);
+}
+
+
+STATIC inline ssize_t
+__linvfs_writev(
+ struct file *file,
+ const struct iovec *iov,
+ int ioflags,
+ unsigned long nr_segs,
+ loff_t *ppos)
+{
+ struct inode *inode = file->f_mapping->host;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ struct kiocb kiocb;
+ ssize_t rval;
+
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *ppos;
+ if (unlikely(file->f_flags & O_DIRECT))
+ ioflags |= IO_ISDIRECT;
+
+ VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
+
+ *ppos = kiocb.ki_pos;
+ return rval;
+}
+
+
+STATIC ssize_t
+linvfs_writev(
+ struct file *file,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ loff_t *ppos)
+{
+ return __linvfs_writev(file, iov, 0, nr_segs, ppos);
+}
+
+STATIC ssize_t
+linvfs_writev_invis(
+ struct file *file,
+ const struct iovec *iov,
+ unsigned long nr_segs,
+ loff_t *ppos)
+{
+ return __linvfs_writev(file, iov, IO_INVIS, nr_segs, ppos);
+}
+
+STATIC ssize_t
+linvfs_sendfile(
+ struct file *filp,
+ loff_t *ppos,
+ size_t count,
+ read_actor_t actor,
+ void *target)
+{
+ vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
+ ssize_t rval;
+
+ VOP_SENDFILE(vp, filp, ppos, 0, count, actor, target, NULL, rval);
+ return rval;
+}
+
+
+STATIC int
+linvfs_open(
+ struct inode *inode,
+ struct file *filp)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error;
+
+ if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+ return -EFBIG;
+
+ ASSERT(vp);
+ VOP_OPEN(vp, NULL, error);
+ return -error;
+}
+
+
+STATIC int
+linvfs_release(
+ struct inode *inode,
+ struct file *filp)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error = 0;
+
+ if (vp)
+ VOP_RELEASE(vp, error);
+ return -error;
+}
+
+
+STATIC int
+linvfs_fsync(
+ struct file *filp,
+ struct dentry *dentry,
+ int datasync)
+{
+ struct inode *inode = dentry->d_inode;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error;
+ int flags = FSYNC_WAIT;
+
+ if (datasync)
+ flags |= FSYNC_DATA;
+
+ ASSERT(vp);
+ VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
+ return -error;
+}
+
+/*
+ * linvfs_readdir maps to VOP_READDIR().
+ * We need to build a uio, cred, ...
+ */
+
+#define nextdp(dp) ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
+
+STATIC int
+linvfs_readdir(
+ struct file *filp,
+ void *dirent,
+ filldir_t filldir)
+{
+ int error = 0;
+ vnode_t *vp;
+ uio_t uio;
+ iovec_t iov;
+ int eof = 0;
+ caddr_t read_buf;
+ int namelen, size = 0;
+ size_t rlen = PAGE_CACHE_SIZE;
+ xfs_off_t start_offset, curr_offset;
+ xfs_dirent_t *dbp = NULL;
+
+ vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
+ ASSERT(vp);
+
+ /* Try fairly hard to get memory */
+ do {
+ if ((read_buf = (caddr_t)kmalloc(rlen, GFP_KERNEL)))
+ break;
+ rlen >>= 1;
+ } while (rlen >= 1024);
+
+ if (read_buf == NULL)
+ return -ENOMEM;
+
+ uio.uio_iov = &iov;
+ uio.uio_segflg = UIO_SYSSPACE;
+ curr_offset = filp->f_pos;
+ if (filp->f_pos != 0x7fffffff)
+ uio.uio_offset = filp->f_pos;
+ else
+ uio.uio_offset = 0xffffffff;
+
+ while (!eof) {
+ uio.uio_resid = iov.iov_len = rlen;
+ iov.iov_base = read_buf;
+ uio.uio_iovcnt = 1;
+
+ start_offset = uio.uio_offset;
+
+ VOP_READDIR(vp, &uio, NULL, &eof, error);
+ if ((uio.uio_offset == start_offset) || error) {
+ size = 0;
+ break;
+ }
+
+ size = rlen - uio.uio_resid;
+ dbp = (xfs_dirent_t *)read_buf;
+ while (size > 0) {
+ namelen = strlen(dbp->d_name);
+
+ if (filldir(dirent, dbp->d_name, namelen,
+ (loff_t) curr_offset & 0x7fffffff,
+ (ino_t) dbp->d_ino,
+ DT_UNKNOWN)) {
+ goto done;
+ }
+ size -= dbp->d_reclen;
+ curr_offset = (loff_t)dbp->d_off /* & 0x7fffffff */;
+ dbp = nextdp(dbp);
+ }
+ }
+done:
+ if (!error) {
+ if (size == 0)
+ filp->f_pos = uio.uio_offset & 0x7fffffff;
+ else if (dbp)
+ filp->f_pos = curr_offset;
+ }
+
+ kfree(read_buf);
+ return -error;
+}
+
+
+STATIC int
+linvfs_file_mmap(
+ struct file *filp,
+ struct vm_area_struct *vma)
+{
+ struct inode *ip = filp->f_dentry->d_inode;
+ vnode_t *vp = LINVFS_GET_VP(ip);
+ vattr_t va = { .va_mask = XFS_AT_UPDATIME };
+ int error;
+
+ if (vp->v_vfsp->vfs_flag & VFS_DMI) {
+ xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
+
+ error = -XFS_SEND_MMAP(mp, vma, 0);
+ if (error)
+ return error;
+ }
+
+ vma->vm_ops = &linvfs_file_vm_ops;
+
+ VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
+ if (!error)
+ vn_revalidate(vp); /* update Linux inode flags */
+ return 0;
+}
+
+
+STATIC long
+linvfs_ioctl(
+ struct file *filp,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ int error;
+ struct inode *inode = filp->f_dentry->d_inode;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+
+ VOP_IOCTL(vp, inode, filp, 0, cmd, (void __user *)arg, error);
+ VMODIFY(vp);
+
+ /* NOTE: some of the ioctl's return positive #'s as a
+ * byte count indicating success, such as
+ * readlink_by_handle. So we don't "sign flip"
+ * like most other routines. This means true
+ * errors need to be returned as a negative value.
+ */
+ return error;
+}
+
+STATIC long
+linvfs_ioctl_invis(
+ struct file *filp,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ int error;
+ struct inode *inode = filp->f_dentry->d_inode;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+
+ ASSERT(vp);
+ VOP_IOCTL(vp, inode, filp, IO_INVIS, cmd, (void __user *)arg, error);
+ VMODIFY(vp);
+
+ /* NOTE: some of the ioctl's return positive #'s as a
+ * byte count indicating success, such as
+ * readlink_by_handle. So we don't "sign flip"
+ * like most other routines. This means true
+ * errors need to be returned as a negative value.
+ */
+ return error;
+}
+
+#ifdef HAVE_VMOP_MPROTECT
+STATIC int
+linvfs_mprotect(
+ struct vm_area_struct *vma,
+ unsigned int newflags)
+{
+ vnode_t *vp = LINVFS_GET_VP(vma->vm_file->f_dentry->d_inode);
+ int error = 0;
+
+ if (vp->v_vfsp->vfs_flag & VFS_DMI) {
+ if ((vma->vm_flags & VM_MAYSHARE) &&
+ (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE)) {
+ xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
+
+ error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
+ }
+ }
+ return error;
+}
+#endif /* HAVE_VMOP_MPROTECT */
+
+#ifdef HAVE_FOP_OPEN_EXEC
+/* If the user is attempting to execute a file that is offline then
+ * we have to trigger a DMAPI READ event before the file is marked as busy
+ * otherwise the invisible I/O will not be able to write to the file to bring
+ * it back online.
+ */
+STATIC int
+linvfs_open_exec(
+ struct inode *inode)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ xfs_mount_t *mp = XFS_VFSTOM(vp->v_vfsp);
+ int error = 0;
+ bhv_desc_t *bdp;
+ xfs_inode_t *ip;
+
+ if (vp->v_vfsp->vfs_flag & VFS_DMI) {
+ bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
+ if (!bdp) {
+ error = -EINVAL;
+ goto open_exec_out;
+ }
+ ip = XFS_BHVTOI(bdp);
+ if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)) {
+ error = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp,
+ 0, 0, 0, NULL);
+ }
+ }
+open_exec_out:
+ return error;
+}
+#endif /* HAVE_FOP_OPEN_EXEC */
+
+struct file_operations linvfs_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = linvfs_readv,
+ .writev = linvfs_writev,
+ .aio_read = linvfs_aio_read,
+ .aio_write = linvfs_aio_write,
+ .sendfile = linvfs_sendfile,
+ .unlocked_ioctl = linvfs_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = xfs_compat_ioctl,
+#endif
+ .mmap = linvfs_file_mmap,
+ .open = linvfs_open,
+ .release = linvfs_release,
+ .fsync = linvfs_fsync,
+#ifdef HAVE_FOP_OPEN_EXEC
+ .open_exec = linvfs_open_exec,
+#endif
+};
+
+struct file_operations linvfs_invis_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = linvfs_readv_invis,
+ .writev = linvfs_writev_invis,
+ .aio_read = linvfs_aio_read_invis,
+ .aio_write = linvfs_aio_write_invis,
+ .sendfile = linvfs_sendfile,
+ .unlocked_ioctl = linvfs_ioctl_invis,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = xfs_compat_invis_ioctl,
+#endif
+ .mmap = linvfs_file_mmap,
+ .open = linvfs_open,
+ .release = linvfs_release,
+ .fsync = linvfs_fsync,
+};
+
+
+struct file_operations linvfs_dir_operations = {
+ .read = generic_read_dir,
+ .readdir = linvfs_readdir,
+ .unlocked_ioctl = linvfs_ioctl,
+ .fsync = linvfs_fsync,
+};
+
+static struct vm_operations_struct linvfs_file_vm_ops = {
+ .nopage = filemap_nopage,
+ .populate = filemap_populate,
+#ifdef HAVE_VMOP_MPROTECT
+ .mprotect = linvfs_mprotect,
+#endif
+};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
new file mode 100644
index 00000000000..05ebd30ec96
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+/*
+ * Stub for no-op vnode operations that return error status.
+ */
+int
+fs_noerr(void)
+{
+ return 0;
+}
+
+/*
+ * Operation unsupported under this file system.
+ */
+int
+fs_nosys(void)
+{
+ return ENOSYS;
+}
+
+/*
+ * Stub for inactive, strategy, and read/write lock/unlock. Does nothing.
+ */
+/* ARGSUSED */
+void
+fs_noval(void)
+{
+}
+
+/*
+ * vnode pcache layer for vnode_tosspages.
+ * 'last' parameter unused but left in for IRIX compatibility
+ */
+void
+fs_tosspages(
+ bhv_desc_t *bdp,
+ xfs_off_t first,
+ xfs_off_t last,
+ int fiopt)
+{
+ vnode_t *vp = BHV_TO_VNODE(bdp);
+ struct inode *ip = LINVFS_GET_IP(vp);
+
+ if (VN_CACHED(vp))
+ truncate_inode_pages(ip->i_mapping, first);
+}
+
+
+/*
+ * vnode pcache layer for vnode_flushinval_pages.
+ * 'last' parameter unused but left in for IRIX compatibility
+ */
+void
+fs_flushinval_pages(
+ bhv_desc_t *bdp,
+ xfs_off_t first,
+ xfs_off_t last,
+ int fiopt)
+{
+ vnode_t *vp = BHV_TO_VNODE(bdp);
+ struct inode *ip = LINVFS_GET_IP(vp);
+
+ if (VN_CACHED(vp)) {
+ filemap_fdatawrite(ip->i_mapping);
+ filemap_fdatawait(ip->i_mapping);
+
+ truncate_inode_pages(ip->i_mapping, first);
+ }
+}
+
+/*
+ * vnode pcache layer for vnode_flush_pages.
+ * 'last' parameter unused but left in for IRIX compatibility
+ */
+int
+fs_flush_pages(
+ bhv_desc_t *bdp,
+ xfs_off_t first,
+ xfs_off_t last,
+ uint64_t flags,
+ int fiopt)
+{
+ vnode_t *vp = BHV_TO_VNODE(bdp);
+ struct inode *ip = LINVFS_GET_IP(vp);
+
+ if (VN_CACHED(vp)) {
+ filemap_fdatawrite(ip->i_mapping);
+ filemap_fdatawait(ip->i_mapping);
+ }
+
+ return 0;
+}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
new file mode 100644
index 00000000000..2db9ddbd456
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000, 2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUBR_H__
+#define __XFS_SUBR_H__
+
+/*
+ * Utilities shared among file system implementations.
+ */
+
+struct cred;
+
+extern int fs_noerr(void);
+extern int fs_nosys(void);
+extern void fs_noval(void);
+extern void fs_tosspages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+extern void fs_flushinval_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+extern int fs_flush_pages(bhv_desc_t *, xfs_off_t, xfs_off_t, uint64_t, int);
+
+#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
new file mode 100644
index 00000000000..a6da5b4fd24
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * This file contains globals needed by XFS that were normally defined
+ * somewhere else in IRIX.
+ */
+
+#include "xfs.h"
+#include "xfs_cred.h"
+#include "xfs_sysctl.h"
+
+/*
+ * System memory size - used to scale certain data structures in XFS.
+ */
+unsigned long xfs_physmem;
+
+/*
+ * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
+ * other XFS code uses these values. Times are measured in centisecs (i.e.
+ * 100ths of a second).
+ */
+xfs_param_t xfs_params = {
+ /* MIN DFLT MAX */
+ .restrict_chown = { 0, 1, 1 },
+ .sgid_inherit = { 0, 0, 1 },
+ .symlink_mode = { 0, 0, 1 },
+ .panic_mask = { 0, 0, 127 },
+ .error_level = { 0, 3, 11 },
+ .syncd_timer = { 1*100, 30*100, 7200*100},
+ .stats_clear = { 0, 0, 1 },
+ .inherit_sync = { 0, 1, 1 },
+ .inherit_nodump = { 0, 1, 1 },
+ .inherit_noatim = { 0, 1, 1 },
+ .xfs_buf_timer = { 100/2, 1*100, 30*100 },
+ .xfs_buf_age = { 1*100, 15*100, 7200*100},
+ .inherit_nosym = { 0, 0, 1 },
+ .rotorstep = { 1, 1, 255 },
+};
+
+/*
+ * Global system credential structure.
+ */
+cred_t sys_cred_val, *sys_cred = &sys_cred_val;
+
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
new file mode 100644
index 00000000000..e81e2f38a85
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_GLOBALS_H__
+#define __XFS_GLOBALS_H__
+
+/*
+ * This file declares globals needed by XFS that were normally defined
+ * somewhere else in IRIX.
+ */
+
+extern uint64_t xfs_panic_mask; /* set to cause more panics */
+extern unsigned long xfs_physmem;
+extern struct cred *sys_cred;
+
+#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
new file mode 100644
index 00000000000..69809eef8a5
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -0,0 +1,1336 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_dfrag.h"
+#include "xfs_fsops.h"
+
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+
+/*
+ * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
+ * a file or fs handle.
+ *
+ * XFS_IOC_PATH_TO_FSHANDLE
+ * returns fs handle for a mount point or path within that mount point
+ * XFS_IOC_FD_TO_HANDLE
+ * returns full handle for a FD opened in user space
+ * XFS_IOC_PATH_TO_HANDLE
+ * returns full handle for a path
+ */
+STATIC int
+xfs_find_handle(
+ unsigned int cmd,
+ void __user *arg)
+{
+ int hsize;
+ xfs_handle_t handle;
+ xfs_fsop_handlereq_t hreq;
+ struct inode *inode;
+ struct vnode *vp;
+
+ if (copy_from_user(&hreq, arg, sizeof(hreq)))
+ return -XFS_ERROR(EFAULT);
+
+ memset((char *)&handle, 0, sizeof(handle));
+
+ switch (cmd) {
+ case XFS_IOC_PATH_TO_FSHANDLE:
+ case XFS_IOC_PATH_TO_HANDLE: {
+ struct nameidata nd;
+ int error;
+
+ error = user_path_walk_link((const char __user *)hreq.path, &nd);
+ if (error)
+ return error;
+
+ ASSERT(nd.dentry);
+ ASSERT(nd.dentry->d_inode);
+ inode = igrab(nd.dentry->d_inode);
+ path_release(&nd);
+ break;
+ }
+
+ case XFS_IOC_FD_TO_HANDLE: {
+ struct file *file;
+
+ file = fget(hreq.fd);
+ if (!file)
+ return -EBADF;
+
+ ASSERT(file->f_dentry);
+ ASSERT(file->f_dentry->d_inode);
+ inode = igrab(file->f_dentry->d_inode);
+ fput(file);
+ break;
+ }
+
+ default:
+ ASSERT(0);
+ return -XFS_ERROR(EINVAL);
+ }
+
+ if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
+ /* we're not in XFS anymore, Toto */
+ iput(inode);
+ return -XFS_ERROR(EINVAL);
+ }
+
+ /* we need the vnode */
+ vp = LINVFS_GET_VP(inode);
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+ iput(inode);
+ return -XFS_ERROR(EBADF);
+ }
+
+ /* now we can grab the fsid */
+ memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
+ hsize = sizeof(xfs_fsid_t);
+
+ if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
+ xfs_inode_t *ip;
+ bhv_desc_t *bhv;
+ int lock_mode;
+
+ /* need to get access to the xfs_inode to read the generation */
+ bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops);
+ ASSERT(bhv);
+ ip = XFS_BHVTOI(bhv);
+ ASSERT(ip);
+ lock_mode = xfs_ilock_map_shared(ip);
+
+ /* fill in fid section of handle from inode */
+ handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
+ sizeof(handle.ha_fid.xfs_fid_len);
+ handle.ha_fid.xfs_fid_pad = 0;
+ handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
+ handle.ha_fid.xfs_fid_ino = ip->i_ino;
+
+ xfs_iunlock_map_shared(ip, lock_mode);
+
+ hsize = XFS_HSIZE(handle);
+ }
+
+ /* now copy our handle into the user buffer & write out the size */
+ if (copy_to_user(hreq.ohandle, &handle, hsize) ||
+ copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
+ iput(inode);
+ return -XFS_ERROR(EFAULT);
+ }
+
+ iput(inode);
+ return 0;
+}
+
+
+/*
+ * Convert userspace handle data into vnode (and inode).
+ * We [ab]use the fact that all the fsop_handlereq ioctl calls
+ * have a data structure argument whose first component is always
+ * a xfs_fsop_handlereq_t, so we can cast to and from this type.
+ * This allows us to optimise the copy_from_user calls and gives
+ * a handy, shared routine.
+ *
+ * If no error, caller must always VN_RELE the returned vp.
+ */
+STATIC int
+xfs_vget_fsop_handlereq(
+ xfs_mount_t *mp,
+ struct inode *parinode, /* parent inode pointer */
+ xfs_fsop_handlereq_t *hreq,
+ vnode_t **vp,
+ struct inode **inode)
+{
+ void __user *hanp;
+ size_t hlen;
+ xfs_fid_t *xfid;
+ xfs_handle_t *handlep;
+ xfs_handle_t handle;
+ xfs_inode_t *ip;
+ struct inode *inodep;
+ vnode_t *vpp;
+ xfs_ino_t ino;
+ __u32 igen;
+ int error;
+
+ /*
+ * Only allow handle opens under a directory.
+ */
+ if (!S_ISDIR(parinode->i_mode))
+ return XFS_ERROR(ENOTDIR);
+
+ hanp = hreq->ihandle;
+ hlen = hreq->ihandlen;
+ handlep = &handle;
+
+ if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
+ return XFS_ERROR(EINVAL);
+ if (copy_from_user(handlep, hanp, hlen))
+ return XFS_ERROR(EFAULT);
+ if (hlen < sizeof(*handlep))
+ memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
+ if (hlen > sizeof(handlep->ha_fsid)) {
+ if (handlep->ha_fid.xfs_fid_len !=
+ (hlen - sizeof(handlep->ha_fsid)
+ - sizeof(handlep->ha_fid.xfs_fid_len))
+ || handlep->ha_fid.xfs_fid_pad)
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
+ * Crack the handle, obtain the inode # & generation #
+ */
+ xfid = (struct xfs_fid *)&handlep->ha_fid;
+ if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
+ ino = xfid->xfs_fid_ino;
+ igen = xfid->xfs_fid_gen;
+ } else {
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
+ * Get the XFS inode, building a vnode to go with it.
+ */
+ error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
+ if (error)
+ return error;
+ if (ip == NULL)
+ return XFS_ERROR(EIO);
+ if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
+ xfs_iput_new(ip, XFS_ILOCK_SHARED);
+ return XFS_ERROR(ENOENT);
+ }
+
+ vpp = XFS_ITOV(ip);
+ inodep = LINVFS_GET_IP(vpp);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ *vp = vpp;
+ *inode = inodep;
+ return 0;
+}
+
+STATIC int
+xfs_open_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct file *parfilp,
+ struct inode *parinode)
+{
+ int error;
+ int new_fd;
+ int permflag;
+ struct file *filp;
+ struct inode *inode;
+ struct dentry *dentry;
+ vnode_t *vp;
+ xfs_fsop_handlereq_t hreq;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
+ if (error)
+ return -error;
+
+ /* Restrict xfs_open_by_handle to directories & regular files. */
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+ iput(inode);
+ return -XFS_ERROR(EINVAL);
+ }
+
+#if BITS_PER_LONG != 32
+ hreq.oflags |= O_LARGEFILE;
+#endif
+ /* Put open permission in namei format. */
+ permflag = hreq.oflags;
+ if ((permflag+1) & O_ACCMODE)
+ permflag++;
+ if (permflag & O_TRUNC)
+ permflag |= 2;
+
+ if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
+ (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
+ iput(inode);
+ return -XFS_ERROR(EPERM);
+ }
+
+ if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+ iput(inode);
+ return -XFS_ERROR(EACCES);
+ }
+
+ /* Can't write directories. */
+ if ( S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
+ iput(inode);
+ return -XFS_ERROR(EISDIR);
+ }
+
+ if ((new_fd = get_unused_fd()) < 0) {
+ iput(inode);
+ return new_fd;
+ }
+
+ dentry = d_alloc_anon(inode);
+ if (dentry == NULL) {
+ iput(inode);
+ put_unused_fd(new_fd);
+ return -XFS_ERROR(ENOMEM);
+ }
+
+ /* Ensure umount returns EBUSY on umounts while this file is open. */
+ mntget(parfilp->f_vfsmnt);
+
+ /* Create file pointer. */
+ filp = dentry_open(dentry, parfilp->f_vfsmnt, hreq.oflags);
+ if (IS_ERR(filp)) {
+ put_unused_fd(new_fd);
+ return -XFS_ERROR(-PTR_ERR(filp));
+ }
+ if (inode->i_mode & S_IFREG)
+ filp->f_op = &linvfs_invis_file_operations;
+
+ fd_install(new_fd, filp);
+ return new_fd;
+}
+
+STATIC int
+xfs_readlink_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct file *parfilp,
+ struct inode *parinode)
+{
+ int error;
+ struct iovec aiov;
+ struct uio auio;
+ struct inode *inode;
+ xfs_fsop_handlereq_t hreq;
+ vnode_t *vp;
+ __u32 olen;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &vp, &inode);
+ if (error)
+ return -error;
+
+ /* Restrict this handle operation to symlinks only. */
+ if (vp->v_type != VLNK) {
+ VN_RELE(vp);
+ return -XFS_ERROR(EINVAL);
+ }
+
+ if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) {
+ VN_RELE(vp);
+ return -XFS_ERROR(EFAULT);
+ }
+ aiov.iov_len = olen;
+ aiov.iov_base = hreq.ohandle;
+
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = 0;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_resid = olen;
+
+ VOP_READLINK(vp, &auio, IO_INVIS, NULL, error);
+
+ VN_RELE(vp);
+ return (olen - auio.uio_resid);
+}
+
+STATIC int
+xfs_fssetdm_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct file *parfilp,
+ struct inode *parinode)
+{
+ int error;
+ struct fsdmidata fsd;
+ xfs_fsop_setdm_handlereq_t dmhreq;
+ struct inode *inode;
+ bhv_desc_t *bdp;
+ vnode_t *vp;
+
+ if (!capable(CAP_MKNOD))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_vget_fsop_handlereq(mp, parinode, &dmhreq.hreq, &vp, &inode);
+ if (error)
+ return -error;
+
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
+ VN_RELE(vp);
+ return -XFS_ERROR(EPERM);
+ }
+
+ if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
+ VN_RELE(vp);
+ return -XFS_ERROR(EFAULT);
+ }
+
+ bdp = bhv_base_unlocked(VN_BHV_HEAD(vp));
+ error = xfs_set_dmattrs(bdp, fsd.fsd_dmevmask, fsd.fsd_dmstate, NULL);
+
+ VN_RELE(vp);
+ if (error)
+ return -error;
+ return 0;
+}
+
+STATIC int
+xfs_attrlist_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct file *parfilp,
+ struct inode *parinode)
+{
+ int error;
+ attrlist_cursor_kern_t *cursor;
+ xfs_fsop_attrlist_handlereq_t al_hreq;
+ struct inode *inode;
+ vnode_t *vp;
+ char *kbuf;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+ if (al_hreq.buflen > XATTR_LIST_MAX)
+ return -XFS_ERROR(EINVAL);
+
+ error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq,
+ &vp, &inode);
+ if (error)
+ goto out;
+
+ kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+ if (!kbuf)
+ goto out_vn_rele;
+
+ cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+ VOP_ATTR_LIST(vp, kbuf, al_hreq.buflen, al_hreq.flags,
+ cursor, NULL, error);
+ if (error)
+ goto out_kfree;
+
+ if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
+ error = -EFAULT;
+
+ out_kfree:
+ kfree(kbuf);
+ out_vn_rele:
+ VN_RELE(vp);
+ out:
+ return -error;
+}
+
+STATIC int
+xfs_attrmulti_attr_get(
+ struct vnode *vp,
+ char *name,
+ char __user *ubuf,
+ __uint32_t *len,
+ __uint32_t flags)
+{
+ char *kbuf;
+ int error = EFAULT;
+
+ if (*len > XATTR_SIZE_MAX)
+ return EINVAL;
+ kbuf = kmalloc(*len, GFP_KERNEL);
+ if (!kbuf)
+ return ENOMEM;
+
+ VOP_ATTR_GET(vp, name, kbuf, len, flags, NULL, error);
+ if (error)
+ goto out_kfree;
+
+ if (copy_to_user(ubuf, kbuf, *len))
+ error = EFAULT;
+
+ out_kfree:
+ kfree(kbuf);
+ return error;
+}
+
+STATIC int
+xfs_attrmulti_attr_set(
+ struct vnode *vp,
+ char *name,
+ const char __user *ubuf,
+ __uint32_t len,
+ __uint32_t flags)
+{
+ char *kbuf;
+ int error = EFAULT;
+
+ if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
+ return EPERM;
+ if (len > XATTR_SIZE_MAX)
+ return EINVAL;
+
+ kbuf = kmalloc(len, GFP_KERNEL);
+ if (!kbuf)
+ return ENOMEM;
+
+ if (copy_from_user(kbuf, ubuf, len))
+ goto out_kfree;
+
+ VOP_ATTR_SET(vp, name, kbuf, len, flags, NULL, error);
+
+ out_kfree:
+ kfree(kbuf);
+ return error;
+}
+
+STATIC int
+xfs_attrmulti_attr_remove(
+ struct vnode *vp,
+ char *name,
+ __uint32_t flags)
+{
+ int error;
+
+ if (IS_IMMUTABLE(&vp->v_inode) || IS_APPEND(&vp->v_inode))
+ return EPERM;
+
+ VOP_ATTR_REMOVE(vp, name, flags, NULL, error);
+ return error;
+}
+
+STATIC int
+xfs_attrmulti_by_handle(
+ xfs_mount_t *mp,
+ void __user *arg,
+ struct file *parfilp,
+ struct inode *parinode)
+{
+ int error;
+ xfs_attr_multiop_t *ops;
+ xfs_fsop_attrmulti_handlereq_t am_hreq;
+ struct inode *inode;
+ vnode_t *vp;
+ unsigned int i, size;
+ char *attr_name;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -XFS_ERROR(EPERM);
+ if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_vget_fsop_handlereq(mp, parinode, &am_hreq.hreq, &vp, &inode);
+ if (error)
+ goto out;
+
+ error = E2BIG;
+ size = am_hreq.opcount * sizeof(attr_multiop_t);
+ if (!size || size > 16 * PAGE_SIZE)
+ goto out_vn_rele;
+
+ error = ENOMEM;
+ ops = kmalloc(size, GFP_KERNEL);
+ if (!ops)
+ goto out_vn_rele;
+
+ error = EFAULT;
+ if (copy_from_user(ops, am_hreq.ops, size))
+ goto out_kfree_ops;
+
+ attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+ if (!attr_name)
+ goto out_kfree_ops;
+
+
+ error = 0;
+ for (i = 0; i < am_hreq.opcount; i++) {
+ ops[i].am_error = strncpy_from_user(attr_name,
+ ops[i].am_attrname, MAXNAMELEN);
+ if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+ error = -ERANGE;
+ if (ops[i].am_error < 0)
+ break;
+
+ switch (ops[i].am_opcode) {
+ case ATTR_OP_GET:
+ ops[i].am_error = xfs_attrmulti_attr_get(vp,
+ attr_name, ops[i].am_attrvalue,
+ &ops[i].am_length, ops[i].am_flags);
+ break;
+ case ATTR_OP_SET:
+ ops[i].am_error = xfs_attrmulti_attr_set(vp,
+ attr_name, ops[i].am_attrvalue,
+ ops[i].am_length, ops[i].am_flags);
+ break;
+ case ATTR_OP_REMOVE:
+ ops[i].am_error = xfs_attrmulti_attr_remove(vp,
+ attr_name, ops[i].am_flags);
+ break;
+ default:
+ ops[i].am_error = EINVAL;
+ }
+ }
+
+ if (copy_to_user(am_hreq.ops, ops, size))
+ error = XFS_ERROR(EFAULT);
+
+ kfree(attr_name);
+ out_kfree_ops:
+ kfree(ops);
+ out_vn_rele:
+ VN_RELE(vp);
+ out:
+ return -error;
+}
+
+/* prototypes for a few of the stack-hungry cases that have
+ * their own functions. Functions are defined after their use
+ * so gcc doesn't get fancy and inline them with -03 */
+
+STATIC int
+xfs_ioc_space(
+ bhv_desc_t *bdp,
+ vnode_t *vp,
+ struct file *filp,
+ int flags,
+ unsigned int cmd,
+ void __user *arg);
+
+STATIC int
+xfs_ioc_bulkstat(
+ xfs_mount_t *mp,
+ unsigned int cmd,
+ void __user *arg);
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+ xfs_mount_t *mp,
+ void __user *arg);
+
+STATIC int
+xfs_ioc_fsgeometry(
+ xfs_mount_t *mp,
+ void __user *arg);
+
+STATIC int
+xfs_ioc_xattr(
+ vnode_t *vp,
+ xfs_inode_t *ip,
+ struct file *filp,
+ unsigned int cmd,
+ void __user *arg);
+
+STATIC int
+xfs_ioc_getbmap(
+ bhv_desc_t *bdp,
+ struct file *filp,
+ int flags,
+ unsigned int cmd,
+ void __user *arg);
+
+STATIC int
+xfs_ioc_getbmapx(
+ bhv_desc_t *bdp,
+ void __user *arg);
+
+int
+xfs_ioctl(
+ bhv_desc_t *bdp,
+ struct inode *inode,
+ struct file *filp,
+ int ioflags,
+ unsigned int cmd,
+ void __user *arg)
+{
+ int error;
+ vnode_t *vp;
+ xfs_inode_t *ip;
+ xfs_mount_t *mp;
+
+ vp = LINVFS_GET_VP(inode);
+
+ vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address);
+
+ ip = XFS_BHVTOI(bdp);
+ mp = ip->i_mount;
+
+ switch (cmd) {
+
+ case XFS_IOC_ALLOCSP:
+ case XFS_IOC_FREESP:
+ case XFS_IOC_RESVSP:
+ case XFS_IOC_UNRESVSP:
+ case XFS_IOC_ALLOCSP64:
+ case XFS_IOC_FREESP64:
+ case XFS_IOC_RESVSP64:
+ case XFS_IOC_UNRESVSP64:
+ /*
+ * Only allow the sys admin to reserve space unless
+ * unwritten extents are enabled.
+ */
+ if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
+ !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg);
+
+ case XFS_IOC_DIOINFO: {
+ struct dioattr da;
+ xfs_buftarg_t *target =
+ (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+
+ da.d_mem = da.d_miniosz = 1 << target->pbr_sshift;
+ /* The size dio will do in one go */
+ da.d_maxiosz = 64 * PAGE_CACHE_SIZE;
+
+ if (copy_to_user(arg, &da, sizeof(da)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+ }
+
+ case XFS_IOC_FSBULKSTAT_SINGLE:
+ case XFS_IOC_FSBULKSTAT:
+ case XFS_IOC_FSINUMBERS:
+ return xfs_ioc_bulkstat(mp, cmd, arg);
+
+ case XFS_IOC_FSGEOMETRY_V1:
+ return xfs_ioc_fsgeometry_v1(mp, arg);
+
+ case XFS_IOC_FSGEOMETRY:
+ return xfs_ioc_fsgeometry(mp, arg);
+
+ case XFS_IOC_GETVERSION:
+ case XFS_IOC_GETXFLAGS:
+ case XFS_IOC_SETXFLAGS:
+ case XFS_IOC_FSGETXATTR:
+ case XFS_IOC_FSSETXATTR:
+ case XFS_IOC_FSGETXATTRA:
+ return xfs_ioc_xattr(vp, ip, filp, cmd, arg);
+
+ case XFS_IOC_FSSETDM: {
+ struct fsdmidata dmi;
+
+ if (copy_from_user(&dmi, arg, sizeof(dmi)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate,
+ NULL);
+ return -error;
+ }
+
+ case XFS_IOC_GETBMAP:
+ case XFS_IOC_GETBMAPA:
+ return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg);
+
+ case XFS_IOC_GETBMAPX:
+ return xfs_ioc_getbmapx(bdp, arg);
+
+ case XFS_IOC_FD_TO_HANDLE:
+ case XFS_IOC_PATH_TO_HANDLE:
+ case XFS_IOC_PATH_TO_FSHANDLE:
+ return xfs_find_handle(cmd, arg);
+
+ case XFS_IOC_OPEN_BY_HANDLE:
+ return xfs_open_by_handle(mp, arg, filp, inode);
+
+ case XFS_IOC_FSSETDM_BY_HANDLE:
+ return xfs_fssetdm_by_handle(mp, arg, filp, inode);
+
+ case XFS_IOC_READLINK_BY_HANDLE:
+ return xfs_readlink_by_handle(mp, arg, filp, inode);
+
+ case XFS_IOC_ATTRLIST_BY_HANDLE:
+ return xfs_attrlist_by_handle(mp, arg, filp, inode);
+
+ case XFS_IOC_ATTRMULTI_BY_HANDLE:
+ return xfs_attrmulti_by_handle(mp, arg, filp, inode);
+
+ case XFS_IOC_SWAPEXT: {
+ error = xfs_swapext((struct xfs_swapext __user *)arg);
+ return -error;
+ }
+
+ case XFS_IOC_FSCOUNTS: {
+ xfs_fsop_counts_t out;
+
+ error = xfs_fs_counts(mp, &out);
+ if (error)
+ return -error;
+
+ if (copy_to_user(arg, &out, sizeof(out)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+ }
+
+ case XFS_IOC_SET_RESBLKS: {
+ xfs_fsop_resblks_t inout;
+ __uint64_t in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&inout, arg, sizeof(inout)))
+ return -XFS_ERROR(EFAULT);
+
+ /* input parameter is passed in resblks field of structure */
+ in = inout.resblks;
+ error = xfs_reserve_blocks(mp, &in, &inout);
+ if (error)
+ return -error;
+
+ if (copy_to_user(arg, &inout, sizeof(inout)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+ }
+
+ case XFS_IOC_GET_RESBLKS: {
+ xfs_fsop_resblks_t out;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ error = xfs_reserve_blocks(mp, NULL, &out);
+ if (error)
+ return -error;
+
+ if (copy_to_user(arg, &out, sizeof(out)))
+ return -XFS_ERROR(EFAULT);
+
+ return 0;
+ }
+
+ case XFS_IOC_FSGROWFSDATA: {
+ xfs_growfs_data_t in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&in, arg, sizeof(in)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_growfs_data(mp, &in);
+ return -error;
+ }
+
+ case XFS_IOC_FSGROWFSLOG: {
+ xfs_growfs_log_t in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&in, arg, sizeof(in)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_growfs_log(mp, &in);
+ return -error;
+ }
+
+ case XFS_IOC_FSGROWFSRT: {
+ xfs_growfs_rt_t in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&in, arg, sizeof(in)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_growfs_rt(mp, &in);
+ return -error;
+ }
+
+ case XFS_IOC_FREEZE:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (inode->i_sb->s_frozen == SB_UNFROZEN)
+ freeze_bdev(inode->i_sb->s_bdev);
+ return 0;
+
+ case XFS_IOC_THAW:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (inode->i_sb->s_frozen != SB_UNFROZEN)
+ thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
+ return 0;
+
+ case XFS_IOC_GOINGDOWN: {
+ __uint32_t in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(in, (__uint32_t __user *)arg))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_fs_goingdown(mp, in);
+ return -error;
+ }
+
+ case XFS_IOC_ERROR_INJECTION: {
+ xfs_error_injection_t in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&in, arg, sizeof(in)))
+ return -XFS_ERROR(EFAULT);
+
+ error = xfs_errortag_add(in.errtag, mp);
+ return -error;
+ }
+
+ case XFS_IOC_ERROR_CLEARALL:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ error = xfs_errortag_clearall(mp);
+ return -error;
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+STATIC int
+xfs_ioc_space(
+ bhv_desc_t *bdp,
+ vnode_t *vp,
+ struct file *filp,
+ int ioflags,
+ unsigned int cmd,
+ void __user *arg)
+{
+ xfs_flock64_t bf;
+ int attr_flags = 0;
+ int error;
+
+ if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
+ return -XFS_ERROR(EPERM);
+
+ if (!(filp->f_flags & FMODE_WRITE))
+ return -XFS_ERROR(EBADF);
+
+ if (vp->v_type != VREG)
+ return -XFS_ERROR(EINVAL);
+
+ if (copy_from_user(&bf, arg, sizeof(bf)))
+ return -XFS_ERROR(EFAULT);
+
+ if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+ attr_flags |= ATTR_NONBLOCK;
+ if (ioflags & IO_INVIS)
+ attr_flags |= ATTR_DMI;
+
+ error = xfs_change_file_space(bdp, cmd, &bf, filp->f_pos,
+ NULL, attr_flags);
+ return -error;
+}
+
+STATIC int
+xfs_ioc_bulkstat(
+ xfs_mount_t *mp,
+ unsigned int cmd,
+ void __user *arg)
+{
+ xfs_fsop_bulkreq_t bulkreq;
+ int count; /* # of records returned */
+ xfs_ino_t inlast; /* last inode number */
+ int done;
+ int error;
+
+ /* done = 1 if there are more stats to get and if bulkstat */
+ /* should be called again (unused here, but used in dmapi) */
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -XFS_ERROR(EIO);
+
+ if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+ return -XFS_ERROR(EFAULT);
+
+ if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+ return -XFS_ERROR(EFAULT);
+
+ if ((count = bulkreq.icount) <= 0)
+ return -XFS_ERROR(EINVAL);
+
+ if (cmd == XFS_IOC_FSINUMBERS)
+ error = xfs_inumbers(mp, &inlast, &count,
+ bulkreq.ubuffer);
+ else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
+ error = xfs_bulkstat_single(mp, &inlast,
+ bulkreq.ubuffer, &done);
+ else { /* XFS_IOC_FSBULKSTAT */
+ if (count == 1 && inlast != 0) {
+ inlast++;
+ error = xfs_bulkstat_single(mp, &inlast,
+ bulkreq.ubuffer, &done);
+ } else {
+ error = xfs_bulkstat(mp, &inlast, &count,
+ (bulkstat_one_pf)xfs_bulkstat_one, NULL,
+ sizeof(xfs_bstat_t), bulkreq.ubuffer,
+ BULKSTAT_FG_QUICK, &done);
+ }
+ }
+
+ if (error)
+ return -error;
+
+ if (bulkreq.ocount != NULL) {
+ if (copy_to_user(bulkreq.lastip, &inlast,
+ sizeof(xfs_ino_t)))
+ return -XFS_ERROR(EFAULT);
+
+ if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+ return -XFS_ERROR(EFAULT);
+ }
+
+ return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+ xfs_mount_t *mp,
+ void __user *arg)
+{
+ xfs_fsop_geom_v1_t fsgeo;
+ int error;
+
+ error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+ if (error)
+ return -error;
+
+ if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry(
+ xfs_mount_t *mp,
+ void __user *arg)
+{
+ xfs_fsop_geom_t fsgeo;
+ int error;
+
+ error = xfs_fs_geometry(mp, &fsgeo, 4);
+ if (error)
+ return -error;
+
+ if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+}
+
+/*
+ * Linux extended inode flags interface.
+ */
+#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */
+#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */
+#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */
+#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
+#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
+
+STATIC unsigned int
+xfs_merge_ioc_xflags(
+ unsigned int flags,
+ unsigned int start)
+{
+ unsigned int xflags = start;
+
+ if (flags & LINUX_XFLAG_IMMUTABLE)
+ xflags |= XFS_XFLAG_IMMUTABLE;
+ else
+ xflags &= ~XFS_XFLAG_IMMUTABLE;
+ if (flags & LINUX_XFLAG_APPEND)
+ xflags |= XFS_XFLAG_APPEND;
+ else
+ xflags &= ~XFS_XFLAG_APPEND;
+ if (flags & LINUX_XFLAG_SYNC)
+ xflags |= XFS_XFLAG_SYNC;
+ else
+ xflags &= ~XFS_XFLAG_SYNC;
+ if (flags & LINUX_XFLAG_NOATIME)
+ xflags |= XFS_XFLAG_NOATIME;
+ else
+ xflags &= ~XFS_XFLAG_NOATIME;
+ if (flags & LINUX_XFLAG_NODUMP)
+ xflags |= XFS_XFLAG_NODUMP;
+ else
+ xflags &= ~XFS_XFLAG_NODUMP;
+
+ return xflags;
+}
+
+STATIC unsigned int
+xfs_di2lxflags(
+ __uint16_t di_flags)
+{
+ unsigned int flags = 0;
+
+ if (di_flags & XFS_DIFLAG_IMMUTABLE)
+ flags |= LINUX_XFLAG_IMMUTABLE;
+ if (di_flags & XFS_DIFLAG_APPEND)
+ flags |= LINUX_XFLAG_APPEND;
+ if (di_flags & XFS_DIFLAG_SYNC)
+ flags |= LINUX_XFLAG_SYNC;
+ if (di_flags & XFS_DIFLAG_NOATIME)
+ flags |= LINUX_XFLAG_NOATIME;
+ if (di_flags & XFS_DIFLAG_NODUMP)
+ flags |= LINUX_XFLAG_NODUMP;
+ return flags;
+}
+
+STATIC int
+xfs_ioc_xattr(
+ vnode_t *vp,
+ xfs_inode_t *ip,
+ struct file *filp,
+ unsigned int cmd,
+ void __user *arg)
+{
+ struct fsxattr fa;
+ vattr_t va;
+ int error;
+ int attr_flags;
+ unsigned int flags;
+
+ switch (cmd) {
+ case XFS_IOC_FSGETXATTR: {
+ va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS;
+ VOP_GETATTR(vp, &va, 0, NULL, error);
+ if (error)
+ return -error;
+
+ fa.fsx_xflags = va.va_xflags;
+ fa.fsx_extsize = va.va_extsize;
+ fa.fsx_nextents = va.va_nextents;
+
+ if (copy_to_user(arg, &fa, sizeof(fa)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+ }
+
+ case XFS_IOC_FSSETXATTR: {
+ if (copy_from_user(&fa, arg, sizeof(fa)))
+ return -XFS_ERROR(EFAULT);
+
+ attr_flags = 0;
+ if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+ attr_flags |= ATTR_NONBLOCK;
+
+ va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
+ va.va_xflags = fa.fsx_xflags;
+ va.va_extsize = fa.fsx_extsize;
+
+ VOP_SETATTR(vp, &va, attr_flags, NULL, error);
+ if (!error)
+ vn_revalidate(vp); /* update Linux inode flags */
+ return -error;
+ }
+
+ case XFS_IOC_FSGETXATTRA: {
+ va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS;
+ VOP_GETATTR(vp, &va, 0, NULL, error);
+ if (error)
+ return -error;
+
+ fa.fsx_xflags = va.va_xflags;
+ fa.fsx_extsize = va.va_extsize;
+ fa.fsx_nextents = va.va_anextents;
+
+ if (copy_to_user(arg, &fa, sizeof(fa)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+ }
+
+ case XFS_IOC_GETXFLAGS: {
+ flags = xfs_di2lxflags(ip->i_d.di_flags);
+ if (copy_to_user(arg, &flags, sizeof(flags)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+ }
+
+ case XFS_IOC_SETXFLAGS: {
+ if (copy_from_user(&flags, arg, sizeof(flags)))
+ return -XFS_ERROR(EFAULT);
+
+ if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \
+ LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \
+ LINUX_XFLAG_SYNC))
+ return -XFS_ERROR(EOPNOTSUPP);
+
+ attr_flags = 0;
+ if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+ attr_flags |= ATTR_NONBLOCK;
+
+ va.va_mask = XFS_AT_XFLAGS;
+ va.va_xflags = xfs_merge_ioc_xflags(flags,
+ xfs_ip2xflags(ip));
+
+ VOP_SETATTR(vp, &va, attr_flags, NULL, error);
+ if (!error)
+ vn_revalidate(vp); /* update Linux inode flags */
+ return -error;
+ }
+
+ case XFS_IOC_GETVERSION: {
+ flags = LINVFS_GET_IP(vp)->i_generation;
+ if (copy_to_user(arg, &flags, sizeof(flags)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+ }
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+STATIC int
+xfs_ioc_getbmap(
+ bhv_desc_t *bdp,
+ struct file *filp,
+ int ioflags,
+ unsigned int cmd,
+ void __user *arg)
+{
+ struct getbmap bm;
+ int iflags;
+ int error;
+
+ if (copy_from_user(&bm, arg, sizeof(bm)))
+ return -XFS_ERROR(EFAULT);
+
+ if (bm.bmv_count < 2)
+ return -XFS_ERROR(EINVAL);
+
+ iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+ if (ioflags & IO_INVIS)
+ iflags |= BMV_IF_NO_DMAPI_READ;
+
+ error = xfs_getbmap(bdp, &bm, (struct getbmap __user *)arg+1, iflags);
+ if (error)
+ return -error;
+
+ if (copy_to_user(arg, &bm, sizeof(bm)))
+ return -XFS_ERROR(EFAULT);
+ return 0;
+}
+
+STATIC int
+xfs_ioc_getbmapx(
+ bhv_desc_t *bdp,
+ void __user *arg)
+{
+ struct getbmapx bmx;
+ struct getbmap bm;
+ int iflags;
+ int error;
+
+ if (copy_from_user(&bmx, arg, sizeof(bmx)))
+ return -XFS_ERROR(EFAULT);
+
+ if (bmx.bmv_count < 2)
+ return -XFS_ERROR(EINVAL);
+
+ /*
+ * Map input getbmapx structure to a getbmap
+ * structure for xfs_getbmap.
+ */
+ GETBMAP_CONVERT(bmx, bm);
+
+ iflags = bmx.bmv_iflags;
+
+ if (iflags & (~BMV_IF_VALID))
+ return -XFS_ERROR(EINVAL);
+
+ iflags |= BMV_IF_EXTENDED;
+
+ error = xfs_getbmap(bdp, &bm, (struct getbmapx __user *)arg+1, iflags);
+ if (error)
+ return -error;
+
+ GETBMAP_CONVERT(bm, bmx);
+
+ if (copy_to_user(arg, &bmx, sizeof(bmx)))
+ return -XFS_ERROR(EFAULT);
+
+ return 0;
+}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
new file mode 100644
index 00000000000..7a12c83184f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <linux/config.h>
+#include <linux/compat.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/ioctl32.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_fs.h"
+#include "xfs_vfs.h"
+#include "xfs_vnode.h"
+#include "xfs_dfrag.h"
+
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#else
+
+typedef struct xfs_fsop_bulkreq32 {
+ compat_uptr_t lastip; /* last inode # pointer */
+ __s32 icount; /* count of entries in buffer */
+ compat_uptr_t ubuffer; /* user buffer for inode desc. */
+ __s32 ocount; /* output count pointer */
+} xfs_fsop_bulkreq32_t;
+
+static unsigned long
+xfs_ioctl32_bulkstat(unsigned long arg)
+{
+ xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg;
+ xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p));
+ u32 addr;
+
+ if (get_user(addr, &p32->lastip) ||
+ put_user(compat_ptr(addr), &p->lastip) ||
+ copy_in_user(&p->icount, &p32->icount, sizeof(s32)) ||
+ get_user(addr, &p32->ubuffer) ||
+ put_user(compat_ptr(addr), &p->ubuffer) ||
+ get_user(addr, &p32->ocount) ||
+ put_user(compat_ptr(addr), &p->ocount))
+ return -EFAULT;
+
+ return (unsigned long)p;
+}
+#endif
+
+static long
+__xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
+{
+ int error;
+ struct inode *inode = f->f_dentry->d_inode;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+
+ switch (cmd) {
+ case XFS_IOC_DIOINFO:
+ case XFS_IOC_FSGEOMETRY_V1:
+ case XFS_IOC_FSGEOMETRY:
+ case XFS_IOC_GETVERSION:
+ case XFS_IOC_GETXFLAGS:
+ case XFS_IOC_SETXFLAGS:
+ case XFS_IOC_FSGETXATTR:
+ case XFS_IOC_FSSETXATTR:
+ case XFS_IOC_FSGETXATTRA:
+ case XFS_IOC_FSSETDM:
+ case XFS_IOC_GETBMAP:
+ case XFS_IOC_GETBMAPA:
+ case XFS_IOC_GETBMAPX:
+/* not handled
+ case XFS_IOC_FD_TO_HANDLE:
+ case XFS_IOC_PATH_TO_HANDLE:
+ case XFS_IOC_PATH_TO_HANDLE:
+ case XFS_IOC_PATH_TO_FSHANDLE:
+ case XFS_IOC_OPEN_BY_HANDLE:
+ case XFS_IOC_FSSETDM_BY_HANDLE:
+ case XFS_IOC_READLINK_BY_HANDLE:
+ case XFS_IOC_ATTRLIST_BY_HANDLE:
+ case XFS_IOC_ATTRMULTI_BY_HANDLE:
+*/
+ case XFS_IOC_FSCOUNTS:
+ case XFS_IOC_SET_RESBLKS:
+ case XFS_IOC_GET_RESBLKS:
+ case XFS_IOC_FSGROWFSDATA:
+ case XFS_IOC_FSGROWFSLOG:
+ case XFS_IOC_FSGROWFSRT:
+ case XFS_IOC_FREEZE:
+ case XFS_IOC_THAW:
+ case XFS_IOC_GOINGDOWN:
+ case XFS_IOC_ERROR_INJECTION:
+ case XFS_IOC_ERROR_CLEARALL:
+ break;
+
+#ifndef BROKEN_X86_ALIGNMENT
+ /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */
+ case XFS_IOC_ALLOCSP:
+ case XFS_IOC_FREESP:
+ case XFS_IOC_RESVSP:
+ case XFS_IOC_UNRESVSP:
+ case XFS_IOC_ALLOCSP64:
+ case XFS_IOC_FREESP64:
+ case XFS_IOC_RESVSP64:
+ case XFS_IOC_UNRESVSP64:
+ case XFS_IOC_SWAPEXT:
+ break;
+
+ case XFS_IOC_FSBULKSTAT_SINGLE:
+ case XFS_IOC_FSBULKSTAT:
+ case XFS_IOC_FSINUMBERS:
+ arg = xfs_ioctl32_bulkstat(arg);
+ break;
+#endif
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
+ VMODIFY(vp);
+
+ return error;
+}
+
+long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg)
+{
+ return __xfs_compat_ioctl(0, f, cmd, arg);
+}
+
+long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg)
+{
+ return __xfs_compat_ioctl(IO_INVIS, f, cmd, arg);
+}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
new file mode 100644
index 00000000000..779f69a4811
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+long xfs_compat_ioctl(struct file *f, unsigned cmd, unsigned long arg);
+long xfs_compat_invis_ioctl(struct file *f, unsigned cmd, unsigned long arg);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
new file mode 100644
index 00000000000..407e9935939
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -0,0 +1,680 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+
+#include <linux/xattr.h>
+#include <linux/namei.h>
+
+
+/*
+ * Pull the link count and size up from the xfs inode to the linux inode
+ */
+STATIC void
+validate_fields(
+ struct inode *ip)
+{
+ vnode_t *vp = LINVFS_GET_VP(ip);
+ vattr_t va;
+ int error;
+
+ va.va_mask = XFS_AT_NLINK|XFS_AT_SIZE|XFS_AT_NBLOCKS;
+ VOP_GETATTR(vp, &va, ATTR_LAZY, NULL, error);
+ if (likely(!error)) {
+ ip->i_nlink = va.va_nlink;
+ ip->i_blocks = va.va_nblocks;
+
+ /* we're under i_sem so i_size can't change under us */
+ if (i_size_read(ip) != va.va_size)
+ i_size_write(ip, va.va_size);
+ }
+}
+
+/*
+ * Determine whether a process has a valid fs_struct (kernel daemons
+ * like knfsd don't have an fs_struct).
+ *
+ * XXX(hch): nfsd is broken, better fix it instead.
+ */
+STATIC inline int
+has_fs_struct(struct task_struct *task)
+{
+ return (task->fs != init_task.fs);
+}
+
+STATIC int
+linvfs_mknod(
+ struct inode *dir,
+ struct dentry *dentry,
+ int mode,
+ dev_t rdev)
+{
+ struct inode *ip;
+ vattr_t va;
+ vnode_t *vp = NULL, *dvp = LINVFS_GET_VP(dir);
+ xfs_acl_t *default_acl = NULL;
+ attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS;
+ int error;
+
+ /*
+ * Irix uses Missed'em'V split, but doesn't want to see
+ * the upper 5 bits of (14bit) major.
+ */
+ if (!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)
+ return -EINVAL;
+
+ if (test_default_acl && test_default_acl(dvp)) {
+ if (!_ACL_ALLOC(default_acl))
+ return -ENOMEM;
+ if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
+ _ACL_FREE(default_acl);
+ default_acl = NULL;
+ }
+ }
+
+ if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current))
+ mode &= ~current->fs->umask;
+
+ memset(&va, 0, sizeof(va));
+ va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+ va.va_type = IFTOVT(mode);
+ va.va_mode = mode;
+
+ switch (mode & S_IFMT) {
+ case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
+ va.va_rdev = sysv_encode_dev(rdev);
+ va.va_mask |= XFS_AT_RDEV;
+ /*FALLTHROUGH*/
+ case S_IFREG:
+ VOP_CREATE(dvp, dentry, &va, &vp, NULL, error);
+ break;
+ case S_IFDIR:
+ VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ if (default_acl) {
+ if (!error) {
+ error = _ACL_INHERIT(vp, &va, default_acl);
+ if (!error) {
+ VMODIFY(vp);
+ } else {
+ struct dentry teardown = {};
+ int err2;
+
+ /* Oh, the horror.
+ * If we can't add the ACL we must back out.
+ * ENOSPC can hit here, among other things.
+ */
+ teardown.d_inode = ip = LINVFS_GET_IP(vp);
+ teardown.d_name = dentry->d_name;
+
+ vn_mark_bad(vp);
+
+ if (S_ISDIR(mode))
+ VOP_RMDIR(dvp, &teardown, NULL, err2);
+ else
+ VOP_REMOVE(dvp, &teardown, NULL, err2);
+ VN_RELE(vp);
+ }
+ }
+ _ACL_FREE(default_acl);
+ }
+
+ if (!error) {
+ ASSERT(vp);
+ ip = LINVFS_GET_IP(vp);
+
+ if (S_ISCHR(mode) || S_ISBLK(mode))
+ ip->i_rdev = rdev;
+ else if (S_ISDIR(mode))
+ validate_fields(ip);
+ d_instantiate(dentry, ip);
+ validate_fields(dir);
+ }
+ return -error;
+}
+
+STATIC int
+linvfs_create(
+ struct inode *dir,
+ struct dentry *dentry,
+ int mode,
+ struct nameidata *nd)
+{
+ return linvfs_mknod(dir, dentry, mode, 0);
+}
+
+STATIC int
+linvfs_mkdir(
+ struct inode *dir,
+ struct dentry *dentry,
+ int mode)
+{
+ return linvfs_mknod(dir, dentry, mode|S_IFDIR, 0);
+}
+
+STATIC struct dentry *
+linvfs_lookup(
+ struct inode *dir,
+ struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct vnode *vp = LINVFS_GET_VP(dir), *cvp;
+ int error;
+
+ if (dentry->d_name.len >= MAXNAMELEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
+ if (error) {
+ if (unlikely(error != ENOENT))
+ return ERR_PTR(-error);
+ d_add(dentry, NULL);
+ return NULL;
+ }
+
+ return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
+}
+
+STATIC int
+linvfs_link(
+ struct dentry *old_dentry,
+ struct inode *dir,
+ struct dentry *dentry)
+{
+ struct inode *ip; /* inode of guy being linked to */
+ vnode_t *tdvp; /* target directory for new name/link */
+ vnode_t *vp; /* vp of name being linked */
+ int error;
+
+ ip = old_dentry->d_inode; /* inode being linked to */
+ if (S_ISDIR(ip->i_mode))
+ return -EPERM;
+
+ tdvp = LINVFS_GET_VP(dir);
+ vp = LINVFS_GET_VP(ip);
+
+ VOP_LINK(tdvp, vp, dentry, NULL, error);
+ if (!error) {
+ VMODIFY(tdvp);
+ VN_HOLD(vp);
+ validate_fields(ip);
+ d_instantiate(dentry, ip);
+ }
+ return -error;
+}
+
+STATIC int
+linvfs_unlink(
+ struct inode *dir,
+ struct dentry *dentry)
+{
+ struct inode *inode;
+ vnode_t *dvp; /* directory containing name to remove */
+ int error;
+
+ inode = dentry->d_inode;
+ dvp = LINVFS_GET_VP(dir);
+
+ VOP_REMOVE(dvp, dentry, NULL, error);
+ if (!error) {
+ validate_fields(dir); /* For size only */
+ validate_fields(inode);
+ }
+
+ return -error;
+}
+
+STATIC int
+linvfs_symlink(
+ struct inode *dir,
+ struct dentry *dentry,
+ const char *symname)
+{
+ struct inode *ip;
+ vattr_t va;
+ vnode_t *dvp; /* directory containing name of symlink */
+ vnode_t *cvp; /* used to lookup symlink to put in dentry */
+ int error;
+
+ dvp = LINVFS_GET_VP(dir);
+ cvp = NULL;
+
+ memset(&va, 0, sizeof(va));
+ va.va_type = VLNK;
+ va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
+ va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
+
+ error = 0;
+ VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
+ if (!error && cvp) {
+ ASSERT(cvp->v_type == VLNK);
+ ip = LINVFS_GET_IP(cvp);
+ d_instantiate(dentry, ip);
+ validate_fields(dir);
+ validate_fields(ip); /* size needs update */
+ }
+ return -error;
+}
+
+STATIC int
+linvfs_rmdir(
+ struct inode *dir,
+ struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ vnode_t *dvp = LINVFS_GET_VP(dir);
+ int error;
+
+ VOP_RMDIR(dvp, dentry, NULL, error);
+ if (!error) {
+ validate_fields(inode);
+ validate_fields(dir);
+ }
+ return -error;
+}
+
+STATIC int
+linvfs_rename(
+ struct inode *odir,
+ struct dentry *odentry,
+ struct inode *ndir,
+ struct dentry *ndentry)
+{
+ struct inode *new_inode = ndentry->d_inode;
+ vnode_t *fvp; /* from directory */
+ vnode_t *tvp; /* target directory */
+ int error;
+
+ fvp = LINVFS_GET_VP(odir);
+ tvp = LINVFS_GET_VP(ndir);
+
+ VOP_RENAME(fvp, odentry, tvp, ndentry, NULL, error);
+ if (error)
+ return -error;
+
+ if (new_inode)
+ validate_fields(new_inode);
+
+ validate_fields(odir);
+ if (ndir != odir)
+ validate_fields(ndir);
+ return 0;
+}
+
+/*
+ * careful here - this function can get called recursively, so
+ * we need to be very careful about how much stack we use.
+ * uio is kmalloced for this reason...
+ */
+STATIC int
+linvfs_follow_link(
+ struct dentry *dentry,
+ struct nameidata *nd)
+{
+ vnode_t *vp;
+ uio_t *uio;
+ iovec_t iov;
+ int error;
+ char *link;
+
+ ASSERT(dentry);
+ ASSERT(nd);
+
+ link = (char *)kmalloc(MAXNAMELEN+1, GFP_KERNEL);
+ if (!link) {
+ nd_set_link(nd, ERR_PTR(-ENOMEM));
+ return 0;
+ }
+
+ uio = (uio_t *)kmalloc(sizeof(uio_t), GFP_KERNEL);
+ if (!uio) {
+ kfree(link);
+ nd_set_link(nd, ERR_PTR(-ENOMEM));
+ return 0;
+ }
+
+ vp = LINVFS_GET_VP(dentry->d_inode);
+
+ iov.iov_base = link;
+ iov.iov_len = MAXNAMELEN;
+
+ uio->uio_iov = &iov;
+ uio->uio_offset = 0;
+ uio->uio_segflg = UIO_SYSSPACE;
+ uio->uio_resid = MAXNAMELEN;
+ uio->uio_iovcnt = 1;
+
+ VOP_READLINK(vp, uio, 0, NULL, error);
+ if (error) {
+ kfree(link);
+ link = ERR_PTR(-error);
+ } else {
+ link[MAXNAMELEN - uio->uio_resid] = '\0';
+ }
+ kfree(uio);
+
+ nd_set_link(nd, link);
+ return 0;
+}
+
+static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd)
+{
+ char *s = nd_get_link(nd);
+ if (!IS_ERR(s))
+ kfree(s);
+}
+
+#ifdef CONFIG_XFS_POSIX_ACL
+STATIC int
+linvfs_permission(
+ struct inode *inode,
+ int mode,
+ struct nameidata *nd)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error;
+
+ mode <<= 6; /* convert from linux to vnode access bits */
+ VOP_ACCESS(vp, mode, NULL, error);
+ return -error;
+}
+#else
+#define linvfs_permission NULL
+#endif
+
+STATIC int
+linvfs_getattr(
+ struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error = 0;
+
+ if (unlikely(vp->v_flag & VMODIFIED))
+ error = vn_revalidate(vp);
+ if (!error)
+ generic_fillattr(inode, stat);
+ return 0;
+}
+
+STATIC int
+linvfs_setattr(
+ struct dentry *dentry,
+ struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+ unsigned int ia_valid = attr->ia_valid;
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ vattr_t vattr;
+ int flags = 0;
+ int error;
+
+ memset(&vattr, 0, sizeof(vattr_t));
+ if (ia_valid & ATTR_UID) {
+ vattr.va_mask |= XFS_AT_UID;
+ vattr.va_uid = attr->ia_uid;
+ }
+ if (ia_valid & ATTR_GID) {
+ vattr.va_mask |= XFS_AT_GID;
+ vattr.va_gid = attr->ia_gid;
+ }
+ if (ia_valid & ATTR_SIZE) {
+ vattr.va_mask |= XFS_AT_SIZE;
+ vattr.va_size = attr->ia_size;
+ }
+ if (ia_valid & ATTR_ATIME) {
+ vattr.va_mask |= XFS_AT_ATIME;
+ vattr.va_atime = attr->ia_atime;
+ }
+ if (ia_valid & ATTR_MTIME) {
+ vattr.va_mask |= XFS_AT_MTIME;
+ vattr.va_mtime = attr->ia_mtime;
+ }
+ if (ia_valid & ATTR_CTIME) {
+ vattr.va_mask |= XFS_AT_CTIME;
+ vattr.va_ctime = attr->ia_ctime;
+ }
+ if (ia_valid & ATTR_MODE) {
+ vattr.va_mask |= XFS_AT_MODE;
+ vattr.va_mode = attr->ia_mode;
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ inode->i_mode &= ~S_ISGID;
+ }
+
+ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
+ flags |= ATTR_UTIME;
+#ifdef ATTR_NO_BLOCK
+ if ((ia_valid & ATTR_NO_BLOCK))
+ flags |= ATTR_NONBLOCK;
+#endif
+
+ VOP_SETATTR(vp, &vattr, flags, NULL, error);
+ if (error)
+ return -error;
+ vn_revalidate(vp);
+ return error;
+}
+
+STATIC void
+linvfs_truncate(
+ struct inode *inode)
+{
+ block_truncate_page(inode->i_mapping, inode->i_size, linvfs_get_block);
+}
+
+STATIC int
+linvfs_setxattr(
+ struct dentry *dentry,
+ const char *name,
+ const void *data,
+ size_t size,
+ int flags)
+{
+ vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
+ char *attr = (char *)name;
+ attrnames_t *namesp;
+ int xflags = 0;
+ int error;
+
+ namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
+ if (!namesp)
+ return -EOPNOTSUPP;
+ attr += namesp->attr_namelen;
+ error = namesp->attr_capable(vp, NULL);
+ if (error)
+ return error;
+
+ /* Convert Linux syscall to XFS internal ATTR flags */
+ if (flags & XATTR_CREATE)
+ xflags |= ATTR_CREATE;
+ if (flags & XATTR_REPLACE)
+ xflags |= ATTR_REPLACE;
+ xflags |= namesp->attr_flag;
+ return namesp->attr_set(vp, attr, (void *)data, size, xflags);
+}
+
+STATIC ssize_t
+linvfs_getxattr(
+ struct dentry *dentry,
+ const char *name,
+ void *data,
+ size_t size)
+{
+ vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
+ char *attr = (char *)name;
+ attrnames_t *namesp;
+ int xflags = 0;
+ ssize_t error;
+
+ namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
+ if (!namesp)
+ return -EOPNOTSUPP;
+ attr += namesp->attr_namelen;
+ error = namesp->attr_capable(vp, NULL);
+ if (error)
+ return error;
+
+ /* Convert Linux syscall to XFS internal ATTR flags */
+ if (!size) {
+ xflags |= ATTR_KERNOVAL;
+ data = NULL;
+ }
+ xflags |= namesp->attr_flag;
+ return namesp->attr_get(vp, attr, (void *)data, size, xflags);
+}
+
+STATIC ssize_t
+linvfs_listxattr(
+ struct dentry *dentry,
+ char *data,
+ size_t size)
+{
+ vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
+ int error, xflags = ATTR_KERNAMELS;
+ ssize_t result;
+
+ if (!size)
+ xflags |= ATTR_KERNOVAL;
+ xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
+
+ error = attr_generic_list(vp, data, size, xflags, &result);
+ if (error < 0)
+ return error;
+ return result;
+}
+
+STATIC int
+linvfs_removexattr(
+ struct dentry *dentry,
+ const char *name)
+{
+ vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
+ char *attr = (char *)name;
+ attrnames_t *namesp;
+ int xflags = 0;
+ int error;
+
+ namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
+ if (!namesp)
+ return -EOPNOTSUPP;
+ attr += namesp->attr_namelen;
+ error = namesp->attr_capable(vp, NULL);
+ if (error)
+ return error;
+ xflags |= namesp->attr_flag;
+ return namesp->attr_remove(vp, attr, xflags);
+}
+
+
+struct inode_operations linvfs_file_inode_operations = {
+ .permission = linvfs_permission,
+ .truncate = linvfs_truncate,
+ .getattr = linvfs_getattr,
+ .setattr = linvfs_setattr,
+ .setxattr = linvfs_setxattr,
+ .getxattr = linvfs_getxattr,
+ .listxattr = linvfs_listxattr,
+ .removexattr = linvfs_removexattr,
+};
+
+struct inode_operations linvfs_dir_inode_operations = {
+ .create = linvfs_create,
+ .lookup = linvfs_lookup,
+ .link = linvfs_link,
+ .unlink = linvfs_unlink,
+ .symlink = linvfs_symlink,
+ .mkdir = linvfs_mkdir,
+ .rmdir = linvfs_rmdir,
+ .mknod = linvfs_mknod,
+ .rename = linvfs_rename,
+ .permission = linvfs_permission,
+ .getattr = linvfs_getattr,
+ .setattr = linvfs_setattr,
+ .setxattr = linvfs_setxattr,
+ .getxattr = linvfs_getxattr,
+ .listxattr = linvfs_listxattr,
+ .removexattr = linvfs_removexattr,
+};
+
+struct inode_operations linvfs_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = linvfs_follow_link,
+ .put_link = linvfs_put_link,
+ .permission = linvfs_permission,
+ .getattr = linvfs_getattr,
+ .setattr = linvfs_setattr,
+ .setxattr = linvfs_setxattr,
+ .getxattr = linvfs_getxattr,
+ .listxattr = linvfs_listxattr,
+ .removexattr = linvfs_removexattr,
+};
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
new file mode 100644
index 00000000000..6a69a62c36b
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_IOPS_H__
+#define __XFS_IOPS_H__
+
+extern struct inode_operations linvfs_file_inode_operations;
+extern struct inode_operations linvfs_dir_inode_operations;
+extern struct inode_operations linvfs_symlink_inode_operations;
+
+extern struct file_operations linvfs_file_operations;
+extern struct file_operations linvfs_invis_file_operations;
+extern struct file_operations linvfs_dir_operations;
+
+extern struct address_space_operations linvfs_aops;
+
+extern int linvfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern void linvfs_unwritten_done(struct buffer_head *, int);
+
+extern int xfs_ioctl(struct bhv_desc *, struct inode *, struct file *,
+ int, unsigned int, void __user *);
+
+#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
new file mode 100644
index 00000000000..71bb41019a1
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_LINUX__
+#define __XFS_LINUX__
+
+#include <linux/types.h>
+#include <linux/config.h>
+
+/*
+ * Some types are conditional depending on the target system.
+ * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
+ * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well
+ * as requiring XFS_BIG_BLKNOS to be set.
+ */
+#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
+# define XFS_BIG_BLKNOS 1
+# if BITS_PER_LONG == 64
+# define XFS_BIG_INUMS 1
+# else
+# define XFS_BIG_INUMS 0
+# endif
+#else
+# define XFS_BIG_BLKNOS 0
+# define XFS_BIG_INUMS 0
+#endif
+
+#include <xfs_types.h>
+#include <xfs_arch.h>
+
+#include <kmem.h>
+#include <mrlock.h>
+#include <spin.h>
+#include <sv.h>
+#include <mutex.h>
+#include <sema.h>
+#include <time.h>
+
+#include <support/qsort.h>
+#include <support/ktrace.h>
+#include <support/debug.h>
+#include <support/move.h>
+#include <support/uuid.h>
+
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/major.h>
+#include <linux/pagemap.h>
+#include <linux/vfs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+#include <linux/sort.h>
+
+#include <asm/page.h>
+#include <asm/div64.h>
+#include <asm/param.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include <xfs_behavior.h>
+#include <xfs_vfs.h>
+#include <xfs_cred.h>
+#include <xfs_vnode.h>
+#include <xfs_stats.h>
+#include <xfs_sysctl.h>
+#include <xfs_iops.h>
+#include <xfs_super.h>
+#include <xfs_globals.h>
+#include <xfs_fs_subr.h>
+#include <xfs_lrw.h>
+#include <xfs_buf.h>
+
+/*
+ * Feature macros (disable/enable)
+ */
+#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
+#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
+
+/*
+ * State flag for unwritten extent buffers.
+ *
+ * We need to be able to distinguish between these and delayed
+ * allocate buffers within XFS. The generic IO path code does
+ * not need to distinguish - we use the BH_Delay flag for both
+ * delalloc and these ondisk-uninitialised buffers.
+ */
+BUFFER_FNS(PrivateStart, unwritten);
+static inline void set_buffer_unwritten_io(struct buffer_head *bh)
+{
+ bh->b_end_io = linvfs_unwritten_done;
+}
+
+#define restricted_chown xfs_params.restrict_chown.val
+#define irix_sgid_inherit xfs_params.sgid_inherit.val
+#define irix_symlink_mode xfs_params.symlink_mode.val
+#define xfs_panic_mask xfs_params.panic_mask.val
+#define xfs_error_level xfs_params.error_level.val
+#define xfs_syncd_centisecs xfs_params.syncd_timer.val
+#define xfs_stats_clear xfs_params.stats_clear.val
+#define xfs_inherit_sync xfs_params.inherit_sync.val
+#define xfs_inherit_nodump xfs_params.inherit_nodump.val
+#define xfs_inherit_noatime xfs_params.inherit_noatim.val
+#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val
+#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val
+#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
+#define xfs_rotorstep xfs_params.rotorstep.val
+
+#ifndef __smp_processor_id
+#define __smp_processor_id() smp_processor_id()
+#endif
+#define current_cpu() __smp_processor_id()
+#define current_pid() (current->pid)
+#define current_fsuid(cred) (current->fsuid)
+#define current_fsgid(cred) (current->fsgid)
+
+#define NBPP PAGE_SIZE
+#define DPPSHFT (PAGE_SHIFT - 9)
+#define NDPP (1 << (PAGE_SHIFT - 9))
+#define dtop(DD) (((DD) + NDPP - 1) >> DPPSHFT)
+#define dtopt(DD) ((DD) >> DPPSHFT)
+#define dpoff(DD) ((DD) & (NDPP-1))
+
+#define NBBY 8 /* number of bits per byte */
+#define NBPC PAGE_SIZE /* Number of bytes per click */
+#define BPCSHIFT PAGE_SHIFT /* LOG2(NBPC) if exact */
+
+/*
+ * Size of block device i/o is parameterized here.
+ * Currently the system supports page-sized i/o.
+ */
+#define BLKDEV_IOSHIFT BPCSHIFT
+#define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT)
+/* number of BB's per block device block */
+#define BLKDEV_BB BTOBB(BLKDEV_IOSIZE)
+
+/* bytes to clicks */
+#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
+#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
+#define btoc64(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
+#define btoct64(x) ((__uint64_t)(x)>>BPCSHIFT)
+#define io_btoc(x) (((__psunsigned_t)(x)+(IO_NBPC-1))>>IO_BPCSHIFT)
+#define io_btoct(x) ((__psunsigned_t)(x)>>IO_BPCSHIFT)
+
+/* off_t bytes to clicks */
+#define offtoc(x) (((__uint64_t)(x)+(NBPC-1))>>BPCSHIFT)
+#define offtoct(x) ((xfs_off_t)(x)>>BPCSHIFT)
+
+/* clicks to off_t bytes */
+#define ctooff(x) ((xfs_off_t)(x)<<BPCSHIFT)
+
+/* clicks to bytes */
+#define ctob(x) ((__psunsigned_t)(x)<<BPCSHIFT)
+#define btoct(x) ((__psunsigned_t)(x)>>BPCSHIFT)
+#define ctob64(x) ((__uint64_t)(x)<<BPCSHIFT)
+#define io_ctob(x) ((__psunsigned_t)(x)<<IO_BPCSHIFT)
+
+/* bytes to clicks */
+#define btoc(x) (((__psunsigned_t)(x)+(NBPC-1))>>BPCSHIFT)
+
+#ifndef CELL_CAPABLE
+#define FSC_NOTIFY_NAME_CHANGED(vp)
+#endif
+
+#ifndef ENOATTR
+#define ENOATTR ENODATA /* Attribute not found */
+#endif
+
+/* Note: EWRONGFS never visible outside the kernel */
+#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
+
+/*
+ * XXX EFSCORRUPTED needs a real value in errno.h. asm-i386/errno.h won't
+ * return codes out of its known range in errno.
+ * XXX Also note: needs to be < 1000 and fairly unique on Linux (mustn't
+ * conflict with any code we use already or any code a driver may use)
+ * XXX Some options (currently we do #2):
+ * 1/ New error code ["Filesystem is corrupted", _after_ glibc updated]
+ * 2/ 990 ["Unknown error 990"]
+ * 3/ EUCLEAN ["Structure needs cleaning"]
+ * 4/ Convert EFSCORRUPTED to EIO [just prior to return into userspace]
+ */
+#define EFSCORRUPTED 990 /* Filesystem is corrupted */
+
+#define SYNCHRONIZE() barrier()
+#define __return_address __builtin_return_address(0)
+
+/*
+ * IRIX (BSD) quotactl makes use of separate commands for user/group,
+ * whereas on Linux the syscall encodes this information into the cmd
+ * field (see the QCMD macro in quota.h). These macros help keep the
+ * code portable - they are not visible from the syscall interface.
+ */
+#define Q_XSETGQLIM XQM_CMD(0x8) /* set groups disk limits */
+#define Q_XGETGQUOTA XQM_CMD(0x9) /* get groups disk limits */
+
+/* IRIX uses a dynamic sizing algorithm (ndquot = 200 + numprocs*2) */
+/* we may well need to fine-tune this if it ever becomes an issue. */
+#define DQUOT_MAX_HEURISTIC 1024 /* NR_DQUOTS */
+#define ndquot DQUOT_MAX_HEURISTIC
+
+/* IRIX uses the current size of the name cache to guess a good value */
+/* - this isn't the same but is a good enough starting point for now. */
+#define DQUOT_HASH_HEURISTIC files_stat.nr_files
+
+/* IRIX inodes maintain the project ID also, zero this field on Linux */
+#define DEFAULT_PROJID 0
+#define dfltprid DEFAULT_PROJID
+
+#define MAXPATHLEN 1024
+
+#define MIN(a,b) (min(a,b))
+#define MAX(a,b) (max(a,b))
+#define howmany(x, y) (((x)+((y)-1))/(y))
+#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
+
+#define xfs_stack_trace() dump_stack()
+
+#define xfs_itruncate_data(ip, off) \
+ (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
+
+
+/* Move the kernel do_div definition off to one side */
+
+#if defined __i386__
+/* For ia32 we need to pull some tricks to get past various versions
+ * of the compiler which do not like us using do_div in the middle
+ * of large functions.
+ */
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+ __u32 mod;
+
+ switch (n) {
+ case 4:
+ mod = *(__u32 *)a % b;
+ *(__u32 *)a = *(__u32 *)a / b;
+ return mod;
+ case 8:
+ {
+ unsigned long __upper, __low, __high, __mod;
+ __u64 c = *(__u64 *)a;
+ __upper = __high = c >> 32;
+ __low = c;
+ if (__high) {
+ __upper = __high % (b);
+ __high = __high / (b);
+ }
+ asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+ asm("":"=A" (c):"a" (__low),"d" (__high));
+ *(__u64 *)a = c;
+ return __mod;
+ }
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+ switch (n) {
+ case 4:
+ return *(__u32 *)a % b;
+ case 8:
+ {
+ unsigned long __upper, __low, __high, __mod;
+ __u64 c = *(__u64 *)a;
+ __upper = __high = c >> 32;
+ __low = c;
+ if (__high) {
+ __upper = __high % (b);
+ __high = __high / (b);
+ }
+ asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+ asm("":"=A" (c):"a" (__low),"d" (__high));
+ return __mod;
+ }
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+#else
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+ __u32 mod;
+
+ switch (n) {
+ case 4:
+ mod = *(__u32 *)a % b;
+ *(__u32 *)a = *(__u32 *)a / b;
+ return mod;
+ case 8:
+ mod = do_div(*(__u64 *)a, b);
+ return mod;
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+ switch (n) {
+ case 4:
+ return *(__u32 *)a % b;
+ case 8:
+ {
+ __u64 c = *(__u64 *)a;
+ return do_div(c, b);
+ }
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+#endif
+
+#undef do_div
+#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a))
+#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a))
+
+static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+{
+ x += y - 1;
+ do_div(x, y);
+ return(x * y);
+}
+
+#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
+
+#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
new file mode 100644
index 00000000000..ff145fd0d1a
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -0,0 +1,1082 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+/*
+ * fs/xfs/linux/xfs_lrw.c (Linux Read Write stuff)
+ *
+ */
+
+#include "xfs.h"
+
+#include "xfs_fs.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_iomap.h"
+
+#include <linux/capability.h>
+#include <linux/writeback.h>
+
+
+#if defined(XFS_RW_TRACE)
+void
+xfs_rw_enter_trace(
+ int tag,
+ xfs_iocore_t *io,
+ void *data,
+ size_t segs,
+ loff_t offset,
+ int ioflags)
+{
+ xfs_inode_t *ip = XFS_IO_INODE(io);
+
+ if (ip->i_rwtrace == NULL)
+ return;
+ ktrace_enter(ip->i_rwtrace,
+ (void *)(unsigned long)tag,
+ (void *)ip,
+ (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
+ (void *)data,
+ (void *)((unsigned long)segs),
+ (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(offset & 0xffffffff)),
+ (void *)((unsigned long)ioflags),
+ (void *)((unsigned long)((io->io_new_size >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(io->io_new_size & 0xffffffff)),
+ (void *)NULL,
+ (void *)NULL,
+ (void *)NULL,
+ (void *)NULL,
+ (void *)NULL);
+}
+
+void
+xfs_inval_cached_trace(
+ xfs_iocore_t *io,
+ xfs_off_t offset,
+ xfs_off_t len,
+ xfs_off_t first,
+ xfs_off_t last)
+{
+ xfs_inode_t *ip = XFS_IO_INODE(io);
+
+ if (ip->i_rwtrace == NULL)
+ return;
+ ktrace_enter(ip->i_rwtrace,
+ (void *)(__psint_t)XFS_INVAL_CACHED,
+ (void *)ip,
+ (void *)((unsigned long)((offset >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(offset & 0xffffffff)),
+ (void *)((unsigned long)((len >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(len & 0xffffffff)),
+ (void *)((unsigned long)((first >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(first & 0xffffffff)),
+ (void *)((unsigned long)((last >> 32) & 0xffffffff)),
+ (void *)((unsigned long)(last & 0xffffffff)),
+ (void *)NULL,
+ (void *)NULL,
+ (void *)NULL,
+ (void *)NULL,
+ (void *)NULL,
+ (void *)NULL);
+}
+#endif
+
+/*
+ * xfs_iozero
+ *
+ * xfs_iozero clears the specified range of buffer supplied,
+ * and marks all the affected blocks as valid and modified. If
+ * an affected block is not allocated, it will be allocated. If
+ * an affected block is not completely overwritten, and is not
+ * valid before the operation, it will be read from disk before
+ * being partially zeroed.
+ */
+STATIC int
+xfs_iozero(
+ struct inode *ip, /* inode */
+ loff_t pos, /* offset in file */
+ size_t count, /* size of data to zero */
+ loff_t end_size) /* max file size to set */
+{
+ unsigned bytes;
+ struct page *page;
+ struct address_space *mapping;
+ char *kaddr;
+ int status;
+
+ mapping = ip->i_mapping;
+ do {
+ unsigned long index, offset;
+
+ offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+ index = pos >> PAGE_CACHE_SHIFT;
+ bytes = PAGE_CACHE_SIZE - offset;
+ if (bytes > count)
+ bytes = count;
+
+ status = -ENOMEM;
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ break;
+
+ kaddr = kmap(page);
+ status = mapping->a_ops->prepare_write(NULL, page, offset,
+ offset + bytes);
+ if (status) {
+ goto unlock;
+ }
+
+ memset((void *) (kaddr + offset), 0, bytes);
+ flush_dcache_page(page);
+ status = mapping->a_ops->commit_write(NULL, page, offset,
+ offset + bytes);
+ if (!status) {
+ pos += bytes;
+ count -= bytes;
+ if (pos > i_size_read(ip))
+ i_size_write(ip, pos < end_size ? pos : end_size);
+ }
+
+unlock:
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ if (status)
+ break;
+ } while (count);
+
+ return (-status);
+}
+
+/*
+ * xfs_inval_cached_pages
+ *
+ * This routine is responsible for keeping direct I/O and buffered I/O
+ * somewhat coherent. From here we make sure that we're at least
+ * temporarily holding the inode I/O lock exclusively and then call
+ * the page cache to flush and invalidate any cached pages. If there
+ * are no cached pages this routine will be very quick.
+ */
+void
+xfs_inval_cached_pages(
+ vnode_t *vp,
+ xfs_iocore_t *io,
+ xfs_off_t offset,
+ int write,
+ int relock)
+{
+ if (VN_CACHED(vp)) {
+ xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
+ VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
+ }
+
+}
+
+ssize_t /* bytes read, or (-) error */
+xfs_read(
+ bhv_desc_t *bdp,
+ struct kiocb *iocb,
+ const struct iovec *iovp,
+ unsigned int segs,
+ loff_t *offset,
+ int ioflags,
+ cred_t *credp)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ size_t size = 0;
+ ssize_t ret;
+ xfs_fsize_t n;
+ xfs_inode_t *ip;
+ xfs_mount_t *mp;
+ vnode_t *vp;
+ unsigned long seg;
+
+ ip = XFS_BHVTOI(bdp);
+ vp = BHV_TO_VNODE(bdp);
+ mp = ip->i_mount;
+
+ XFS_STATS_INC(xs_read_calls);
+
+ /* START copy & waste from filemap.c */
+ for (seg = 0; seg < segs; seg++) {
+ const struct iovec *iv = &iovp[seg];
+
+ /*
+ * If any segment has a negative length, or the cumulative
+ * length ever wraps negative then return -EINVAL.
+ */
+ size += iv->iov_len;
+ if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+ return XFS_ERROR(-EINVAL);
+ }
+ /* END copy & waste from filemap.c */
+
+ if (unlikely(ioflags & IO_ISDIRECT)) {
+ xfs_buftarg_t *target =
+ (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+ if ((*offset & target->pbr_smask) ||
+ (size & target->pbr_smask)) {
+ if (*offset == ip->i_d.di_size) {
+ return (0);
+ }
+ return -XFS_ERROR(EINVAL);
+ }
+ }
+
+ n = XFS_MAXIOFFSET(mp) - *offset;
+ if ((n <= 0) || (size == 0))
+ return 0;
+
+ if (n < size)
+ size = n;
+
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ return -EIO;
+ }
+
+ if (unlikely(ioflags & IO_ISDIRECT))
+ down(&inode->i_sem);
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+ if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
+ !(ioflags & IO_INVIS)) {
+ vrwlock_t locktype = VRWLOCK_READ;
+
+ ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
+ BHV_TO_VNODE(bdp), *offset, size,
+ FILP_DELAY_FLAG(file), &locktype);
+ if (ret) {
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ goto unlock_isem;
+ }
+ }
+
+ xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
+ (void *)iovp, segs, *offset, ioflags);
+ ret = __generic_file_aio_read(iocb, iovp, segs, offset);
+ if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
+ ret = wait_on_sync_kiocb(iocb);
+ if (ret > 0)
+ XFS_STATS_ADD(xs_read_bytes, ret);
+
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ if (likely(!(ioflags & IO_INVIS)))
+ xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+
+unlock_isem:
+ if (unlikely(ioflags & IO_ISDIRECT))
+ up(&inode->i_sem);
+ return ret;
+}
+
+ssize_t
+xfs_sendfile(
+ bhv_desc_t *bdp,
+ struct file *filp,
+ loff_t *offset,
+ int ioflags,
+ size_t count,
+ read_actor_t actor,
+ void *target,
+ cred_t *credp)
+{
+ ssize_t ret;
+ xfs_fsize_t n;
+ xfs_inode_t *ip;
+ xfs_mount_t *mp;
+ vnode_t *vp;
+
+ ip = XFS_BHVTOI(bdp);
+ vp = BHV_TO_VNODE(bdp);
+ mp = ip->i_mount;
+
+ XFS_STATS_INC(xs_read_calls);
+
+ n = XFS_MAXIOFFSET(mp) - *offset;
+ if ((n <= 0) || (count == 0))
+ return 0;
+
+ if (n < count)
+ count = n;
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
+
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+ if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
+ (!(ioflags & IO_INVIS))) {
+ vrwlock_t locktype = VRWLOCK_READ;
+ int error;
+
+ error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count,
+ FILP_DELAY_FLAG(filp), &locktype);
+ if (error) {
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ return -error;
+ }
+ }
+ xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
+ (void *)(unsigned long)target, count, *offset, ioflags);
+ ret = generic_file_sendfile(filp, offset, count, actor, target);
+
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ if (ret > 0)
+ XFS_STATS_ADD(xs_read_bytes, ret);
+
+ if (likely(!(ioflags & IO_INVIS)))
+ xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+
+ return ret;
+}
+
+/*
+ * This routine is called to handle zeroing any space in the last
+ * block of the file that is beyond the EOF. We do this since the
+ * size is being increased without writing anything to that block
+ * and we don't want anyone to read the garbage on the disk.
+ */
+STATIC int /* error (positive) */
+xfs_zero_last_block(
+ struct inode *ip,
+ xfs_iocore_t *io,
+ xfs_off_t offset,
+ xfs_fsize_t isize,
+ xfs_fsize_t end_size)
+{
+ xfs_fileoff_t last_fsb;
+ xfs_mount_t *mp;
+ int nimaps;
+ int zero_offset;
+ int zero_len;
+ int isize_fsb_offset;
+ int error = 0;
+ xfs_bmbt_irec_t imap;
+ loff_t loff;
+ size_t lsize;
+
+ ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
+ ASSERT(offset > isize);
+
+ mp = io->io_mount;
+
+ isize_fsb_offset = XFS_B_FSB_OFFSET(mp, isize);
+ if (isize_fsb_offset == 0) {
+ /*
+ * There are no extra bytes in the last block on disk to
+ * zero, so return.
+ */
+ return 0;
+ }
+
+ last_fsb = XFS_B_TO_FSBT(mp, isize);
+ nimaps = 1;
+ error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
+ &nimaps, NULL);
+ if (error) {
+ return error;
+ }
+ ASSERT(nimaps > 0);
+ /*
+ * If the block underlying isize is just a hole, then there
+ * is nothing to zero.
+ */
+ if (imap.br_startblock == HOLESTARTBLOCK) {
+ return 0;
+ }
+ /*
+ * Zero the part of the last block beyond the EOF, and write it
+ * out sync. We need to drop the ilock while we do this so we
+ * don't deadlock when the buffer cache calls back to us.
+ */
+ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
+ loff = XFS_FSB_TO_B(mp, last_fsb);
+ lsize = XFS_FSB_TO_B(mp, 1);
+
+ zero_offset = isize_fsb_offset;
+ zero_len = mp->m_sb.sb_blocksize - isize_fsb_offset;
+
+ error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
+
+ XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+ ASSERT(error >= 0);
+ return error;
+}
+
+/*
+ * Zero any on disk space between the current EOF and the new,
+ * larger EOF. This handles the normal case of zeroing the remainder
+ * of the last block in the file and the unusual case of zeroing blocks
+ * out beyond the size of the file. This second case only happens
+ * with fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated. If fill is set,
+ * then any holes in the range are filled and zeroed. If not, the holes
+ * are left alone as holes.
+ */
+
+int /* error (positive) */
+xfs_zero_eof(
+ vnode_t *vp,
+ xfs_iocore_t *io,
+ xfs_off_t offset, /* starting I/O offset */
+ xfs_fsize_t isize, /* current inode size */
+ xfs_fsize_t end_size) /* terminal inode size */
+{
+ struct inode *ip = LINVFS_GET_IP(vp);
+ xfs_fileoff_t start_zero_fsb;
+ xfs_fileoff_t end_zero_fsb;
+ xfs_fileoff_t prev_zero_fsb;
+ xfs_fileoff_t zero_count_fsb;
+ xfs_fileoff_t last_fsb;
+ xfs_extlen_t buf_len_fsb;
+ xfs_extlen_t prev_zero_count;
+ xfs_mount_t *mp;
+ int nimaps;
+ int error = 0;
+ xfs_bmbt_irec_t imap;
+ loff_t loff;
+ size_t lsize;
+
+ ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
+ ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+
+ mp = io->io_mount;
+
+ /*
+ * First handle zeroing the block on which isize resides.
+ * We only zero a part of that block so it is handled specially.
+ */
+ error = xfs_zero_last_block(ip, io, offset, isize, end_size);
+ if (error) {
+ ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
+ ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+ return error;
+ }
+
+ /*
+ * Calculate the range between the new size and the old
+ * where blocks needing to be zeroed may exist. To get the
+ * block where the last byte in the file currently resides,
+ * we need to subtract one from the size and truncate back
+ * to a block boundary. We subtract 1 in case the size is
+ * exactly on a block boundary.
+ */
+ last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+ start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+ end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+ ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+ if (last_fsb == end_zero_fsb) {
+ /*
+ * The size was only incremented on its last block.
+ * We took care of that above, so just return.
+ */
+ return 0;
+ }
+
+ ASSERT(start_zero_fsb <= end_zero_fsb);
+ prev_zero_fsb = NULLFILEOFF;
+ prev_zero_count = 0;
+ while (start_zero_fsb <= end_zero_fsb) {
+ nimaps = 1;
+ zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+ error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
+ 0, NULL, 0, &imap, &nimaps, NULL);
+ if (error) {
+ ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
+ ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
+ return error;
+ }
+ ASSERT(nimaps > 0);
+
+ if (imap.br_state == XFS_EXT_UNWRITTEN ||
+ imap.br_startblock == HOLESTARTBLOCK) {
+ /*
+ * This loop handles initializing pages that were
+ * partially initialized by the code below this
+ * loop. It basically zeroes the part of the page
+ * that sits on a hole and sets the page as P_HOLE
+ * and calls remapf if it is a mapped file.
+ */
+ prev_zero_fsb = NULLFILEOFF;
+ prev_zero_count = 0;
+ start_zero_fsb = imap.br_startoff +
+ imap.br_blockcount;
+ ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+ continue;
+ }
+
+ /*
+ * There are blocks in the range requested.
+ * Zero them a single write at a time. We actually
+ * don't zero the entire range returned if it is
+ * too big and simply loop around to get the rest.
+ * That is not the most efficient thing to do, but it
+ * is simple and this path should not be exercised often.
+ */
+ buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
+ mp->m_writeio_blocks << 8);
+ /*
+ * Drop the inode lock while we're doing the I/O.
+ * We'll still have the iolock to protect us.
+ */
+ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+
+ loff = XFS_FSB_TO_B(mp, start_zero_fsb);
+ lsize = XFS_FSB_TO_B(mp, buf_len_fsb);
+
+ error = xfs_iozero(ip, loff, lsize, end_size);
+
+ if (error) {
+ goto out_lock;
+ }
+
+ prev_zero_fsb = start_zero_fsb;
+ prev_zero_count = buf_len_fsb;
+ start_zero_fsb = imap.br_startoff + buf_len_fsb;
+ ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+
+ XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+ }
+
+ return 0;
+
+out_lock:
+
+ XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+ ASSERT(error >= 0);
+ return error;
+}
+
+ssize_t /* bytes written, or (-) error */
+xfs_write(
+ bhv_desc_t *bdp,
+ struct kiocb *iocb,
+ const struct iovec *iovp,
+ unsigned int nsegs,
+ loff_t *offset,
+ int ioflags,
+ cred_t *credp)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned long segs = nsegs;
+ xfs_inode_t *xip;
+ xfs_mount_t *mp;
+ ssize_t ret = 0, error = 0;
+ xfs_fsize_t isize, new_size;
+ xfs_iocore_t *io;
+ vnode_t *vp;
+ unsigned long seg;
+ int iolock;
+ int eventsent = 0;
+ vrwlock_t locktype;
+ size_t ocount = 0, count;
+ loff_t pos;
+ int need_isem = 1, need_flush = 0;
+
+ XFS_STATS_INC(xs_write_calls);
+
+ vp = BHV_TO_VNODE(bdp);
+ xip = XFS_BHVTOI(bdp);
+
+ for (seg = 0; seg < segs; seg++) {
+ const struct iovec *iv = &iovp[seg];
+
+ /*
+ * If any segment has a negative length, or the cumulative
+ * length ever wraps negative then return -EINVAL.
+ */
+ ocount += iv->iov_len;
+ if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
+ return -EINVAL;
+ if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+ continue;
+ if (seg == 0)
+ return -EFAULT;
+ segs = seg;
+ ocount -= iv->iov_len; /* This segment is no good */
+ break;
+ }
+
+ count = ocount;
+ pos = *offset;
+
+ if (count == 0)
+ return 0;
+
+ io = &xip->i_iocore;
+ mp = io->io_mount;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);
+
+ if (ioflags & IO_ISDIRECT) {
+ xfs_buftarg_t *target =
+ (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+
+ if ((pos & target->pbr_smask) || (count & target->pbr_smask))
+ return XFS_ERROR(-EINVAL);
+
+ if (!VN_CACHED(vp) && pos < i_size_read(inode))
+ need_isem = 0;
+
+ if (VN_CACHED(vp))
+ need_flush = 1;
+ }
+
+relock:
+ if (need_isem) {
+ iolock = XFS_IOLOCK_EXCL;
+ locktype = VRWLOCK_WRITE;
+
+ down(&inode->i_sem);
+ } else {
+ iolock = XFS_IOLOCK_SHARED;
+ locktype = VRWLOCK_WRITE_DIRECT;
+ }
+
+ xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
+
+ isize = i_size_read(inode);
+
+ if (file->f_flags & O_APPEND)
+ *offset = isize;
+
+start:
+ error = -generic_write_checks(file, &pos, &count,
+ S_ISBLK(inode->i_mode));
+ if (error) {
+ xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+ goto out_unlock_isem;
+ }
+
+ new_size = pos + count;
+ if (new_size > isize)
+ io->io_new_size = new_size;
+
+ if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
+ !(ioflags & IO_INVIS) && !eventsent)) {
+ loff_t savedsize = pos;
+ int dmflags = FILP_DELAY_FLAG(file);
+
+ if (need_isem)
+ dmflags |= DM_FLAGS_ISEM;
+
+ xfs_iunlock(xip, XFS_ILOCK_EXCL);
+ error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
+ pos, count,
+ dmflags, &locktype);
+ if (error) {
+ xfs_iunlock(xip, iolock);
+ goto out_unlock_isem;
+ }
+ xfs_ilock(xip, XFS_ILOCK_EXCL);
+ eventsent = 1;
+
+ /*
+ * The iolock was dropped and reaquired in XFS_SEND_DATA
+ * so we have to recheck the size when appending.
+ * We will only "goto start;" once, since having sent the
+ * event prevents another call to XFS_SEND_DATA, which is
+ * what allows the size to change in the first place.
+ */
+ if ((file->f_flags & O_APPEND) && savedsize != isize) {
+ pos = isize = xip->i_d.di_size;
+ goto start;
+ }
+ }
+
+ /*
+ * On Linux, generic_file_write updates the times even if
+ * no data is copied in so long as the write had a size.
+ *
+ * We must update xfs' times since revalidate will overcopy xfs.
+ */
+ if (!(ioflags & IO_INVIS)) {
+ xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ inode_update_time(inode, 1);
+ }
+
+ /*
+ * If the offset is beyond the size of the file, we have a couple
+ * of things to do. First, if there is already space allocated
+ * we need to either create holes or zero the disk or ...
+ *
+ * If there is a page where the previous size lands, we need
+ * to zero it out up to the new size.
+ */
+
+ if (pos > isize) {
+ error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
+ isize, pos + count);
+ if (error) {
+ xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+ goto out_unlock_isem;
+ }
+ }
+ xfs_iunlock(xip, XFS_ILOCK_EXCL);
+
+ /*
+ * If we're writing the file then make sure to clear the
+ * setuid and setgid bits if the process is not being run
+ * by root. This keeps people from modifying setuid and
+ * setgid binaries.
+ */
+
+ if (((xip->i_d.di_mode & S_ISUID) ||
+ ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
+ (S_ISGID | S_IXGRP))) &&
+ !capable(CAP_FSETID)) {
+ error = xfs_write_clear_setuid(xip);
+ if (likely(!error))
+ error = -remove_suid(file->f_dentry);
+ if (unlikely(error)) {
+ xfs_iunlock(xip, iolock);
+ goto out_unlock_isem;
+ }
+ }
+
+retry:
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = mapping->backing_dev_info;
+
+ if ((ioflags & IO_ISDIRECT)) {
+ if (need_flush) {
+ xfs_inval_cached_trace(io, pos, -1,
+ ctooff(offtoct(pos)), -1);
+ VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
+ -1, FI_REMAPF_LOCKED);
+ }
+
+ if (need_isem) {
+ /* demote the lock now the cached pages are gone */
+ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
+ up(&inode->i_sem);
+
+ iolock = XFS_IOLOCK_SHARED;
+ locktype = VRWLOCK_WRITE_DIRECT;
+ need_isem = 0;
+ }
+
+ xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
+ *offset, ioflags);
+ ret = generic_file_direct_write(iocb, iovp,
+ &segs, pos, offset, count, ocount);
+
+ /*
+ * direct-io write to a hole: fall through to buffered I/O
+ * for completing the rest of the request.
+ */
+ if (ret >= 0 && ret != count) {
+ XFS_STATS_ADD(xs_write_bytes, ret);
+
+ pos += ret;
+ count -= ret;
+
+ need_isem = 1;
+ ioflags &= ~IO_ISDIRECT;
+ xfs_iunlock(xip, iolock);
+ goto relock;
+ }
+ } else {
+ xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
+ *offset, ioflags);
+ ret = generic_file_buffered_write(iocb, iovp, segs,
+ pos, offset, count, ret);
+ }
+
+ current->backing_dev_info = NULL;
+
+ if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
+ ret = wait_on_sync_kiocb(iocb);
+
+ if ((ret == -ENOSPC) &&
+ DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
+ !(ioflags & IO_INVIS)) {
+
+ xfs_rwunlock(bdp, locktype);
+ error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
+ DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
+ 0, 0, 0); /* Delay flag intentionally unused */
+ if (error)
+ goto out_unlock_isem;
+ xfs_rwlock(bdp, locktype);
+ pos = xip->i_d.di_size;
+ ret = 0;
+ goto retry;
+ }
+
+ if (*offset > xip->i_d.di_size) {
+ xfs_ilock(xip, XFS_ILOCK_EXCL);
+ if (*offset > xip->i_d.di_size) {
+ xip->i_d.di_size = *offset;
+ i_size_write(inode, *offset);
+ xip->i_update_core = 1;
+ xip->i_update_size = 1;
+ }
+ xfs_iunlock(xip, XFS_ILOCK_EXCL);
+ }
+
+ error = -ret;
+ if (ret <= 0)
+ goto out_unlock_internal;
+
+ XFS_STATS_ADD(xs_write_bytes, ret);
+
+ /* Handle various SYNC-type writes */
+ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
+ /*
+ * If we're treating this as O_DSYNC and we have not updated the
+ * size, force the log.
+ */
+ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
+ !(xip->i_update_size)) {
+ xfs_inode_log_item_t *iip = xip->i_itemp;
+
+ /*
+ * If an allocation transaction occurred
+ * without extending the size, then we have to force
+ * the log up the proper point to ensure that the
+ * allocation is permanent. We can't count on
+ * the fact that buffered writes lock out direct I/O
+ * writes - the direct I/O write could have extended
+ * the size nontransactionally, then finished before
+ * we started. xfs_write_file will think that the file
+ * didn't grow but the update isn't safe unless the
+ * size change is logged.
+ *
+ * Force the log if we've committed a transaction
+ * against the inode or if someone else has and
+ * the commit record hasn't gone to disk (e.g.
+ * the inode is pinned). This guarantees that
+ * all changes affecting the inode are permanent
+ * when we return.
+ */
+ if (iip && iip->ili_last_lsn) {
+ xfs_log_force(mp, iip->ili_last_lsn,
+ XFS_LOG_FORCE | XFS_LOG_SYNC);
+ } else if (xfs_ipincount(xip) > 0) {
+ xfs_log_force(mp, (xfs_lsn_t)0,
+ XFS_LOG_FORCE | XFS_LOG_SYNC);
+ }
+
+ } else {
+ xfs_trans_t *tp;
+
+ /*
+ * O_SYNC or O_DSYNC _with_ a size update are handled
+ * the same way.
+ *
+ * If the write was synchronous then we need to make
+ * sure that the inode modification time is permanent.
+ * We'll have updated the timestamp above, so here
+ * we use a synchronous transaction to log the inode.
+ * It's not fast, but it's necessary.
+ *
+ * If this a dsync write and the size got changed
+ * non-transactionally, then we need to ensure that
+ * the size change gets logged in a synchronous
+ * transaction.
+ */
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
+ if ((error = xfs_trans_reserve(tp, 0,
+ XFS_SWRITE_LOG_RES(mp),
+ 0, 0, 0))) {
+ /* Transaction reserve failed */
+ xfs_trans_cancel(tp, 0);
+ } else {
+ /* Transaction reserve successful */
+ xfs_ilock(xip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
+ xfs_trans_ihold(tp, xip);
+ xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp, 0, NULL);
+ xfs_iunlock(xip, XFS_ILOCK_EXCL);
+ }
+ if (error)
+ goto out_unlock_internal;
+ }
+
+ xfs_rwunlock(bdp, locktype);
+ if (need_isem)
+ up(&inode->i_sem);
+
+ error = sync_page_range(inode, mapping, pos, ret);
+ if (!error)
+ error = ret;
+ return error;
+ }
+
+ out_unlock_internal:
+ xfs_rwunlock(bdp, locktype);
+ out_unlock_isem:
+ if (need_isem)
+ up(&inode->i_sem);
+ return -error;
+}
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(struct xfs_buf *bp)
+{
+ xfs_mount_t *mp;
+
+ mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
+ pagebuf_iorequest(bp);
+ return 0;
+ } else {
+ xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
+ /*
+ * Metadata write that didn't get logged but
+ * written delayed anyway. These aren't associated
+ * with a transaction, and can be ignored.
+ */
+ if (XFS_BUF_IODONE_FUNC(bp) == NULL &&
+ (XFS_BUF_ISREAD(bp)) == 0)
+ return (xfs_bioerror_relse(bp));
+ else
+ return (xfs_bioerror(bp));
+ }
+}
+
+
+int
+xfs_bmap(bhv_desc_t *bdp,
+ xfs_off_t offset,
+ ssize_t count,
+ int flags,
+ xfs_iomap_t *iomapp,
+ int *niomaps)
+{
+ xfs_inode_t *ip = XFS_BHVTOI(bdp);
+ xfs_iocore_t *io = &ip->i_iocore;
+
+ ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
+ ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
+ ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));
+
+ return xfs_iomap(io, offset, count, flags, iomapp, niomaps);
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data
+ * from going to disk in case we are shutting down the filesystem.
+ * Typically user data goes thru this path; one of the exceptions
+ * is the superblock.
+ */
+int
+xfsbdstrat(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp)
+{
+ ASSERT(mp);
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
+ /* Grio redirection would go here
+ * if (XFS_BUF_IS_GRIO(bp)) {
+ */
+
+ pagebuf_iorequest(bp);
+ return 0;
+ }
+
+ xfs_buftrace("XFSBDSTRAT IOERROR", bp);
+ return (xfs_bioerror_relse(bp));
+}
+
+/*
+ * If the underlying (data/log/rt) device is readonly, there are some
+ * operations that cannot proceed.
+ */
+int
+xfs_dev_is_read_only(
+ xfs_mount_t *mp,
+ char *message)
+{
+ if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
+ xfs_readonly_buftarg(mp->m_logdev_targp) ||
+ (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
+ cmn_err(CE_NOTE,
+ "XFS: %s required on read-only device.", message);
+ cmn_err(CE_NOTE,
+ "XFS: write access unavailable, cannot proceed.");
+ return EROFS;
+ }
+ return 0;
+}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
new file mode 100644
index 00000000000..d723e35254a
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_LRW_H__
+#define __XFS_LRW_H__
+
+struct vnode;
+struct bhv_desc;
+struct xfs_mount;
+struct xfs_iocore;
+struct xfs_inode;
+struct xfs_bmbt_irec;
+struct xfs_buf;
+struct xfs_iomap;
+
+#if defined(XFS_RW_TRACE)
+/*
+ * Defines for the trace mechanisms in xfs_lrw.c.
+ */
+#define XFS_RW_KTRACE_SIZE 128
+
+#define XFS_READ_ENTER 1
+#define XFS_WRITE_ENTER 2
+#define XFS_IOMAP_READ_ENTER 3
+#define XFS_IOMAP_WRITE_ENTER 4
+#define XFS_IOMAP_READ_MAP 5
+#define XFS_IOMAP_WRITE_MAP 6
+#define XFS_IOMAP_WRITE_NOSPACE 7
+#define XFS_ITRUNC_START 8
+#define XFS_ITRUNC_FINISH1 9
+#define XFS_ITRUNC_FINISH2 10
+#define XFS_CTRUNC1 11
+#define XFS_CTRUNC2 12
+#define XFS_CTRUNC3 13
+#define XFS_CTRUNC4 14
+#define XFS_CTRUNC5 15
+#define XFS_CTRUNC6 16
+#define XFS_BUNMAPI 17
+#define XFS_INVAL_CACHED 18
+#define XFS_DIORD_ENTER 19
+#define XFS_DIOWR_ENTER 20
+#define XFS_SENDFILE_ENTER 21
+#define XFS_WRITEPAGE_ENTER 22
+#define XFS_RELEASEPAGE_ENTER 23
+#define XFS_IOMAP_ALLOC_ENTER 24
+#define XFS_IOMAP_ALLOC_MAP 25
+#define XFS_IOMAP_UNWRITTEN 26
+extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
+ void *, size_t, loff_t, int);
+extern void xfs_inval_cached_trace(struct xfs_iocore *,
+ xfs_off_t, xfs_off_t, xfs_off_t, xfs_off_t);
+#else
+#define xfs_rw_enter_trace(tag, io, data, size, offset, ioflags)
+#define xfs_inval_cached_trace(io, offset, len, first, last)
+#endif
+
+/*
+ * Maximum count of bmaps used by read and write paths.
+ */
+#define XFS_MAX_RW_NBMAPS 4
+
+extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int,
+ struct xfs_iomap *, int *);
+extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+extern int xfs_bdstrat_cb(struct xfs_buf *);
+
+extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
+ xfs_fsize_t, xfs_fsize_t);
+extern void xfs_inval_cached_pages(struct vnode *, struct xfs_iocore *,
+ xfs_off_t, int, int);
+extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
+ const struct iovec *, unsigned int,
+ loff_t *, int, struct cred *);
+extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
+ const struct iovec *, unsigned int,
+ loff_t *, int, struct cred *);
+extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
+ loff_t *, int, size_t, read_actor_t,
+ void *, struct cred *);
+
+extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
+
+#define XFS_FSB_TO_DB_IO(io,fsb) \
+ (((io)->io_flags & XFS_IOCORE_RT) ? \
+ XFS_FSB_TO_BB((io)->io_mount, (fsb)) : \
+ XFS_FSB_TO_DADDR((io)->io_mount, (fsb)))
+
+#endif /* __XFS_LRW_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
new file mode 100644
index 00000000000..aaf5ddba47f
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include <linux/proc_fs.h>
+
+DEFINE_PER_CPU(struct xfsstats, xfsstats);
+
+STATIC int
+xfs_read_xfsstats(
+ char *buffer,
+ char **start,
+ off_t offset,
+ int count,
+ int *eof,
+ void *data)
+{
+ int c, i, j, len, val;
+ __uint64_t xs_xstrat_bytes = 0;
+ __uint64_t xs_write_bytes = 0;
+ __uint64_t xs_read_bytes = 0;
+
+ static struct xstats_entry {
+ char *desc;
+ int endpoint;
+ } xstats[] = {
+ { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC },
+ { "abt", XFSSTAT_END_ALLOC_BTREE },
+ { "blk_map", XFSSTAT_END_BLOCK_MAPPING },
+ { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE },
+ { "dir", XFSSTAT_END_DIRECTORY_OPS },
+ { "trans", XFSSTAT_END_TRANSACTIONS },
+ { "ig", XFSSTAT_END_INODE_OPS },
+ { "log", XFSSTAT_END_LOG_OPS },
+ { "push_ail", XFSSTAT_END_TAIL_PUSHING },
+ { "xstrat", XFSSTAT_END_WRITE_CONVERT },
+ { "rw", XFSSTAT_END_READ_WRITE_OPS },
+ { "attr", XFSSTAT_END_ATTRIBUTE_OPS },
+ { "icluster", XFSSTAT_END_INODE_CLUSTER },
+ { "vnodes", XFSSTAT_END_VNODE_OPS },
+ { "buf", XFSSTAT_END_BUF },
+ };
+
+ /* Loop over all stats groups */
+ for (i=j=len = 0; i < sizeof(xstats)/sizeof(struct xstats_entry); i++) {
+ len += sprintf(buffer + len, xstats[i].desc);
+ /* inner loop does each group */
+ while (j < xstats[i].endpoint) {
+ val = 0;
+ /* sum over all cpus */
+ for (c = 0; c < NR_CPUS; c++) {
+ if (!cpu_possible(c)) continue;
+ val += *(((__u32*)&per_cpu(xfsstats, c) + j));
+ }
+ len += sprintf(buffer + len, " %u", val);
+ j++;
+ }
+ buffer[len++] = '\n';
+ }
+ /* extra precision counters */
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_possible(i)) continue;
+ xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
+ xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
+ xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
+ }
+
+ len += sprintf(buffer + len, "xpc %Lu %Lu %Lu\n",
+ xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+ len += sprintf(buffer + len, "debug %u\n",
+#if defined(DEBUG)
+ 1);
+#else
+ 0);
+#endif
+
+ if (offset >= len) {
+ *start = buffer;
+ *eof = 1;
+ return 0;
+ }
+ *start = buffer + offset;
+ if ((len -= offset) > count)
+ return count;
+ *eof = 1;
+
+ return len;
+}
+
+void
+xfs_init_procfs(void)
+{
+ if (!proc_mkdir("fs/xfs", NULL))
+ return;
+ create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
+}
+
+void
+xfs_cleanup_procfs(void)
+{
+ remove_proc_entry("fs/xfs/stat", NULL);
+ remove_proc_entry("fs/xfs", NULL);
+}
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
new file mode 100644
index 00000000000..3f756a6c3eb
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_STATS_H__
+#define __XFS_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+#include <linux/percpu.h>
+
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC 4
+ __uint32_t xs_allocx;
+ __uint32_t xs_allocb;
+ __uint32_t xs_freex;
+ __uint32_t xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4)
+ __uint32_t xs_abt_lookup;
+ __uint32_t xs_abt_compare;
+ __uint32_t xs_abt_insrec;
+ __uint32_t xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7)
+ __uint32_t xs_blk_mapr;
+ __uint32_t xs_blk_mapw;
+ __uint32_t xs_blk_unmap;
+ __uint32_t xs_add_exlist;
+ __uint32_t xs_del_exlist;
+ __uint32_t xs_look_exlist;
+ __uint32_t xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4)
+ __uint32_t xs_bmbt_lookup;
+ __uint32_t xs_bmbt_compare;
+ __uint32_t xs_bmbt_insrec;
+ __uint32_t xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4)
+ __uint32_t xs_dir_lookup;
+ __uint32_t xs_dir_create;
+ __uint32_t xs_dir_remove;
+ __uint32_t xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3)
+ __uint32_t xs_trans_sync;
+ __uint32_t xs_trans_async;
+ __uint32_t xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7)
+ __uint32_t xs_ig_attempts;
+ __uint32_t xs_ig_found;
+ __uint32_t xs_ig_frecycle;
+ __uint32_t xs_ig_missed;
+ __uint32_t xs_ig_dup;
+ __uint32_t xs_ig_reclaims;
+ __uint32_t xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5)
+ __uint32_t xs_log_writes;
+ __uint32_t xs_log_blocks;
+ __uint32_t xs_log_noiclogs;
+ __uint32_t xs_log_force;
+ __uint32_t xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10)
+ __uint32_t xs_try_logspace;
+ __uint32_t xs_sleep_logspace;
+ __uint32_t xs_push_ail;
+ __uint32_t xs_push_ail_success;
+ __uint32_t xs_push_ail_pushbuf;
+ __uint32_t xs_push_ail_pinned;
+ __uint32_t xs_push_ail_locked;
+ __uint32_t xs_push_ail_flushing;
+ __uint32_t xs_push_ail_restarts;
+ __uint32_t xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2)
+ __uint32_t xs_xstrat_quick;
+ __uint32_t xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2)
+ __uint32_t xs_write_calls;
+ __uint32_t xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4)
+ __uint32_t xs_attr_get;
+ __uint32_t xs_attr_set;
+ __uint32_t xs_attr_remove;
+ __uint32_t xs_attr_list;
+# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3)
+ __uint32_t xs_iflush_count;
+ __uint32_t xs_icluster_flushcnt;
+ __uint32_t xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8)
+ __uint32_t vn_active; /* # vnodes not on free lists */
+ __uint32_t vn_alloc; /* # times vn_alloc called */
+ __uint32_t vn_get; /* # times vn_get called */
+ __uint32_t vn_hold; /* # times vn_hold called */
+ __uint32_t vn_rele; /* # times vn_rele called */
+ __uint32_t vn_reclaim; /* # times vn_reclaim called */
+ __uint32_t vn_remove; /* # times vn_remove called */
+ __uint32_t vn_free; /* # times vn_free called */
+#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
+ __uint32_t pb_get;
+ __uint32_t pb_create;
+ __uint32_t pb_get_locked;
+ __uint32_t pb_get_locked_waited;
+ __uint32_t pb_busy_locked;
+ __uint32_t pb_miss_locked;
+ __uint32_t pb_page_retries;
+ __uint32_t pb_page_found;
+ __uint32_t pb_get_read;
+/* Extra precision counters */
+ __uint64_t xs_xstrat_bytes;
+ __uint64_t xs_write_bytes;
+ __uint64_t xs_read_bytes;
+};
+
+DECLARE_PER_CPU(struct xfsstats, xfsstats);
+
+/*
+ * We don't disable preempt, not too worried about poking the
+ * wrong CPU's stat for now (also aggregated before reporting).
+ */
+#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++)
+#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
+#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
+
+extern void xfs_init_procfs(void);
+extern void xfs_cleanup_procfs(void);
+
+
+#else /* !CONFIG_PROC_FS */
+
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+
+static __inline void xfs_init_procfs(void) { };
+static __inline void xfs_cleanup_procfs(void) { };
+
+#endif /* !CONFIG_PROC_FS */
+
+#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
new file mode 100644
index 00000000000..53dc658cafa
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -0,0 +1,912 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_cap.h"
+#include "xfs_mac.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_version.h"
+#include "xfs_ioctl32.h"
+
+#include <linux/namei.h>
+#include <linux/init.h>
+#include <linux/mount.h>
+#include <linux/writeback.h>
+
+STATIC struct quotactl_ops linvfs_qops;
+STATIC struct super_operations linvfs_sops;
+STATIC kmem_zone_t *linvfs_inode_zone;
+
+STATIC struct xfs_mount_args *
+xfs_args_allocate(
+ struct super_block *sb)
+{
+ struct xfs_mount_args *args;
+
+ args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
+ args->logbufs = args->logbufsize = -1;
+ strncpy(args->fsname, sb->s_id, MAXNAMELEN);
+
+ /* Copy the already-parsed mount(2) flags we're interested in */
+ if (sb->s_flags & MS_NOATIME)
+ args->flags |= XFSMNT_NOATIME;
+ if (sb->s_flags & MS_DIRSYNC)
+ args->flags |= XFSMNT_DIRSYNC;
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ args->flags |= XFSMNT_WSYNC;
+
+ /* Default to 32 bit inodes on Linux all the time */
+ args->flags |= XFSMNT_32BITINODES;
+
+ return args;
+}
+
+__uint64_t
+xfs_max_file_offset(
+ unsigned int blockshift)
+{
+ unsigned int pagefactor = 1;
+ unsigned int bitshift = BITS_PER_LONG - 1;
+
+ /* Figure out maximum filesize, on Linux this can depend on
+ * the filesystem blocksize (on 32 bit platforms).
+ * __block_prepare_write does this in an [unsigned] long...
+ * page->index << (PAGE_CACHE_SHIFT - bbits)
+ * So, for page sized blocks (4K on 32 bit platforms),
+ * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
+ * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+ * but for smaller blocksizes it is less (bbits = log2 bsize).
+ * Note1: get_block_t takes a long (implicit cast from above)
+ * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
+ * can optionally convert the [unsigned] long from above into
+ * an [unsigned] long long.
+ */
+
+#if BITS_PER_LONG == 32
+# if defined(CONFIG_LBD)
+ ASSERT(sizeof(sector_t) == 8);
+ pagefactor = PAGE_CACHE_SIZE;
+ bitshift = BITS_PER_LONG;
+# else
+ pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+# endif
+#endif
+
+ return (((__uint64_t)pagefactor) << bitshift) - 1;
+}
+
+STATIC __inline__ void
+xfs_set_inodeops(
+ struct inode *inode)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+
+ if (vp->v_type == VNON) {
+ vn_mark_bad(vp);
+ } else if (S_ISREG(inode->i_mode)) {
+ inode->i_op = &linvfs_file_inode_operations;
+ inode->i_fop = &linvfs_file_operations;
+ inode->i_mapping->a_ops = &linvfs_aops;
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &linvfs_dir_inode_operations;
+ inode->i_fop = &linvfs_dir_operations;
+ } else if (S_ISLNK(inode->i_mode)) {
+ inode->i_op = &linvfs_symlink_inode_operations;
+ if (inode->i_blocks)
+ inode->i_mapping->a_ops = &linvfs_aops;
+ } else {
+ inode->i_op = &linvfs_file_inode_operations;
+ init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ }
+}
+
+STATIC __inline__ void
+xfs_revalidate_inode(
+ xfs_mount_t *mp,
+ vnode_t *vp,
+ xfs_inode_t *ip)
+{
+ struct inode *inode = LINVFS_GET_IP(vp);
+
+ inode->i_mode = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
+ inode->i_nlink = ip->i_d.di_nlink;
+ inode->i_uid = ip->i_d.di_uid;
+ inode->i_gid = ip->i_d.di_gid;
+ if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+ inode->i_rdev = 0;
+ } else {
+ xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
+ inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
+ }
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_generation = ip->i_d.di_gen;
+ i_size_write(inode, ip->i_d.di_size);
+ inode->i_blocks =
+ XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+ inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
+ inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
+ inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
+ inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
+ inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
+ inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
+ if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+ inode->i_flags |= S_IMMUTABLE;
+ else
+ inode->i_flags &= ~S_IMMUTABLE;
+ if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+ inode->i_flags |= S_APPEND;
+ else
+ inode->i_flags &= ~S_APPEND;
+ if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+ inode->i_flags |= S_SYNC;
+ else
+ inode->i_flags &= ~S_SYNC;
+ if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+ inode->i_flags |= S_NOATIME;
+ else
+ inode->i_flags &= ~S_NOATIME;
+ vp->v_flag &= ~VMODIFIED;
+}
+
+void
+xfs_initialize_vnode(
+ bhv_desc_t *bdp,
+ vnode_t *vp,
+ bhv_desc_t *inode_bhv,
+ int unlock)
+{
+ xfs_inode_t *ip = XFS_BHVTOI(inode_bhv);
+ struct inode *inode = LINVFS_GET_IP(vp);
+
+ if (!inode_bhv->bd_vobj) {
+ vp->v_vfsp = bhvtovfs(bdp);
+ bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
+ bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
+ }
+
+ /*
+ * We need to set the ops vectors, and unlock the inode, but if
+ * we have been called during the new inode create process, it is
+ * too early to fill in the Linux inode. We will get called a
+ * second time once the inode is properly set up, and then we can
+ * finish our work.
+ */
+ if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
+ vp->v_type = IFTOVT(ip->i_d.di_mode);
+ xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
+ xfs_set_inodeops(inode);
+
+ ip->i_flags &= ~XFS_INEW;
+ barrier();
+
+ unlock_new_inode(inode);
+ }
+}
+
+int
+xfs_blkdev_get(
+ xfs_mount_t *mp,
+ const char *name,
+ struct block_device **bdevp)
+{
+ int error = 0;
+
+ *bdevp = open_bdev_excl(name, 0, mp);
+ if (IS_ERR(*bdevp)) {
+ error = PTR_ERR(*bdevp);
+ printk("XFS: Invalid device [%s], error=%d\n", name, error);
+ }
+
+ return -error;
+}
+
+void
+xfs_blkdev_put(
+ struct block_device *bdev)
+{
+ if (bdev)
+ close_bdev_excl(bdev);
+}
+
+
+STATIC struct inode *
+linvfs_alloc_inode(
+ struct super_block *sb)
+{
+ vnode_t *vp;
+
+ vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone,
+ kmem_flags_convert(KM_SLEEP));
+ if (!vp)
+ return NULL;
+ return LINVFS_GET_IP(vp);
+}
+
+STATIC void
+linvfs_destroy_inode(
+ struct inode *inode)
+{
+ kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
+}
+
+STATIC void
+init_once(
+ void *data,
+ kmem_cache_t *cachep,
+ unsigned long flags)
+{
+ vnode_t *vp = (vnode_t *)data;
+
+ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ inode_init_once(LINVFS_GET_IP(vp));
+}
+
+STATIC int
+init_inodecache( void )
+{
+ linvfs_inode_zone = kmem_cache_create("linvfs_icache",
+ sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
+ init_once, NULL);
+ if (linvfs_inode_zone == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+STATIC void
+destroy_inodecache( void )
+{
+ if (kmem_cache_destroy(linvfs_inode_zone))
+ printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
+}
+
+/*
+ * Attempt to flush the inode, this will actually fail
+ * if the inode is pinned, but we dirty the inode again
+ * at the point when it is unpinned after a log write,
+ * since this is when the inode itself becomes flushable.
+ */
+STATIC int
+linvfs_write_inode(
+ struct inode *inode,
+ int sync)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+ int error = 0, flags = FLUSH_INODE;
+
+ if (vp) {
+ vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+ if (sync)
+ flags |= FLUSH_SYNC;
+ VOP_IFLUSH(vp, flags, error);
+ if (error == EAGAIN) {
+ if (sync)
+ VOP_IFLUSH(vp, flags | FLUSH_LOG, error);
+ else
+ error = 0;
+ }
+ }
+
+ return -error;
+}
+
+STATIC void
+linvfs_clear_inode(
+ struct inode *inode)
+{
+ vnode_t *vp = LINVFS_GET_VP(inode);
+
+ if (vp) {
+ vn_rele(vp);
+ vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+ /*
+ * Do all our cleanup, and remove this vnode.
+ */
+ vn_remove(vp);
+ }
+}
+
+
+/*
+ * Enqueue a work item to be picked up by the vfs xfssyncd thread.
+ * Doing this has two advantages:
+ * - It saves on stack space, which is tight in certain situations
+ * - It can be used (with care) as a mechanism to avoid deadlocks.
+ * Flushing while allocating in a full filesystem requires both.
+ */
+STATIC void
+xfs_syncd_queue_work(
+ struct vfs *vfs,
+ void *data,
+ void (*syncer)(vfs_t *, void *))
+{
+ vfs_sync_work_t *work;
+
+ work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);
+ INIT_LIST_HEAD(&work->w_list);
+ work->w_syncer = syncer;
+ work->w_data = data;
+ work->w_vfs = vfs;
+ spin_lock(&vfs->vfs_sync_lock);
+ list_add_tail(&work->w_list, &vfs->vfs_sync_list);
+ spin_unlock(&vfs->vfs_sync_lock);
+ wake_up_process(vfs->vfs_sync_task);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room...
+ */
+STATIC void
+xfs_flush_inode_work(
+ vfs_t *vfs,
+ void *inode)
+{
+ filemap_flush(((struct inode *)inode)->i_mapping);
+ iput((struct inode *)inode);
+}
+
+void
+xfs_flush_inode(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
+ struct vfs *vfs = XFS_MTOVFS(ip->i_mount);
+
+ igrab(inode);
+ xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
+ delay(HZ/2);
+}
+
+/*
+ * This is the "bigger hammer" version of xfs_flush_inode_work...
+ * (IOW, "If at first you don't succeed, use a Bigger Hammer").
+ */
+STATIC void
+xfs_flush_device_work(
+ vfs_t *vfs,
+ void *inode)
+{
+ sync_blockdev(vfs->vfs_super->s_bdev);
+ iput((struct inode *)inode);
+}
+
+void
+xfs_flush_device(
+ xfs_inode_t *ip)
+{
+ struct inode *inode = LINVFS_GET_IP(XFS_ITOV(ip));
+ struct vfs *vfs = XFS_MTOVFS(ip->i_mount);
+
+ igrab(inode);
+ xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
+ delay(HZ/2);
+ xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+}
+
+#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
+STATIC void
+vfs_sync_worker(
+ vfs_t *vfsp,
+ void *unused)
+{
+ int error;
+
+ if (!(vfsp->vfs_flag & VFS_RDONLY))
+ VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
+ vfsp->vfs_sync_seq++;
+ wmb();
+ wake_up(&vfsp->vfs_wait_single_sync_task);
+}
+
+STATIC int
+xfssyncd(
+ void *arg)
+{
+ long timeleft;
+ vfs_t *vfsp = (vfs_t *) arg;
+ struct list_head tmp;
+ struct vfs_sync_work *work, *n;
+
+ daemonize("xfssyncd");
+
+ vfsp->vfs_sync_work.w_vfs = vfsp;
+ vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
+ vfsp->vfs_sync_task = current;
+ wmb();
+ wake_up(&vfsp->vfs_wait_sync_task);
+
+ INIT_LIST_HEAD(&tmp);
+ timeleft = (xfs_syncd_centisecs * HZ) / 100;
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ timeleft = schedule_timeout(timeleft);
+ /* swsusp */
+ try_to_freeze(PF_FREEZE);
+ if (vfsp->vfs_flag & VFS_UMOUNT)
+ break;
+
+ spin_lock(&vfsp->vfs_sync_lock);
+ /*
+ * We can get woken by laptop mode, to do a sync -
+ * that's the (only!) case where the list would be
+ * empty with time remaining.
+ */
+ if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
+ if (!timeleft)
+ timeleft = (xfs_syncd_centisecs * HZ) / 100;
+ INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
+ list_add_tail(&vfsp->vfs_sync_work.w_list,
+ &vfsp->vfs_sync_list);
+ }
+ list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
+ list_move(&work->w_list, &tmp);
+ spin_unlock(&vfsp->vfs_sync_lock);
+
+ list_for_each_entry_safe(work, n, &tmp, w_list) {
+ (*work->w_syncer)(vfsp, work->w_data);
+ list_del(&work->w_list);
+ if (work == &vfsp->vfs_sync_work)
+ continue;
+ kmem_free(work, sizeof(struct vfs_sync_work));
+ }
+ }
+
+ vfsp->vfs_sync_task = NULL;
+ wmb();
+ wake_up(&vfsp->vfs_wait_sync_task);
+
+ return 0;
+}
+
+STATIC int
+linvfs_start_syncd(
+ vfs_t *vfsp)
+{
+ int pid;
+
+ pid = kernel_thread(xfssyncd, (void *) vfsp,
+ CLONE_VM | CLONE_FS | CLONE_FILES);
+ if (pid < 0)
+ return -pid;
+ wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
+ return 0;
+}
+
+STATIC void
+linvfs_stop_syncd(
+ vfs_t *vfsp)
+{
+ vfsp->vfs_flag |= VFS_UMOUNT;
+ wmb();
+
+ wake_up_process(vfsp->vfs_sync_task);
+ wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
+}
+
+STATIC void
+linvfs_put_super(
+ struct super_block *sb)
+{
+ vfs_t *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+
+ linvfs_stop_syncd(vfsp);
+ VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
+ if (!error)
+ VFS_UNMOUNT(vfsp, 0, NULL, error);
+ if (error) {
+ printk("XFS unmount got error %d\n", error);
+ printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp);
+ return;
+ }
+
+ vfs_deallocate(vfsp);
+}
+
+STATIC void
+linvfs_write_super(
+ struct super_block *sb)
+{
+ vfs_t *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+
+ if (sb->s_flags & MS_RDONLY) {
+ sb->s_dirt = 0; /* paranoia */
+ return;
+ }
+ /* Push the log and superblock a little */
+ VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
+ sb->s_dirt = 0;
+}
+
+STATIC int
+linvfs_sync_super(
+ struct super_block *sb,
+ int wait)
+{
+ vfs_t *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+ int flags = SYNC_FSDATA;
+
+ if (wait)
+ flags |= SYNC_WAIT;
+
+ VFS_SYNC(vfsp, flags, NULL, error);
+ sb->s_dirt = 0;
+
+ if (unlikely(laptop_mode)) {
+ int prev_sync_seq = vfsp->vfs_sync_seq;
+
+ /*
+ * The disk must be active because we're syncing.
+ * We schedule xfssyncd now (now that the disk is
+ * active) instead of later (when it might not be).
+ */
+ wake_up_process(vfsp->vfs_sync_task);
+ /*
+ * We have to wait for the sync iteration to complete.
+ * If we don't, the disk activity caused by the sync
+ * will come after the sync is completed, and that
+ * triggers another sync from laptop mode.
+ */
+ wait_event(vfsp->vfs_wait_single_sync_task,
+ vfsp->vfs_sync_seq != prev_sync_seq);
+ }
+
+ return -error;
+}
+
+STATIC int
+linvfs_statfs(
+ struct super_block *sb,
+ struct kstatfs *statp)
+{
+ vfs_t *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+
+ VFS_STATVFS(vfsp, statp, NULL, error);
+ return -error;
+}
+
+STATIC int
+linvfs_remount(
+ struct super_block *sb,
+ int *flags,
+ char *options)
+{
+ vfs_t *vfsp = LINVFS_GET_VFS(sb);
+ struct xfs_mount_args *args = xfs_args_allocate(sb);
+ int error;
+
+ VFS_PARSEARGS(vfsp, options, args, 1, error);
+ if (!error)
+ VFS_MNTUPDATE(vfsp, flags, args, error);
+ kmem_free(args, sizeof(*args));
+ return -error;
+}
+
+STATIC void
+linvfs_freeze_fs(
+ struct super_block *sb)
+{
+ VFS_FREEZE(LINVFS_GET_VFS(sb));
+}
+
+STATIC int
+linvfs_show_options(
+ struct seq_file *m,
+ struct vfsmount *mnt)
+{
+ struct vfs *vfsp = LINVFS_GET_VFS(mnt->mnt_sb);
+ int error;
+
+ VFS_SHOWARGS(vfsp, m, error);
+ return error;
+}
+
+STATIC int
+linvfs_getxstate(
+ struct super_block *sb,
+ struct fs_quota_stat *fqs)
+{
+ struct vfs *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+
+ VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
+ return -error;
+}
+
+STATIC int
+linvfs_setxstate(
+ struct super_block *sb,
+ unsigned int flags,
+ int op)
+{
+ struct vfs *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+
+ VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
+ return -error;
+}
+
+STATIC int
+linvfs_getxquota(
+ struct super_block *sb,
+ int type,
+ qid_t id,
+ struct fs_disk_quota *fdq)
+{
+ struct vfs *vfsp = LINVFS_GET_VFS(sb);
+ int error, getmode;
+
+ getmode = (type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETQUOTA;
+ VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
+ return -error;
+}
+
+STATIC int
+linvfs_setxquota(
+ struct super_block *sb,
+ int type,
+ qid_t id,
+ struct fs_disk_quota *fdq)
+{
+ struct vfs *vfsp = LINVFS_GET_VFS(sb);
+ int error, setmode;
+
+ setmode = (type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETQLIM;
+ VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
+ return -error;
+}
+
+STATIC int
+linvfs_fill_super(
+ struct super_block *sb,
+ void *data,
+ int silent)
+{
+ vnode_t *rootvp;
+ struct vfs *vfsp = vfs_allocate();
+ struct xfs_mount_args *args = xfs_args_allocate(sb);
+ struct kstatfs statvfs;
+ int error, error2;
+
+ vfsp->vfs_super = sb;
+ LINVFS_SET_VFS(sb, vfsp);
+ if (sb->s_flags & MS_RDONLY)
+ vfsp->vfs_flag |= VFS_RDONLY;
+ bhv_insert_all_vfsops(vfsp);
+
+ VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
+ if (error) {
+ bhv_remove_all_vfsops(vfsp, 1);
+ goto fail_vfsop;
+ }
+
+ sb_min_blocksize(sb, BBSIZE);
+#ifdef CONFIG_XFS_EXPORT
+ sb->s_export_op = &linvfs_export_ops;
+#endif
+ sb->s_qcop = &linvfs_qops;
+ sb->s_op = &linvfs_sops;
+
+ VFS_MOUNT(vfsp, args, NULL, error);
+ if (error) {
+ bhv_remove_all_vfsops(vfsp, 1);
+ goto fail_vfsop;
+ }
+
+ VFS_STATVFS(vfsp, &statvfs, NULL, error);
+ if (error)
+ goto fail_unmount;
+
+ sb->s_dirt = 1;
+ sb->s_magic = statvfs.f_type;
+ sb->s_blocksize = statvfs.f_bsize;
+ sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
+ sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+ sb->s_time_gran = 1;
+ set_posix_acl_flag(sb);
+
+ VFS_ROOT(vfsp, &rootvp, error);
+ if (error)
+ goto fail_unmount;
+
+ sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp));
+ if (!sb->s_root) {
+ error = ENOMEM;
+ goto fail_vnrele;
+ }
+ if (is_bad_inode(sb->s_root->d_inode)) {
+ error = EINVAL;
+ goto fail_vnrele;
+ }
+ if ((error = linvfs_start_syncd(vfsp)))
+ goto fail_vnrele;
+ vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);
+
+ kmem_free(args, sizeof(*args));
+ return 0;
+
+fail_vnrele:
+ if (sb->s_root) {
+ dput(sb->s_root);
+ sb->s_root = NULL;
+ } else {
+ VN_RELE(rootvp);
+ }
+
+fail_unmount:
+ VFS_UNMOUNT(vfsp, 0, NULL, error2);
+
+fail_vfsop:
+ vfs_deallocate(vfsp);
+ kmem_free(args, sizeof(*args));
+ return -error;
+}
+
+STATIC struct super_block *
+linvfs_get_sb(
+ struct file_system_type *fs_type,
+ int flags,
+ const char *dev_name,
+ void *data)
+{
+ return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
+}
+
+STATIC struct super_operations linvfs_sops = {
+ .alloc_inode = linvfs_alloc_inode,
+ .destroy_inode = linvfs_destroy_inode,
+ .write_inode = linvfs_write_inode,
+ .clear_inode = linvfs_clear_inode,
+ .put_super = linvfs_put_super,
+ .write_super = linvfs_write_super,
+ .sync_fs = linvfs_sync_super,
+ .write_super_lockfs = linvfs_freeze_fs,
+ .statfs = linvfs_statfs,
+ .remount_fs = linvfs_remount,
+ .show_options = linvfs_show_options,
+};
+
+STATIC struct quotactl_ops linvfs_qops = {
+ .get_xstate = linvfs_getxstate,
+ .set_xstate = linvfs_setxstate,
+ .get_xquota = linvfs_getxquota,
+ .set_xquota = linvfs_setxquota,
+};
+
+STATIC struct file_system_type xfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "xfs",
+ .get_sb = linvfs_get_sb,
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+
+
+STATIC int __init
+init_xfs_fs( void )
+{
+ int error;
+ struct sysinfo si;
+ static char message[] __initdata = KERN_INFO \
+ XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
+
+ printk(message);
+
+ si_meminfo(&si);
+ xfs_physmem = si.totalram;
+
+ ktrace_init(64);
+
+ error = init_inodecache();
+ if (error < 0)
+ goto undo_inodecache;
+
+ error = pagebuf_init();
+ if (error < 0)
+ goto undo_pagebuf;
+
+ vn_init();
+ xfs_init();
+ uuid_init();
+ vfs_initquota();
+
+ error = register_filesystem(&xfs_fs_type);
+ if (error)
+ goto undo_register;
+ XFS_DM_INIT(&xfs_fs_type);
+ return 0;
+
+undo_register:
+ pagebuf_terminate();
+
+undo_pagebuf:
+ destroy_inodecache();
+
+undo_inodecache:
+ return error;
+}
+
+STATIC void __exit
+exit_xfs_fs( void )
+{
+ vfs_exitquota();
+ XFS_DM_EXIT(&xfs_fs_type);
+ unregister_filesystem(&xfs_fs_type);
+ xfs_cleanup();
+ pagebuf_terminate();
+ destroy_inodecache();
+ ktrace_uninit();
+}
+
+module_init(init_xfs_fs);
+module_exit(exit_xfs_fs);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
+MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
new file mode 100644
index 00000000000..ec7e0035c73
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_SUPER_H__
+#define __XFS_SUPER_H__
+
+#ifdef CONFIG_XFS_DMAPI
+# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops)
+# define vfs_initdmapi() dmapi_init()
+# define vfs_exitdmapi() dmapi_uninit()
+#else
+# define vfs_insertdmapi(vfs) do { } while (0)
+# define vfs_initdmapi() do { } while (0)
+# define vfs_exitdmapi() do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_QUOTA
+# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops)
+extern void xfs_qm_init(void);
+extern void xfs_qm_exit(void);
+# define vfs_initquota() xfs_qm_init()
+# define vfs_exitquota() xfs_qm_exit()
+#else
+# define vfs_insertquota(vfs) do { } while (0)
+# define vfs_initquota() do { } while (0)
+# define vfs_exitquota() do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING "ACLs, "
+# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL)
+#else
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb) do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_SECURITY
+# define XFS_SECURITY_STRING "security attributes, "
+# define ENOSECURITY 0
+#else
+# define XFS_SECURITY_STRING
+# define ENOSECURITY EOPNOTSUPP
+#endif
+
+#ifdef CONFIG_XFS_RT
+# define XFS_REALTIME_STRING "realtime, "
+#else
+# define XFS_REALTIME_STRING
+#endif
+
+#if XFS_BIG_BLKNOS
+# if XFS_BIG_INUMS
+# define XFS_BIGFS_STRING "large block/inode numbers, "
+# else
+# define XFS_BIGFS_STRING "large block numbers, "
+# endif
+#else
+# define XFS_BIGFS_STRING
+#endif
+
+#ifdef CONFIG_XFS_TRACE
+# define XFS_TRACE_STRING "tracing, "
+#else
+# define XFS_TRACE_STRING
+#endif
+
+#ifdef CONFIG_XFS_DMAPI
+# define XFS_DMAPI_STRING "dmapi support, "
+#else
+# define XFS_DMAPI_STRING
+#endif
+
+#ifdef DEBUG
+# define XFS_DBG_STRING "debug"
+#else
+# define XFS_DBG_STRING "no debug"
+#endif
+
+#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
+ XFS_SECURITY_STRING \
+ XFS_REALTIME_STRING \
+ XFS_BIGFS_STRING \
+ XFS_TRACE_STRING \
+ XFS_DMAPI_STRING \
+ XFS_DBG_STRING /* DBG must be last */
+
+#define LINVFS_GET_VFS(s) \
+ (vfs_t *)((s)->s_fs_info)
+#define LINVFS_SET_VFS(s, vfsp) \
+ ((s)->s_fs_info = vfsp)
+
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_buftarg;
+struct block_device;
+
+extern __uint64_t xfs_max_file_offset(unsigned int);
+
+extern void xfs_initialize_vnode(bhv_desc_t *, vnode_t *, bhv_desc_t *, int);
+
+extern void xfs_flush_inode(struct xfs_inode *);
+extern void xfs_flush_device(struct xfs_inode *);
+
+extern int xfs_blkdev_get(struct xfs_mount *, const char *,
+ struct block_device **);
+extern void xfs_blkdev_put(struct block_device *);
+
+extern struct export_operations linvfs_export_ops;
+
+#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
new file mode 100644
index 00000000000..0dc010356f4
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_rw.h"
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+
+
+static struct ctl_table_header *xfs_table_header;
+
+
+#ifdef CONFIG_PROC_FS
+STATIC int
+xfs_stats_clear_proc_handler(
+ ctl_table *ctl,
+ int write,
+ struct file *filp,
+ void __user *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ int c, ret, *valp = ctl->data;
+ __uint32_t vn_active;
+
+ ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);
+
+ if (!ret && write && *valp) {
+ printk("XFS Clearing xfsstats\n");
+ for (c = 0; c < NR_CPUS; c++) {
+ if (!cpu_possible(c)) continue;
+ preempt_disable();
+ /* save vn_active, it's a universal truth! */
+ vn_active = per_cpu(xfsstats, c).vn_active;
+ memset(&per_cpu(xfsstats, c), 0,
+ sizeof(struct xfsstats));
+ per_cpu(xfsstats, c).vn_active = vn_active;
+ preempt_enable();
+ }
+ xfs_stats_clear = 0;
+ }
+
+ return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
+STATIC ctl_table xfs_table[] = {
+ {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},
+
+ {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},
+
+ {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},
+
+ {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.panic_mask.min, &xfs_params.panic_mask.max},
+
+ {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.error_level.min, &xfs_params.error_level.max},
+
+ {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},
+
+ {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},
+
+ {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},
+
+ {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},
+
+ {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},
+
+ {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},
+
+ {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},
+
+ {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
+ sizeof(int), 0644, NULL, &proc_dointvec_minmax,
+ &sysctl_intvec, NULL,
+ &xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
+
+ /* please keep this the last entry */
+#ifdef CONFIG_PROC_FS
+ {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
+ sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
+ &sysctl_intvec, NULL,
+ &xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
+#endif /* CONFIG_PROC_FS */
+
+ {0}
+};
+
+STATIC ctl_table xfs_dir_table[] = {
+ {FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
+ {0}
+};
+
+STATIC ctl_table xfs_root_table[] = {
+ {CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table},
+ {0}
+};
+
+void
+xfs_sysctl_register(void)
+{
+ xfs_table_header = register_sysctl_table(xfs_root_table, 1);
+}
+
+void
+xfs_sysctl_unregister(void)
+{
+ if (xfs_table_header)
+ unregister_sysctl_table(xfs_table_header);
+}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
new file mode 100644
index 00000000000..a39a95020a5
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2001-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#ifndef __XFS_SYSCTL_H__
+#define __XFS_SYSCTL_H__
+
+#include <linux/sysctl.h>
+
+/*
+ * Tunable xfs parameters
+ */
+
+typedef struct xfs_sysctl_val {
+ int min;
+ int val;
+ int max;
+} xfs_sysctl_val_t;
+
+typedef struct xfs_param {
+ xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
+ xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
+ * not a member of parent dir GID. */
+ xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
+ xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */
+ xfs_sysctl_val_t error_level; /* Degree of reporting for problems */
+ xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */
+ xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */
+ xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */
+ xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
+ xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
+ xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
+ xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */
+ xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
+ xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
+} xfs_param_t;
+
+/*
+ * xfs_error_level:
+ *
+ * How much error reporting will be done when internal problems are
+ * encountered. These problems normally return an EFSCORRUPTED to their
+ * caller, with no other information reported.
+ *
+ * 0 No error reports
+ * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown
+ * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any
+ * additional errors that are known to not cause shutdowns)
+ *
+ * xfs_panic_mask bit 0x8 turns the error reports into panics
+ */
+
+enum {
+ /* XFS_REFCACHE_SIZE = 1 */
+ /* XFS_REFCACHE_PURGE = 2 */
+ XFS_RESTRICT_CHOWN = 3,
+ XFS_SGID_INHERIT = 4,
+ XFS_SYMLINK_MODE = 5,
+ XFS_PANIC_MASK = 6,
+ XFS_ERRLEVEL = 7,
+ XFS_SYNCD_TIMER = 8,
+ /* XFS_PROBE_DMAPI = 9 */
+ /* XFS_PROBE_IOOPS = 10 */
+ /* XFS_PROBE_QUOTA = 11 */
+ XFS_STATS_CLEAR = 12,
+ XFS_INHERIT_SYNC = 13,
+ XFS_INHERIT_NODUMP = 14,
+ XFS_INHERIT_NOATIME = 15,
+ XFS_BUF_TIMER = 16,
+ XFS_BUF_AGE = 17,
+ /* XFS_IO_BYPASS = 18 */
+ XFS_INHERIT_NOSYM = 19,
+ XFS_ROTORSTEP = 20,
+};
+
+extern xfs_param_t xfs_params;
+
+#ifdef CONFIG_SYSCTL
+extern void xfs_sysctl_register(void);
+extern void xfs_sysctl_unregister(void);
+#else
+# define xfs_sysctl_register() do { } while (0)
+# define xfs_sysctl_unregister() do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
new file mode 100644
index 00000000000..96f96394417
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_version.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Dummy file that can contain a timestamp to put into the
+ * XFS init string, to help users keep track of what they're
+ * running
+ */
+
+#ifndef __XFS_VERSION_H__
+#define __XFS_VERSION_H__
+
+#define XFS_VERSION_STRING "SGI XFS"
+
+#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vfs.c b/fs/xfs/linux-2.6/xfs_vfs.c
new file mode 100644
index 00000000000..669c6164495
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vfs.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_macros.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_clnt.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_imap.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+
+int
+vfs_mount(
+ struct bhv_desc *bdp,
+ struct xfs_mount_args *args,
+ struct cred *cr)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_mount)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_mount)(next, args, cr));
+}
+
+int
+vfs_parseargs(
+ struct bhv_desc *bdp,
+ char *s,
+ struct xfs_mount_args *args,
+ int f)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_parseargs)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_parseargs)(next, s, args, f));
+}
+
+int
+vfs_showargs(
+ struct bhv_desc *bdp,
+ struct seq_file *m)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_showargs)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_showargs)(next, m));
+}
+
+int
+vfs_unmount(
+ struct bhv_desc *bdp,
+ int fl,
+ struct cred *cr)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_unmount)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_unmount)(next, fl, cr));
+}
+
+int
+vfs_mntupdate(
+ struct bhv_desc *bdp,
+ int *fl,
+ struct xfs_mount_args *args)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_mntupdate)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_mntupdate)(next, fl, args));
+}
+
+int
+vfs_root(
+ struct bhv_desc *bdp,
+ struct vnode **vpp)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_root)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_root)(next, vpp));
+}
+
+int
+vfs_statvfs(
+ struct bhv_desc *bdp,
+ xfs_statfs_t *sp,
+ struct vnode *vp)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_statvfs)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_statvfs)(next, sp, vp));
+}
+
+int
+vfs_sync(
+ struct bhv_desc *bdp,
+ int fl,
+ struct cred *cr)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_sync)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_sync)(next, fl, cr));
+}
+
+int
+vfs_vget(
+ struct bhv_desc *bdp,
+ struct vnode **vpp,
+ struct fid *fidp)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_vget)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_vget)(next, vpp, fidp));
+}
+
+int
+vfs_dmapiops(
+ struct bhv_desc *bdp,
+ caddr_t addr)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_dmapiops)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_dmapiops)(next, addr));
+}
+
+int
+vfs_quotactl(
+ struct bhv_desc *bdp,
+ int cmd,
+ int id,
+ caddr_t addr)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_quotactl)
+ next = BHV_NEXT(next);
+ return ((*bhvtovfsops(next)->vfs_quotactl)(next, cmd, id, addr));
+}
+
+void
+vfs_init_vnode(
+ struct bhv_desc *bdp,
+ struct vnode *vp,
+ struct bhv_desc *bp,
+ int unlock)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_init_vnode)
+ next = BHV_NEXT(next);
+ ((*bhvtovfsops(next)->vfs_init_vnode)(next, vp, bp, unlock));
+}
+
+void
+vfs_force_shutdown(
+ struct bhv_desc *bdp,
+ int fl,
+ char *file,
+ int line)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_force_shutdown)
+ next = BHV_NEXT(next);
+ ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line));
+}
+
+void
+vfs_freeze(
+ struct bhv_desc *bdp)
+{
+ struct bhv_desc *next = bdp;
+
+ ASSERT(next);
+ while (! (bhvtovfsops(next))->vfs_freeze)
+ next = BHV_NEXT(next);
+ ((*bhvtovfsops(next)->vfs_freeze)(next));
+}
+
+vfs_t *
+vfs_allocate( void )
+{
+ struct vfs *vfsp;
+
+ vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP);
+ bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
+ INIT_LIST_HEAD(&vfsp->vfs_sync_list);
+ spin_lock_init(&vfsp->vfs_sync_lock);
+ init_waitqueue_head(&vfsp->vfs_wait_sync_task);
+ init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
+ return vfsp;
+}
+
+void
+vfs_deallocate(
+ struct vfs *vfsp)
+{
+ bhv_head_destroy(VFS_BHVHEAD(vfsp));
+ kmem_free(vfsp, sizeof(vfs_t));
+}
+
+void
+vfs_insertops(
+ struct vfs *vfsp,
+ struct bhv_vfsops *vfsops)
+{
+ struct bhv_desc *bdp;
+
+ bdp = kmem_alloc(sizeof(struct bhv_desc), KM_SLEEP);
+ bhv_desc_init(bdp, NULL, vfsp, vfsops);
+ bhv_insert(&vfsp->vfs_bh, bdp);
+}
+
+void
+vfs_insertbhv(
+ struct vfs *vfsp,
+ struct bhv_desc *bdp,
+ struct vfsops *vfsops,
+ void *mount)
+{
+ bhv_desc_init(bdp, mount, vfsp, vfsops);
+ bhv_insert_initial(&vfsp->vfs_bh, bdp);
+}
+
+void
+bhv_remove_vfsops(
+ struct vfs *vfsp,
+ int pos)
+{
+ struct bhv_desc *bhv;
+
+ bhv = bhv_lookup_range(&vfsp->vfs_bh, pos, pos);
+ if (!bhv)
+ return;
+ bhv_remove(&vfsp->vfs_bh, bhv);
+ kmem_free(bhv, sizeof(*bhv));
+}
+
+void
+bhv_remove_all_vfsops(
+ struct vfs *vfsp,
+ int freebase)
+{
+ struct xfs_mount *mp;
+
+ bhv_remove_vfsops(vfsp, VFS_POSITION_QM);
+ bhv_remove_vfsops(vfsp, VFS_POSITION_DM);
+ if (!freebase)
+ return;
+ mp = XFS_BHVTOM(bhv_lookup(VFS_BHVHEAD(vfsp), &xfs_vfsops));
+ VFS_REMOVEBHV(vfsp, &mp->m_bhv);
+ xfs_mount_free(mp, 0);
+}
+
+void
+bhv_insert_all_vfsops(
+ struct vfs *vfsp)
+{
+ struct xfs_mount *mp;
+
+ mp = xfs_mount_init();
+ vfs_insertbhv(vfsp, &mp->m_bhv, &xfs_vfsops, mp);
+ vfs_insertdmapi(vfsp);
+ vfs_insertquota(vfsp);
+}
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
new file mode 100644
index 00000000000..76493991578
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_VFS_H__
+#define __XFS_VFS_H__
+
+#include <linux/vfs.h>
+#include "xfs_fs.h"
+
+struct fid;
+struct vfs;
+struct cred;
+struct vnode;
+struct kstatfs;
+struct seq_file;
+struct super_block;
+struct xfs_mount_args;
+
+typedef struct kstatfs xfs_statfs_t;
+
+typedef struct vfs_sync_work {
+ struct list_head w_list;
+ struct vfs *w_vfs;
+ void *w_data; /* syncer routine argument */
+ void (*w_syncer)(struct vfs *, void *);
+} vfs_sync_work_t;
+
+typedef struct vfs {
+ u_int vfs_flag; /* flags */
+ xfs_fsid_t vfs_fsid; /* file system ID */
+ xfs_fsid_t *vfs_altfsid; /* An ID fixed for life of FS */
+ bhv_head_t vfs_bh; /* head of vfs behavior chain */
+ struct super_block *vfs_super; /* generic superblock pointer */
+ struct task_struct *vfs_sync_task; /* generalised sync thread */
+ vfs_sync_work_t vfs_sync_work; /* work item for VFS_SYNC */
+ struct list_head vfs_sync_list; /* sync thread work item list */
+ spinlock_t vfs_sync_lock; /* work item list lock */
+ int vfs_sync_seq; /* sync thread generation no. */
+ wait_queue_head_t vfs_wait_single_sync_task;
+ wait_queue_head_t vfs_wait_sync_task;
+} vfs_t;
+
+#define vfs_fbhv vfs_bh.bh_first /* 1st on vfs behavior chain */
+
+#define bhvtovfs(bdp) ( (struct vfs *)BHV_VOBJ(bdp) )
+#define bhvtovfsops(bdp) ( (struct vfsops *)BHV_OPS(bdp) )
+#define VFS_BHVHEAD(vfs) ( &(vfs)->vfs_bh )
+#define VFS_REMOVEBHV(vfs, bdp) ( bhv_remove(VFS_BHVHEAD(vfs), bdp) )
+
+#define VFS_POSITION_BASE BHV_POSITION_BASE /* chain bottom */
+#define VFS_POSITION_TOP BHV_POSITION_TOP /* chain top */
+#define VFS_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */
+
+typedef enum {
+ VFS_BHV_UNKNOWN, /* not specified */
+ VFS_BHV_XFS, /* xfs */
+ VFS_BHV_DM, /* data migration */
+ VFS_BHV_QM, /* quota manager */
+ VFS_BHV_IO, /* IO path */
+ VFS_BHV_END /* housekeeping end-of-range */
+} vfs_bhv_t;
+
+#define VFS_POSITION_XFS (BHV_POSITION_BASE)
+#define VFS_POSITION_DM (VFS_POSITION_BASE+10)
+#define VFS_POSITION_QM (VFS_POSITION_BASE+20)
+#define VFS_POSITION_IO (VFS_POSITION_BASE+30)
+
+#define VFS_RDONLY 0x0001 /* read-only vfs */
+#define VFS_GRPID 0x0002 /* group-ID assigned from directory */
+#define VFS_DMI 0x0004 /* filesystem has the DMI enabled */
+#define VFS_UMOUNT 0x0008 /* unmount in progress */
+#define VFS_END 0x0008 /* max flag */
+
+#define SYNC_ATTR 0x0001 /* sync attributes */
+#define SYNC_CLOSE 0x0002 /* close file system down */
+#define SYNC_DELWRI 0x0004 /* look at delayed writes */
+#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
+#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
+#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
+#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
+#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
+
+typedef int (*vfs_mount_t)(bhv_desc_t *,
+ struct xfs_mount_args *, struct cred *);
+typedef int (*vfs_parseargs_t)(bhv_desc_t *, char *,
+ struct xfs_mount_args *, int);
+typedef int (*vfs_showargs_t)(bhv_desc_t *, struct seq_file *);
+typedef int (*vfs_unmount_t)(bhv_desc_t *, int, struct cred *);
+typedef int (*vfs_mntupdate_t)(bhv_desc_t *, int *,
+ struct xfs_mount_args *);
+typedef int (*vfs_root_t)(bhv_desc_t *, struct vnode **);
+typedef int (*vfs_statvfs_t)(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
+typedef int (*vfs_sync_t)(bhv_desc_t *, int, struct cred *);
+typedef int (*vfs_vget_t)(bhv_desc_t *, struct vnode **, struct fid *);
+typedef int (*vfs_dmapiops_t)(bhv_desc_t *, caddr_t);
+typedef int (*vfs_quotactl_t)(bhv_desc_t *, int, int, caddr_t);
+typedef void (*vfs_init_vnode_t)(bhv_desc_t *,
+ struct vnode *, bhv_desc_t *, int);
+typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int);
+typedef void (*vfs_freeze_t)(bhv_desc_t *);
+
+typedef struct vfsops {
+ bhv_position_t vf_position; /* behavior chain position */
+ vfs_mount_t vfs_mount; /* mount file system */
+ vfs_parseargs_t vfs_parseargs; /* parse mount options */
+ vfs_showargs_t vfs_showargs; /* unparse mount options */
+ vfs_unmount_t vfs_unmount; /* unmount file system */
+ vfs_mntupdate_t vfs_mntupdate; /* update file system options */
+ vfs_root_t vfs_root; /* get root vnode */
+ vfs_statvfs_t vfs_statvfs; /* file system statistics */
+ vfs_sync_t vfs_sync; /* flush files */
+ vfs_vget_t vfs_vget; /* get vnode from fid */
+ vfs_dmapiops_t vfs_dmapiops; /* data migration */
+ vfs_quotactl_t vfs_quotactl; /* disk quota */
+ vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */
+ vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */
+ vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */
+} vfsops_t;
+
+/*
+ * VFS's. Operates on vfs structure pointers (starts at bhv head).
+ */
+#define VHEAD(v) ((v)->vfs_fbhv)
+#define VFS_MOUNT(v, ma,cr, rv) ((rv) = vfs_mount(VHEAD(v), ma,cr))
+#define VFS_PARSEARGS(v, o,ma,f, rv) ((rv) = vfs_parseargs(VHEAD(v), o,ma,f))
+#define VFS_SHOWARGS(v, m, rv) ((rv) = vfs_showargs(VHEAD(v), m))
+#define VFS_UNMOUNT(v, f, cr, rv) ((rv) = vfs_unmount(VHEAD(v), f,cr))
+#define VFS_MNTUPDATE(v, fl, args, rv) ((rv) = vfs_mntupdate(VHEAD(v), fl, args))
+#define VFS_ROOT(v, vpp, rv) ((rv) = vfs_root(VHEAD(v), vpp))
+#define VFS_STATVFS(v, sp,vp, rv) ((rv) = vfs_statvfs(VHEAD(v), sp,vp))
+#define VFS_SYNC(v, flag,cr, rv) ((rv) = vfs_sync(VHEAD(v), flag,cr))
+#define VFS_VGET(v, vpp,fidp, rv) ((rv) = vfs_vget(VHEAD(v), vpp,fidp))
+#define VFS_DMAPIOPS(v, p, rv) ((rv) = vfs_dmapiops(VHEAD(v), p))
+#define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p))
+#define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) )
+#define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) )
+#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) )
+
+/*
+ * PVFS's. Operates on behavior descriptor pointers.
+ */
+#define PVFS_MOUNT(b, ma,cr, rv) ((rv) = vfs_mount(b, ma,cr))
+#define PVFS_PARSEARGS(b, o,ma,f, rv) ((rv) = vfs_parseargs(b, o,ma,f))
+#define PVFS_SHOWARGS(b, m, rv) ((rv) = vfs_showargs(b, m))
+#define PVFS_UNMOUNT(b, f,cr, rv) ((rv) = vfs_unmount(b, f,cr))
+#define PVFS_MNTUPDATE(b, fl, args, rv) ((rv) = vfs_mntupdate(b, fl, args))
+#define PVFS_ROOT(b, vpp, rv) ((rv) = vfs_root(b, vpp))
+#define PVFS_STATVFS(b, sp,vp, rv) ((rv) = vfs_statvfs(b, sp,vp))
+#define PVFS_SYNC(b, flag,cr, rv) ((rv) = vfs_sync(b, flag,cr))
+#define PVFS_VGET(b, vpp,fidp, rv) ((rv) = vfs_vget(b, vpp,fidp))
+#define PVFS_DMAPIOPS(b, p, rv) ((rv) = vfs_dmapiops(b, p))
+#define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p))
+#define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) )
+#define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) )
+#define PVFS_FREEZE(b) ( vfs_freeze(b) )
+
+extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *);
+extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int);
+extern int vfs_showargs(bhv_desc_t *, struct seq_file *);
+extern int vfs_unmount(bhv_desc_t *, int, struct cred *);
+extern int vfs_mntupdate(bhv_desc_t *, int *, struct xfs_mount_args *);
+extern int vfs_root(bhv_desc_t *, struct vnode **);
+extern int vfs_statvfs(bhv_desc_t *, xfs_statfs_t *, struct vnode *);
+extern int vfs_sync(bhv_desc_t *, int, struct cred *);
+extern int vfs_vget(bhv_desc_t *, struct vnode **, struct fid *);
+extern int vfs_dmapiops(bhv_desc_t *, caddr_t);
+extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t);
+extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int);
+extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int);
+extern void vfs_freeze(bhv_desc_t *);
+
+typedef struct bhv_vfsops {
+ struct vfsops bhv_common;
+ void * bhv_custom;
+} bhv_vfsops_t;
+
+#define vfs_bhv_lookup(v, id) ( bhv_lookup_range(&(v)->vfs_bh, (id), (id)) )
+#define vfs_bhv_custom(b) ( ((bhv_vfsops_t *)BHV_OPS(b))->bhv_custom )
+#define vfs_bhv_set_custom(b,o) ( (b)->bhv_custom = (void *)(o))
+#define vfs_bhv_clr_custom(b) ( (b)->bhv_custom = NULL )
+
+extern vfs_t *vfs_allocate(void);
+extern void vfs_deallocate(vfs_t *);
+extern void vfs_insertops(vfs_t *, bhv_vfsops_t *);
+extern void vfs_insertbhv(vfs_t *, bhv_desc_t *, vfsops_t *, void *);
+
+extern void bhv_insert_all_vfsops(struct vfs *);
+extern void bhv_remove_all_vfsops(struct vfs *, int);
+extern void bhv_remove_vfsops(struct vfs *, int);
+
+#define fs_frozen(vfsp) ((vfsp)->vfs_super->s_frozen)
+#define fs_check_frozen(vfsp, level) \
+ vfs_check_frozen(vfsp->vfs_super, level);
+
+#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
new file mode 100644
index 00000000000..849c61c74f3
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+
+uint64_t vn_generation; /* vnode generation number */
+DEFINE_SPINLOCK(vnumber_lock);
+
+/*
+ * Dedicated vnode inactive/reclaim sync semaphores.
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC 37
+#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
+sv_t vsync[NVSYNC];
+
+/*
+ * Translate stat(2) file types to vnode types and vice versa.
+ * Aware of numeric order of S_IFMT and vnode type values.
+ */
+enum vtype iftovt_tab[] = {
+ VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+ VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
+};
+
+u_short vttoif_tab[] = {
+ 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
+};
+
+
+void
+vn_init(void)
+{
+ register sv_t *svp;
+ register int i;
+
+ for (svp = vsync, i = 0; i < NVSYNC; i++, svp++)
+ init_sv(svp, SV_DEFAULT, "vsy", i);
+}
+
+/*
+ * Clean a vnode of filesystem-specific data and prepare it for reuse.
+ */
+STATIC int
+vn_reclaim(
+ struct vnode *vp)
+{
+ int error;
+
+ XFS_STATS_INC(vn_reclaim);
+ vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
+
+ /*
+ * Only make the VOP_RECLAIM call if there are behaviors
+ * to call.
+ */
+ if (vp->v_fbhv) {
+ VOP_RECLAIM(vp, error);
+ if (error)
+ return -error;
+ }
+ ASSERT(vp->v_fbhv == NULL);
+
+ VN_LOCK(vp);
+ vp->v_flag &= (VRECLM|VWAIT);
+ VN_UNLOCK(vp, 0);
+
+ vp->v_type = VNON;
+ vp->v_fbhv = NULL;
+
+#ifdef XFS_VNODE_TRACE
+ ktrace_free(vp->v_trace);
+ vp->v_trace = NULL;
+#endif
+
+ return 0;
+}
+
+STATIC void
+vn_wakeup(
+ struct vnode *vp)
+{
+ VN_LOCK(vp);
+ if (vp->v_flag & VWAIT)
+ sv_broadcast(vptosync(vp));
+ vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
+ VN_UNLOCK(vp, 0);
+}
+
+int
+vn_wait(
+ struct vnode *vp)
+{
+ VN_LOCK(vp);
+ if (vp->v_flag & (VINACT | VRECLM)) {
+ vp->v_flag |= VWAIT;
+ sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
+ return 1;
+ }
+ VN_UNLOCK(vp, 0);
+ return 0;
+}
+
+struct vnode *
+vn_initialize(
+ struct inode *inode)
+{
+ struct vnode *vp = LINVFS_GET_VP(inode);
+
+ XFS_STATS_INC(vn_active);
+ XFS_STATS_INC(vn_alloc);
+
+ vp->v_flag = VMODIFIED;
+ spinlock_init(&vp->v_lock, "v_lock");
+
+ spin_lock(&vnumber_lock);
+ if (!++vn_generation) /* v_number shouldn't be zero */
+ vn_generation++;
+ vp->v_number = vn_generation;
+ spin_unlock(&vnumber_lock);
+
+ ASSERT(VN_CACHED(vp) == 0);
+
+ /* Initialize the first behavior and the behavior chain head. */
+ vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
+
+#ifdef XFS_VNODE_TRACE
+ vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
+ printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace);
+#endif /* XFS_VNODE_TRACE */
+
+ vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address);
+ return vp;
+}
+
+/*
+ * Get a reference on a vnode.
+ */
+vnode_t *
+vn_get(
+ struct vnode *vp,
+ vmap_t *vmap)
+{
+ struct inode *inode;
+
+ XFS_STATS_INC(vn_get);
+ inode = LINVFS_GET_IP(vp);
+ if (inode->i_state & I_FREEING)
+ return NULL;
+
+ inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
+ if (!inode) /* Inode not present */
+ return NULL;
+
+ vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
+
+ return vp;
+}
+
+/*
+ * Revalidate the Linux inode from the vattr.
+ * Note: i_size _not_ updated; we must hold the inode
+ * semaphore when doing that - callers responsibility.
+ */
+void
+vn_revalidate_core(
+ struct vnode *vp,
+ vattr_t *vap)
+{
+ struct inode *inode = LINVFS_GET_IP(vp);
+
+ inode->i_mode = VTTOIF(vap->va_type) | vap->va_mode;
+ inode->i_nlink = vap->va_nlink;
+ inode->i_uid = vap->va_uid;
+ inode->i_gid = vap->va_gid;
+ inode->i_blocks = vap->va_nblocks;
+ inode->i_mtime = vap->va_mtime;
+ inode->i_ctime = vap->va_ctime;
+ inode->i_atime = vap->va_atime;
+ if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
+ inode->i_flags |= S_IMMUTABLE;
+ else
+ inode->i_flags &= ~S_IMMUTABLE;
+ if (vap->va_xflags & XFS_XFLAG_APPEND)
+ inode->i_flags |= S_APPEND;
+ else
+ inode->i_flags &= ~S_APPEND;
+ if (vap->va_xflags & XFS_XFLAG_SYNC)
+ inode->i_flags |= S_SYNC;
+ else
+ inode->i_flags &= ~S_SYNC;
+ if (vap->va_xflags & XFS_XFLAG_NOATIME)
+ inode->i_flags |= S_NOATIME;
+ else
+ inode->i_flags &= ~S_NOATIME;
+}
+
+/*
+ * Revalidate the Linux inode from the vnode.
+ */
+int
+vn_revalidate(
+ struct vnode *vp)
+{
+ vattr_t va;
+ int error;
+
+ vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
+ ASSERT(vp->v_fbhv != NULL);
+
+ va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS;
+ VOP_GETATTR(vp, &va, 0, NULL, error);
+ if (!error) {
+ vn_revalidate_core(vp, &va);
+ VUNMODIFY(vp);
+ }
+ return -error;
+}
+
+/*
+ * purge a vnode from the cache
+ * At this point the vnode is guaranteed to have no references (vn_count == 0)
+ * The caller has to make sure that there are no ways someone could
+ * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
+ */
+void
+vn_purge(
+ struct vnode *vp,
+ vmap_t *vmap)
+{
+ vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
+
+again:
+ /*
+ * Check whether vp has already been reclaimed since our caller
+ * sampled its version while holding a filesystem cache lock that
+ * its VOP_RECLAIM function acquires.
+ */
+ VN_LOCK(vp);
+ if (vp->v_number != vmap->v_number) {
+ VN_UNLOCK(vp, 0);
+ return;
+ }
+
+ /*
+ * If vp is being reclaimed or inactivated, wait until it is inert,
+ * then proceed. Can't assume that vnode is actually reclaimed
+ * just because the reclaimed flag is asserted -- a vn_alloc
+ * reclaim can fail.
+ */
+ if (vp->v_flag & (VINACT | VRECLM)) {
+ ASSERT(vn_count(vp) == 0);
+ vp->v_flag |= VWAIT;
+ sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
+ goto again;
+ }
+
+ /*
+ * Another process could have raced in and gotten this vnode...
+ */
+ if (vn_count(vp) > 0) {
+ VN_UNLOCK(vp, 0);
+ return;
+ }
+
+ XFS_STATS_DEC(vn_active);
+ vp->v_flag |= VRECLM;
+ VN_UNLOCK(vp, 0);
+
+ /*
+ * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
+ * vp's filesystem to flush and invalidate all cached resources.
+ * When vn_reclaim returns, vp should have no private data,
+ * either in a system cache or attached to v_data.
+ */
+ if (vn_reclaim(vp) != 0)
+ panic("vn_purge: cannot reclaim");
+
+ /*
+ * Wakeup anyone waiting for vp to be reclaimed.
+ */
+ vn_wakeup(vp);
+}
+
+/*
+ * Add a reference to a referenced vnode.
+ */
+struct vnode *
+vn_hold(
+ struct vnode *vp)
+{
+ struct inode *inode;
+
+ XFS_STATS_INC(vn_hold);
+
+ VN_LOCK(vp);
+ inode = igrab(LINVFS_GET_IP(vp));
+ ASSERT(inode);
+ VN_UNLOCK(vp, 0);
+
+ return vp;
+}
+
+/*
+ * Call VOP_INACTIVE on last reference.
+ */
+void
+vn_rele(
+ struct vnode *vp)
+{
+ int vcnt;
+ int cache;
+
+ XFS_STATS_INC(vn_rele);
+
+ VN_LOCK(vp);
+
+ vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
+ vcnt = vn_count(vp);
+
+ /*
+ * Since we always get called from put_inode we know
+ * that i_count won't be decremented after we
+ * return.
+ */
+ if (!vcnt) {
+ /*
+ * As soon as we turn this on, noone can find us in vn_get
+ * until we turn off VINACT or VRECLM
+ */
+ vp->v_flag |= VINACT;
+ VN_UNLOCK(vp, 0);
+
+ /*
+ * Do not make the VOP_INACTIVE call if there
+ * are no behaviors attached to the vnode to call.
+ */
+ if (vp->v_fbhv)
+ VOP_INACTIVE(vp, NULL, cache);
+
+ VN_LOCK(vp);
+ if (vp->v_flag & VWAIT)
+ sv_broadcast(vptosync(vp));
+
+ vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
+ }
+
+ VN_UNLOCK(vp, 0);
+
+ vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
+}
+
+/*
+ * Finish the removal of a vnode.
+ */
+void
+vn_remove(
+ struct vnode *vp)
+{
+ vmap_t vmap;
+
+ /* Make sure we don't do this to the same vnode twice */
+ if (!(vp->v_fbhv))
+ return;
+
+ XFS_STATS_INC(vn_remove);
+ vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
+
+ /*
+ * After the following purge the vnode
+ * will no longer exist.
+ */
+ VMAP(vp, vmap);
+ vn_purge(vp, &vmap);
+}
+
+
+#ifdef XFS_VNODE_TRACE
+
+#define KTRACE_ENTER(vp, vk, s, line, ra) \
+ ktrace_enter( (vp)->v_trace, \
+/* 0 */ (void *)(__psint_t)(vk), \
+/* 1 */ (void *)(s), \
+/* 2 */ (void *)(__psint_t) line, \
+/* 3 */ (void *)(vn_count(vp)), \
+/* 4 */ (void *)(ra), \
+/* 5 */ (void *)(__psunsigned_t)(vp)->v_flag, \
+/* 6 */ (void *)(__psint_t)current_cpu(), \
+/* 7 */ (void *)(__psint_t)current_pid(), \
+/* 8 */ (void *)__return_address, \
+/* 9 */ 0, 0, 0, 0, 0, 0, 0)
+
+/*
+ * Vnode tracing code.
+ */
+void
+vn_trace_entry(vnode_t *vp, char *func, inst_t *ra)
+{
+ KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
+}
+
+void
+vn_trace_exit(vnode_t *vp, char *func, inst_t *ra)
+{
+ KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
+}
+
+void
+vn_trace_hold(vnode_t *vp, char *file, int line, inst_t *ra)
+{
+ KTRACE_ENTER(vp, VNODE_KTRACE_HOLD, file, line, ra);
+}
+
+void
+vn_trace_ref(vnode_t *vp, char *file, int line, inst_t *ra)
+{
+ KTRACE_ENTER(vp, VNODE_KTRACE_REF, file, line, ra);
+}
+
+void
+vn_trace_rele(vnode_t *vp, char *file, int line, inst_t *ra)
+{
+ KTRACE_ENTER(vp, VNODE_KTRACE_RELE, file, line, ra);
+}
+#endif /* XFS_VNODE_TRACE */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
new file mode 100644
index 00000000000..da76c1f1e11
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -0,0 +1,666 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *
+ * Portions Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef __XFS_VNODE_H__
+#define __XFS_VNODE_H__
+
+struct uio;
+struct file;
+struct vattr;
+struct xfs_iomap;
+struct attrlist_cursor_kern;
+
+/*
+ * Vnode types. VNON means no type.
+ */
+enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };
+
+typedef xfs_ino_t vnumber_t;
+typedef struct dentry vname_t;
+typedef bhv_head_t vn_bhv_head_t;
+
+/*
+ * MP locking protocols:
+ * v_flag, v_vfsp VN_LOCK/VN_UNLOCK
+ * v_type read-only or fs-dependent
+ */
+typedef struct vnode {
+ __u32 v_flag; /* vnode flags (see below) */
+ enum vtype v_type; /* vnode type */
+ struct vfs *v_vfsp; /* ptr to containing VFS */
+ vnumber_t v_number; /* in-core vnode number */
+ vn_bhv_head_t v_bh; /* behavior head */
+ spinlock_t v_lock; /* VN_LOCK/VN_UNLOCK */
+ struct inode v_inode; /* Linux inode */
+#ifdef XFS_VNODE_TRACE
+ struct ktrace *v_trace; /* trace header structure */
+#endif
+} vnode_t;
+
+#define v_fbhv v_bh.bh_first /* first behavior */
+#define v_fops v_bh.bh_first->bd_ops /* first behavior ops */
+
+#define VNODE_POSITION_BASE BHV_POSITION_BASE /* chain bottom */
+#define VNODE_POSITION_TOP BHV_POSITION_TOP /* chain top */
+#define VNODE_POSITION_INVALID BHV_POSITION_INVALID /* invalid pos. num */
+
+typedef enum {
+ VN_BHV_UNKNOWN, /* not specified */
+ VN_BHV_XFS, /* xfs */
+ VN_BHV_DM, /* data migration */
+ VN_BHV_QM, /* quota manager */
+ VN_BHV_IO, /* IO path */
+ VN_BHV_END /* housekeeping end-of-range */
+} vn_bhv_t;
+
+#define VNODE_POSITION_XFS (VNODE_POSITION_BASE)
+#define VNODE_POSITION_DM (VNODE_POSITION_BASE+10)
+#define VNODE_POSITION_QM (VNODE_POSITION_BASE+20)
+#define VNODE_POSITION_IO (VNODE_POSITION_BASE+30)
+
+/*
+ * Macros for dealing with the behavior descriptor inside of the vnode.
+ */
+#define BHV_TO_VNODE(bdp) ((vnode_t *)BHV_VOBJ(bdp))
+#define BHV_TO_VNODE_NULL(bdp) ((vnode_t *)BHV_VOBJNULL(bdp))
+
+#define VN_BHV_HEAD(vp) ((bhv_head_t *)(&((vp)->v_bh)))
+#define vn_bhv_head_init(bhp,name) bhv_head_init(bhp,name)
+#define vn_bhv_remove(bhp,bdp) bhv_remove(bhp,bdp)
+#define vn_bhv_lookup(bhp,ops) bhv_lookup(bhp,ops)
+#define vn_bhv_lookup_unlocked(bhp,ops) bhv_lookup_unlocked(bhp,ops)
+
+/*
+ * Vnode to Linux inode mapping.
+ */
+#define LINVFS_GET_VP(inode) ((vnode_t *)list_entry(inode, vnode_t, v_inode))
+#define LINVFS_GET_IP(vp) (&(vp)->v_inode)
+
+/*
+ * Convert between vnode types and inode formats (since POSIX.1
+ * defines mode word of stat structure in terms of inode formats).
+ */
+extern enum vtype iftovt_tab[];
+extern u_short vttoif_tab[];
+#define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12])
+#define VTTOIF(indx) (vttoif_tab[(int)(indx)])
+#define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode))
+
+
+/*
+ * Vnode flags.
+ */
+#define VINACT 0x1 /* vnode is being inactivated */
+#define VRECLM 0x2 /* vnode is being reclaimed */
+#define VWAIT 0x4 /* waiting for VINACT/VRECLM to end */
+#define VMODIFIED 0x8 /* XFS inode state possibly differs */
+ /* to the Linux inode state. */
+
+/*
+ * Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
+ */
+typedef enum vrwlock {
+ VRWLOCK_NONE,
+ VRWLOCK_READ,
+ VRWLOCK_WRITE,
+ VRWLOCK_WRITE_DIRECT,
+ VRWLOCK_TRY_READ,
+ VRWLOCK_TRY_WRITE
+} vrwlock_t;
+
+/*
+ * Return values for VOP_INACTIVE. A return value of
+ * VN_INACTIVE_NOCACHE implies that the file system behavior
+ * has disassociated its state and bhv_desc_t from the vnode.
+ */
+#define VN_INACTIVE_CACHE 0
+#define VN_INACTIVE_NOCACHE 1
+
+/*
+ * Values for the cmd code given to VOP_VNODE_CHANGE.
+ */
+typedef enum vchange {
+ VCHANGE_FLAGS_FRLOCKS = 0,
+ VCHANGE_FLAGS_ENF_LOCKING = 1,
+ VCHANGE_FLAGS_TRUNCATED = 2,
+ VCHANGE_FLAGS_PAGE_DIRTY = 3,
+ VCHANGE_FLAGS_IOEXCL_COUNT = 4
+} vchange_t;
+
+
+typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
+typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
+ const struct iovec *, unsigned int,
+ loff_t *, int, struct cred *);
+typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
+ const struct iovec *, unsigned int,
+ loff_t *, int, struct cred *);
+typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
+ loff_t *, int, size_t, read_actor_t,
+ void *, struct cred *);
+typedef int (*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
+ int, unsigned int, void __user *);
+typedef int (*vop_getattr_t)(bhv_desc_t *, struct vattr *, int,
+ struct cred *);
+typedef int (*vop_setattr_t)(bhv_desc_t *, struct vattr *, int,
+ struct cred *);
+typedef int (*vop_access_t)(bhv_desc_t *, int, struct cred *);
+typedef int (*vop_lookup_t)(bhv_desc_t *, vname_t *, vnode_t **,
+ int, vnode_t *, struct cred *);
+typedef int (*vop_create_t)(bhv_desc_t *, vname_t *, struct vattr *,
+ vnode_t **, struct cred *);
+typedef int (*vop_remove_t)(bhv_desc_t *, vname_t *, struct cred *);
+typedef int (*vop_link_t)(bhv_desc_t *, vnode_t *, vname_t *,
+ struct cred *);
+typedef int (*vop_rename_t)(bhv_desc_t *, vname_t *, vnode_t *, vname_t *,
+ struct cred *);
+typedef int (*vop_mkdir_t)(bhv_desc_t *, vname_t *, struct vattr *,
+ vnode_t **, struct cred *);
+typedef int (*vop_rmdir_t)(bhv_desc_t *, vname_t *, struct cred *);
+typedef int (*vop_readdir_t)(bhv_desc_t *, struct uio *, struct cred *,
+ int *);
+typedef int (*vop_symlink_t)(bhv_desc_t *, vname_t *, struct vattr *,
+ char *, vnode_t **, struct cred *);
+typedef int (*vop_readlink_t)(bhv_desc_t *, struct uio *, int,
+ struct cred *);
+typedef int (*vop_fsync_t)(bhv_desc_t *, int, struct cred *,
+ xfs_off_t, xfs_off_t);
+typedef int (*vop_inactive_t)(bhv_desc_t *, struct cred *);
+typedef int (*vop_fid2_t)(bhv_desc_t *, struct fid *);
+typedef int (*vop_release_t)(bhv_desc_t *);
+typedef int (*vop_rwlock_t)(bhv_desc_t *, vrwlock_t);
+typedef void (*vop_rwunlock_t)(bhv_desc_t *, vrwlock_t);
+typedef int (*vop_bmap_t)(bhv_desc_t *, xfs_off_t, ssize_t, int,
+ struct xfs_iomap *, int *);
+typedef int (*vop_reclaim_t)(bhv_desc_t *);
+typedef int (*vop_attr_get_t)(bhv_desc_t *, char *, char *, int *, int,
+ struct cred *);
+typedef int (*vop_attr_set_t)(bhv_desc_t *, char *, char *, int, int,
+ struct cred *);
+typedef int (*vop_attr_remove_t)(bhv_desc_t *, char *, int, struct cred *);
+typedef int (*vop_attr_list_t)(bhv_desc_t *, char *, int, int,
+ struct attrlist_cursor_kern *, struct cred *);
+typedef void (*vop_link_removed_t)(bhv_desc_t *, vnode_t *, int);
+typedef void (*vop_vnode_change_t)(bhv_desc_t *, vchange_t, __psint_t);
+typedef void (*vop_ptossvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+typedef void (*vop_pflushinvalvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t, int);
+typedef int (*vop_pflushvp_t)(bhv_desc_t *, xfs_off_t, xfs_off_t,
+ uint64_t, int);
+typedef int (*vop_iflush_t)(bhv_desc_t *, int);
+
+
+typedef struct vnodeops {
+ bhv_position_t vn_position; /* position within behavior chain */
+ vop_open_t vop_open;
+ vop_read_t vop_read;
+ vop_write_t vop_write;
+ vop_sendfile_t vop_sendfile;
+ vop_ioctl_t vop_ioctl;
+ vop_getattr_t vop_getattr;
+ vop_setattr_t vop_setattr;
+ vop_access_t vop_access;
+ vop_lookup_t vop_lookup;
+ vop_create_t vop_create;
+ vop_remove_t vop_remove;
+ vop_link_t vop_link;
+ vop_rename_t vop_rename;
+ vop_mkdir_t vop_mkdir;
+ vop_rmdir_t vop_rmdir;
+ vop_readdir_t vop_readdir;
+ vop_symlink_t vop_symlink;
+ vop_readlink_t vop_readlink;
+ vop_fsync_t vop_fsync;
+ vop_inactive_t vop_inactive;
+ vop_fid2_t vop_fid2;
+ vop_rwlock_t vop_rwlock;
+ vop_rwunlock_t vop_rwunlock;
+ vop_bmap_t vop_bmap;
+ vop_reclaim_t vop_reclaim;
+ vop_attr_get_t vop_attr_get;
+ vop_attr_set_t vop_attr_set;
+ vop_attr_remove_t vop_attr_remove;
+ vop_attr_list_t vop_attr_list;
+ vop_link_removed_t vop_link_removed;
+ vop_vnode_change_t vop_vnode_change;
+ vop_ptossvp_t vop_tosspages;
+ vop_pflushinvalvp_t vop_flushinval_pages;
+ vop_pflushvp_t vop_flush_pages;
+ vop_release_t vop_release;
+ vop_iflush_t vop_iflush;
+} vnodeops_t;
+
+/*
+ * VOP's.
+ */
+#define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op)
+
+#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \
+ rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
+#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \
+ rv = _VOP_(vop_write, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
+#define VOP_SENDFILE(vp,f,off,ioflags,cnt,act,targ,cr,rv) \
+ rv = _VOP_(vop_sendfile, vp)((vp)->v_fbhv,f,off,ioflags,cnt,act,targ,cr)
+#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \
+ rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
+#define VOP_OPEN(vp, cr, rv) \
+ rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
+#define VOP_GETATTR(vp, vap, f, cr, rv) \
+ rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
+#define VOP_SETATTR(vp, vap, f, cr, rv) \
+ rv = _VOP_(vop_setattr, vp)((vp)->v_fbhv, vap, f, cr)
+#define VOP_ACCESS(vp, mode, cr, rv) \
+ rv = _VOP_(vop_access, vp)((vp)->v_fbhv, mode, cr)
+#define VOP_LOOKUP(vp,d,vpp,f,rdir,cr,rv) \
+ rv = _VOP_(vop_lookup, vp)((vp)->v_fbhv,d,vpp,f,rdir,cr)
+#define VOP_CREATE(dvp,d,vap,vpp,cr,rv) \
+ rv = _VOP_(vop_create, dvp)((dvp)->v_fbhv,d,vap,vpp,cr)
+#define VOP_REMOVE(dvp,d,cr,rv) \
+ rv = _VOP_(vop_remove, dvp)((dvp)->v_fbhv,d,cr)
+#define VOP_LINK(tdvp,fvp,d,cr,rv) \
+ rv = _VOP_(vop_link, tdvp)((tdvp)->v_fbhv,fvp,d,cr)
+#define VOP_RENAME(fvp,fnm,tdvp,tnm,cr,rv) \
+ rv = _VOP_(vop_rename, fvp)((fvp)->v_fbhv,fnm,tdvp,tnm,cr)
+#define VOP_MKDIR(dp,d,vap,vpp,cr,rv) \
+ rv = _VOP_(vop_mkdir, dp)((dp)->v_fbhv,d,vap,vpp,cr)
+#define VOP_RMDIR(dp,d,cr,rv) \
+ rv = _VOP_(vop_rmdir, dp)((dp)->v_fbhv,d,cr)
+#define VOP_READDIR(vp,uiop,cr,eofp,rv) \
+ rv = _VOP_(vop_readdir, vp)((vp)->v_fbhv,uiop,cr,eofp)
+#define VOP_SYMLINK(dvp,d,vap,tnm,vpp,cr,rv) \
+ rv = _VOP_(vop_symlink, dvp) ((dvp)->v_fbhv,d,vap,tnm,vpp,cr)
+#define VOP_READLINK(vp,uiop,fl,cr,rv) \
+ rv = _VOP_(vop_readlink, vp)((vp)->v_fbhv,uiop,fl,cr)
+#define VOP_FSYNC(vp,f,cr,b,e,rv) \
+ rv = _VOP_(vop_fsync, vp)((vp)->v_fbhv,f,cr,b,e)
+#define VOP_INACTIVE(vp, cr, rv) \
+ rv = _VOP_(vop_inactive, vp)((vp)->v_fbhv, cr)
+#define VOP_RELEASE(vp, rv) \
+ rv = _VOP_(vop_release, vp)((vp)->v_fbhv)
+#define VOP_FID2(vp, fidp, rv) \
+ rv = _VOP_(vop_fid2, vp)((vp)->v_fbhv, fidp)
+#define VOP_RWLOCK(vp,i) \
+ (void)_VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
+#define VOP_RWLOCK_TRY(vp,i) \
+ _VOP_(vop_rwlock, vp)((vp)->v_fbhv, i)
+#define VOP_RWUNLOCK(vp,i) \
+ (void)_VOP_(vop_rwunlock, vp)((vp)->v_fbhv, i)
+#define VOP_FRLOCK(vp,c,fl,flags,offset,fr,rv) \
+ rv = _VOP_(vop_frlock, vp)((vp)->v_fbhv,c,fl,flags,offset,fr)
+#define VOP_RECLAIM(vp, rv) \
+ rv = _VOP_(vop_reclaim, vp)((vp)->v_fbhv)
+#define VOP_ATTR_GET(vp, name, val, vallenp, fl, cred, rv) \
+ rv = _VOP_(vop_attr_get, vp)((vp)->v_fbhv,name,val,vallenp,fl,cred)
+#define VOP_ATTR_SET(vp, name, val, vallen, fl, cred, rv) \
+ rv = _VOP_(vop_attr_set, vp)((vp)->v_fbhv,name,val,vallen,fl,cred)
+#define VOP_ATTR_REMOVE(vp, name, flags, cred, rv) \
+ rv = _VOP_(vop_attr_remove, vp)((vp)->v_fbhv,name,flags,cred)
+#define VOP_ATTR_LIST(vp, buf, buflen, fl, cursor, cred, rv) \
+ rv = _VOP_(vop_attr_list, vp)((vp)->v_fbhv,buf,buflen,fl,cursor,cred)
+#define VOP_LINK_REMOVED(vp, dvp, linkzero) \
+ (void)_VOP_(vop_link_removed, vp)((vp)->v_fbhv, dvp, linkzero)
+#define VOP_VNODE_CHANGE(vp, cmd, val) \
+ (void)_VOP_(vop_vnode_change, vp)((vp)->v_fbhv,cmd,val)
+/*
+ * These are page cache functions that now go thru VOPs.
+ * 'last' parameter is unused and left in for IRIX compatibility
+ */
+#define VOP_TOSS_PAGES(vp, first, last, fiopt) \
+ _VOP_(vop_tosspages, vp)((vp)->v_fbhv,first, last, fiopt)
+/*
+ * 'last' parameter is unused and left in for IRIX compatibility
+ */
+#define VOP_FLUSHINVAL_PAGES(vp, first, last, fiopt) \
+ _VOP_(vop_flushinval_pages, vp)((vp)->v_fbhv,first,last,fiopt)
+/*
+ * 'last' parameter is unused and left in for IRIX compatibility
+ */
+#define VOP_FLUSH_PAGES(vp, first, last, flags, fiopt, rv) \
+ rv = _VOP_(vop_flush_pages, vp)((vp)->v_fbhv,first,last,flags,fiopt)
+#define VOP_IOCTL(vp, inode, filp, fl, cmd, arg, rv) \
+ rv = _VOP_(vop_ioctl, vp)((vp)->v_fbhv,inode,filp,fl,cmd,arg)
+#define VOP_IFLUSH(vp, flags, rv) \
+ rv = _VOP_(vop_iflush, vp)((vp)->v_fbhv, flags)
+
+/*
+ * Flags for read/write calls - same values as IRIX
+ */
+#define IO_ISAIO 0x00001 /* don't wait for completion */
+#define IO_ISDIRECT 0x00004 /* bypass page cache */
+#define IO_INVIS 0x00020 /* don't update inode timestamps */
+
+/*
+ * Flags for VOP_IFLUSH call
+ */
+#define FLUSH_SYNC 1 /* wait for flush to complete */
+#define FLUSH_INODE 2 /* flush the inode itself */
+#define FLUSH_LOG 4 /* force the last log entry for
+ * this inode out to disk */
+
+/*
+ * Flush/Invalidate options for VOP_TOSS_PAGES, VOP_FLUSHINVAL_PAGES and
+ * VOP_FLUSH_PAGES.
+ */
+#define FI_NONE 0 /* none */
+#define FI_REMAPF 1 /* Do a remapf prior to the operation */
+#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation.
+ Prevent VM access to the pages until
+ the operation completes. */
+
+/*
+ * Vnode attributes. va_mask indicates those attributes the caller
+ * wants to set or extract.
+ */
+typedef struct vattr {
+ int va_mask; /* bit-mask of attributes present */
+ enum vtype va_type; /* vnode type (for create) */
+ mode_t va_mode; /* file access mode and type */
+ nlink_t va_nlink; /* number of references to file */
+ uid_t va_uid; /* owner user id */
+ gid_t va_gid; /* owner group id */
+ xfs_ino_t va_nodeid; /* file id */
+ xfs_off_t va_size; /* file size in bytes */
+ u_long va_blocksize; /* blocksize preferred for i/o */
+ struct timespec va_atime; /* time of last access */
+ struct timespec va_mtime; /* time of last modification */
+ struct timespec va_ctime; /* time file changed */
+ u_int va_gen; /* generation number of file */
+ xfs_dev_t va_rdev; /* device the special file represents */
+ __int64_t va_nblocks; /* number of blocks allocated */
+ u_long va_xflags; /* random extended file flags */
+ u_long va_extsize; /* file extent size */
+ u_long va_nextents; /* number of extents in file */
+ u_long va_anextents; /* number of attr extents in file */
+ int va_projid; /* project id */
+} vattr_t;
+
+/*
+ * setattr or getattr attributes
+ */
+#define XFS_AT_TYPE 0x00000001
+#define XFS_AT_MODE 0x00000002
+#define XFS_AT_UID 0x00000004
+#define XFS_AT_GID 0x00000008
+#define XFS_AT_FSID 0x00000010
+#define XFS_AT_NODEID 0x00000020
+#define XFS_AT_NLINK 0x00000040
+#define XFS_AT_SIZE 0x00000080
+#define XFS_AT_ATIME 0x00000100
+#define XFS_AT_MTIME 0x00000200
+#define XFS_AT_CTIME 0x00000400
+#define XFS_AT_RDEV 0x00000800
+#define XFS_AT_BLKSIZE 0x00001000
+#define XFS_AT_NBLOCKS 0x00002000
+#define XFS_AT_VCODE 0x00004000
+#define XFS_AT_MAC 0x00008000
+#define XFS_AT_UPDATIME 0x00010000
+#define XFS_AT_UPDMTIME 0x00020000
+#define XFS_AT_UPDCTIME 0x00040000
+#define XFS_AT_ACL 0x00080000
+#define XFS_AT_CAP 0x00100000
+#define XFS_AT_INF 0x00200000
+#define XFS_AT_XFLAGS 0x00400000
+#define XFS_AT_EXTSIZE 0x00800000
+#define XFS_AT_NEXTENTS 0x01000000
+#define XFS_AT_ANEXTENTS 0x02000000
+#define XFS_AT_PROJID 0x04000000
+#define XFS_AT_SIZE_NOPERM 0x08000000
+#define XFS_AT_GENCOUNT 0x10000000
+
+#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
+ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
+ XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
+ XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
+ XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
+ XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
+
+#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
+ XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
+ XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
+ XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
+
+#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
+
+#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
+
+#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
+ XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
+ XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
+
+/*
+ * Modes.
+ */
+#define VSUID S_ISUID /* set user id on execution */
+#define VSGID S_ISGID /* set group id on execution */
+#define VSVTX S_ISVTX /* save swapped text even after use */
+#define VREAD S_IRUSR /* read, write, execute permissions */
+#define VWRITE S_IWUSR
+#define VEXEC S_IXUSR
+
+#define MODEMASK S_IALLUGO /* mode bits plus permission bits */
+
+/*
+ * Check whether mandatory file locking is enabled.
+ */
+#define MANDLOCK(vp, mode) \
+ ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
+
+extern void vn_init(void);
+extern int vn_wait(struct vnode *);
+extern vnode_t *vn_initialize(struct inode *);
+
+/*
+ * Acquiring and invalidating vnodes:
+ *
+ * if (vn_get(vp, version, 0))
+ * ...;
+ * vn_purge(vp, version);
+ *
+ * vn_get and vn_purge must be called with vmap_t arguments, sampled
+ * while a lock that the vnode's VOP_RECLAIM function acquires is
+ * held, to ensure that the vnode sampled with the lock held isn't
+ * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
+ * and the subsequent vn_get or vn_purge.
+ */
+
+/*
+ * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
+ */
+typedef struct vnode_map {
+ vfs_t *v_vfsp;
+ vnumber_t v_number; /* in-core vnode number */
+ xfs_ino_t v_ino; /* inode # */
+} vmap_t;
+
+#define VMAP(vp, vmap) {(vmap).v_vfsp = (vp)->v_vfsp, \
+ (vmap).v_number = (vp)->v_number, \
+ (vmap).v_ino = (vp)->v_inode.i_ino; }
+
+extern void vn_purge(struct vnode *, vmap_t *);
+extern vnode_t *vn_get(struct vnode *, vmap_t *);
+extern int vn_revalidate(struct vnode *);
+extern void vn_revalidate_core(struct vnode *, vattr_t *);
+extern void vn_remove(struct vnode *);
+
+static inline int vn_count(struct vnode *vp)
+{
+ return atomic_read(&LINVFS_GET_IP(vp)->i_count);
+}
+
+/*
+ * Vnode reference counting functions (and macros for compatibility).
+ */
+extern vnode_t *vn_hold(struct vnode *);
+extern void vn_rele(struct vnode *);
+
+#if defined(XFS_VNODE_TRACE)
+#define VN_HOLD(vp) \
+ ((void)vn_hold(vp), \
+ vn_trace_hold(vp, __FILE__, __LINE__, (inst_t *)__return_address))
+#define VN_RELE(vp) \
+ (vn_trace_rele(vp, __FILE__, __LINE__, (inst_t *)__return_address), \
+ iput(LINVFS_GET_IP(vp)))
+#else
+#define VN_HOLD(vp) ((void)vn_hold(vp))
+#define VN_RELE(vp) (iput(LINVFS_GET_IP(vp)))
+#endif
+
+/*
+ * Vname handling macros.
+ */
+#define VNAME(dentry) ((char *) (dentry)->d_name.name)
+#define VNAMELEN(dentry) ((dentry)->d_name.len)
+#define VNAME_TO_VNODE(dentry) (LINVFS_GET_VP((dentry)->d_inode))
+
+/*
+ * Vnode spinlock manipulation.
+ */
+#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock)
+#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s)
+#define VN_FLAGSET(vp,b) vn_flagset(vp,b)
+#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b)
+
+static __inline__ void vn_flagset(struct vnode *vp, uint flag)
+{
+ spin_lock(&vp->v_lock);
+ vp->v_flag |= flag;
+ spin_unlock(&vp->v_lock);
+}
+
+static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
+{
+ spin_lock(&vp->v_lock);
+ vp->v_flag &= ~flag;
+ spin_unlock(&vp->v_lock);
+}
+
+/*
+ * Update modify/access/change times on the vnode
+ */
+#define VN_MTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_mtime = *(tvp))
+#define VN_ATIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_atime = *(tvp))
+#define VN_CTIMESET(vp, tvp) (LINVFS_GET_IP(vp)->i_ctime = *(tvp))
+
+/*
+ * Dealing with bad inodes
+ */
+static inline void vn_mark_bad(struct vnode *vp)
+{
+ make_bad_inode(LINVFS_GET_IP(vp));
+}
+
+static inline int VN_BAD(struct vnode *vp)
+{
+ return is_bad_inode(LINVFS_GET_IP(vp));
+}
+
+/*
+ * Some useful predicates.
+ */
+#define VN_MAPPED(vp) mapping_mapped(LINVFS_GET_IP(vp)->i_mapping)
+#define VN_CACHED(vp) (LINVFS_GET_IP(vp)->i_mapping->nrpages)
+#define VN_DIRTY(vp) mapping_tagged(LINVFS_GET_IP(vp)->i_mapping, \
+ PAGECACHE_TAG_DIRTY)
+#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED)
+#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED)
+
+/*
+ * Flags to VOP_SETATTR/VOP_GETATTR.
+ */
+#define ATTR_UTIME 0x01 /* non-default utime(2) request */
+#define ATTR_DMI 0x08 /* invocation from a DMI function */
+#define ATTR_LAZY 0x80 /* set/get attributes lazily */
+#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
+
+/*
+ * Flags to VOP_FSYNC and VOP_RECLAIM.
+ */
+#define FSYNC_NOWAIT 0 /* asynchronous flush */
+#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */
+#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */
+#define FSYNC_DATA 0x4 /* synchronous fsync of data only */
+
+/*
+ * Tracking vnode activity.
+ */
+#if defined(XFS_VNODE_TRACE)
+
+#define VNODE_TRACE_SIZE 16 /* number of trace entries */
+#define VNODE_KTRACE_ENTRY 1
+#define VNODE_KTRACE_EXIT 2
+#define VNODE_KTRACE_HOLD 3
+#define VNODE_KTRACE_REF 4
+#define VNODE_KTRACE_RELE 5
+
+extern void vn_trace_entry(struct vnode *, char *, inst_t *);
+extern void vn_trace_exit(struct vnode *, char *, inst_t *);
+extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
+extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
+extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);
+
+#define VN_TRACE(vp) \
+ vn_trace_ref(vp, __FILE__, __LINE__, (inst_t *)__return_address)
+#else
+#define vn_trace_entry(a,b,c)
+#define vn_trace_exit(a,b,c)
+#define vn_trace_hold(a,b,c,d)
+#define vn_trace_ref(a,b,c,d)
+#define vn_trace_rele(a,b,c,d)
+#define VN_TRACE(vp)
+#endif
+
+#endif /* __XFS_VNODE_H__ */