summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDipankar Sarma <dipankar@in.ibm.com>2005-09-09 13:04:10 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 13:57:55 -0700
commitbadf16621c1f9d1ac753be056fce11b43d6e0be5 (patch)
tree3fdf833fdf2e3d3a439090743539680449ec3428 /fs
parentc0dfb2905126e9e94edebbce8d3e05001301f52d (diff)
[PATCH] files: break up files struct
In order for the RCU to work, the file table array, sets and their sizes must be updated atomically. Instead of ensuring this through too many memory barriers, we put the arrays and their sizes in a separate structure. This patch takes the first step of putting the file table elements in a separate structure fdtable that is embedded withing files_struct. It also changes all the users to refer to the file table using files_fdtable() macro. Subsequent applciation of RCU becomes easier after this. Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/exec.c8
-rw-r--r--fs/fcntl.c47
-rw-r--r--fs/file.c42
-rw-r--r--fs/locks.c8
-rw-r--r--fs/open.c41
-rw-r--r--fs/proc/array.c5
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/select.c12
8 files changed, 104 insertions, 63 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d..14dd03907cc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -798,6 +798,7 @@ no_thread_group:
static inline void flush_old_files(struct files_struct * files)
{
long j = -1;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
for (;;) {
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files)
j++;
i = j * __NFDBITS;
- if (i >= files->max_fds || i >= files->max_fdset)
+ fdt = files_fdtable(files);
+ if (i >= fdt->max_fds || i >= fdt->max_fdset)
break;
- set = files->close_on_exec->fds_bits[j];
+ set = fdt->close_on_exec->fds_bits[j];
if (!set)
continue;
- files->close_on_exec->fds_bits[j] = 0;
+ fdt->close_on_exec->fds_bits[j] = 0;
spin_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc3..bfecc623808 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -24,20 +24,24 @@
void fastcall set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
if (flag)
- FD_SET(fd, files->close_on_exec);
+ FD_SET(fd, fdt->close_on_exec);
else
- FD_CLR(fd, files->close_on_exec);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
}
static inline int get_close_on_exec(unsigned int fd)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
int res;
spin_lock(&files->file_lock);
- res = FD_ISSET(fd, files->close_on_exec);
+ fdt = files_fdtable(files);
+ res = FD_ISSET(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
return res;
}
@@ -54,24 +58,26 @@ static int locate_fd(struct files_struct *files,
unsigned int newfd;
unsigned int start;
int error;
+ struct fdtable *fdt;
error = -EINVAL;
if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
+ fdt = files_fdtable(files);
repeat:
/*
* Someone might have closed fd's in the range
- * orig_start..files->next_fd
+ * orig_start..fdt->next_fd
*/
start = orig_start;
- if (start < files->next_fd)
- start = files->next_fd;
+ if (start < fdt->next_fd)
+ start = fdt->next_fd;
newfd = start;
- if (start < files->max_fdset) {
- newfd = find_next_zero_bit(files->open_fds->fds_bits,
- files->max_fdset, start);
+ if (start < fdt->max_fdset) {
+ newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fdset, start);
}
error = -EMFILE;
@@ -89,8 +95,8 @@ repeat:
if (error)
goto repeat;
- if (start <= files->next_fd)
- files->next_fd = newfd + 1;
+ if (start <= fdt->next_fd)
+ fdt->next_fd = newfd + 1;
error = newfd;
@@ -101,13 +107,16 @@ out:
static int dupfd(struct file *file, unsigned int start)
{
struct files_struct * files = current->files;
+ struct fdtable *fdt;
int fd;
spin_lock(&files->file_lock);
fd = locate_fd(files, file, start);
if (fd >= 0) {
- FD_SET(fd, files->open_fds);
- FD_CLR(fd, files->close_on_exec);
+ /* locate_fd() may have expanded fdtable, load the ptr */
+ fdt = files_fdtable(files);
+ FD_SET(fd, fdt->open_fds);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
fd_install(fd, file);
} else {
@@ -123,6 +132,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
int err = -EBADF;
struct file * file, *tofree;
struct files_struct * files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
if (!(file = fcheck(oldfd)))
@@ -148,13 +158,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
/* Yes. It's a race. In user space. Nothing sane to do */
err = -EBUSY;
- tofree = files->fd[newfd];
- if (!tofree && FD_ISSET(newfd, files->open_fds))
+ fdt = files_fdtable(files);
+ tofree = fdt->fd[newfd];
+ if (!tofree && FD_ISSET(newfd, fdt->open_fds))
goto out_fput;
- files->fd[newfd] = file;
- FD_SET(newfd, files->open_fds);
- FD_CLR(newfd, files->close_on_exec);
+ fdt->fd[newfd] = file;
+ FD_SET(newfd, fdt->open_fds);
+ FD_CLR(newfd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d..f5926ce73f3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -59,13 +59,15 @@ static int expand_fd_array(struct files_struct *files, int nr)
{
struct file **new_fds;
int error, nfds;
+ struct fdtable *fdt;
error = -EMFILE;
- if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)
+ fdt = files_fdtable(files);
+ if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN)
goto out;
- nfds = files->max_fds;
+ nfds = fdt->max_fds;
spin_unlock(&files->file_lock);
/*
@@ -95,13 +97,14 @@ static int expand_fd_array(struct files_struct *files, int nr)
goto out;
/* Copy the existing array and install the new pointer */
+ fdt = files_fdtable(files);
- if (nfds > files->max_fds) {
+ if (nfds > fdt->max_fds) {
struct file **old_fds;
int i;
- old_fds = xchg(&files->fd, new_fds);
- i = xchg(&files->max_fds, nfds);
+ old_fds = xchg(&fdt->fd, new_fds);
+ i = xchg(&fdt->max_fds, nfds);
/* Don't copy/clear the array if we are creating a new
fd array for fork() */
@@ -164,12 +167,14 @@ static int expand_fdset(struct files_struct *files, int nr)
{
fd_set *new_openset = NULL, *new_execset = NULL;
int error, nfds = 0;
+ struct fdtable *fdt;
error = -EMFILE;
- if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
+ fdt = files_fdtable(files);
+ if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN)
goto out;
- nfds = files->max_fdset;
+ nfds = fdt->max_fdset;
spin_unlock(&files->file_lock);
/* Expand to the max in easy steps */
@@ -193,24 +198,25 @@ static int expand_fdset(struct files_struct *files, int nr)
error = 0;
/* Copy the existing tables and install the new pointers */
- if (nfds > files->max_fdset) {
- int i = files->max_fdset / (sizeof(unsigned long) * 8);
- int count = (nfds - files->max_fdset) / 8;
+ fdt = files_fdtable(files);
+ if (nfds > fdt->max_fdset) {
+ int i = fdt->max_fdset / (sizeof(unsigned long) * 8);
+ int count = (nfds - fdt->max_fdset) / 8;
/*
* Don't copy the entire array if the current fdset is
* not yet initialised.
*/
if (i) {
- memcpy (new_openset, files->open_fds, files->max_fdset/8);
- memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
+ memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8);
+ memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8);
memset (&new_openset->fds_bits[i], 0, count);
memset (&new_execset->fds_bits[i], 0, count);
}
- nfds = xchg(&files->max_fdset, nfds);
- new_openset = xchg(&files->open_fds, new_openset);
- new_execset = xchg(&files->close_on_exec, new_execset);
+ nfds = xchg(&fdt->max_fdset, nfds);
+ new_openset = xchg(&fdt->open_fds, new_openset);
+ new_execset = xchg(&fdt->close_on_exec, new_execset);
spin_unlock(&files->file_lock);
free_fdset (new_openset, nfds);
free_fdset (new_execset, nfds);
@@ -237,13 +243,15 @@ out:
int expand_files(struct files_struct *files, int nr)
{
int err, expand = 0;
+ struct fdtable *fdt;
- if (nr >= files->max_fdset) {
+ fdt = files_fdtable(files);
+ if (nr >= fdt->max_fdset) {
expand = 1;
if ((err = expand_fdset(files, nr)))
goto out;
}
- if (nr >= files->max_fds) {
+ if (nr >= fdt->max_fds) {
expand = 1;
if ((err = expand_fd_array(files, nr)))
goto out;
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179f..c2c09b4798d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from)
{
struct files_struct *files = current->files;
int i, j;
+ struct fdtable *fdt;
if (from == files)
return;
lock_kernel();
j = 0;
+ fdt = files_fdtable(files);
for (;;) {
unsigned long set;
i = j * __NFDBITS;
- if (i >= files->max_fdset || i >= files->max_fds)
+ if (i >= fdt->max_fdset || i >= fdt->max_fds)
break;
- set = files->open_fds->fds_bits[j++];
+ set = fdt->open_fds->fds_bits[j++];
while (set) {
if (set & 1) {
- struct file *file = files->fd[i];
+ struct file *file = fdt->fd[i];
if (file)
__steal_locks(file, from);
}
diff --git a/fs/open.c b/fs/open.c
index 4ee2dcc31c2..b6542516a0c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -842,14 +842,16 @@ int get_unused_fd(void)
{
struct files_struct * files = current->files;
int fd, error;
+ struct fdtable *fdt;
error = -EMFILE;
spin_lock(&files->file_lock);
repeat:
- fd = find_next_zero_bit(files->open_fds->fds_bits,
- files->max_fdset,
- files->next_fd);
+ fdt = files_fdtable(files);
+ fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fdset,
+ fdt->next_fd);
/*
* N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +874,14 @@ repeat:
goto repeat;
}
- FD_SET(fd, files->open_fds);
- FD_CLR(fd, files->close_on_exec);
- files->next_fd = fd + 1;
+ FD_SET(fd, fdt->open_fds);
+ FD_CLR(fd, fdt->close_on_exec);
+ fdt->next_fd = fd + 1;
#if 1
/* Sanity check */
- if (files->fd[fd] != NULL) {
+ if (fdt->fd[fd] != NULL) {
printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
- files->fd[fd] = NULL;
+ fdt->fd[fd] = NULL;
}
#endif
error = fd;
@@ -893,9 +895,10 @@ EXPORT_SYMBOL(get_unused_fd);
static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
- __FD_CLR(fd, files->open_fds);
- if (fd < files->next_fd)
- files->next_fd = fd;
+ struct fdtable *fdt = files_fdtable(files);
+ __FD_CLR(fd, fdt->open_fds);
+ if (fd < fdt->next_fd)
+ fdt->next_fd = fd;
}
void fastcall put_unused_fd(unsigned int fd)
@@ -924,10 +927,12 @@ EXPORT_SYMBOL(put_unused_fd);
void fastcall fd_install(unsigned int fd, struct file * file)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
- if (unlikely(files->fd[fd] != NULL))
+ fdt = files_fdtable(files);
+ if (unlikely(fdt->fd[fd] != NULL))
BUG();
- files->fd[fd] = file;
+ fdt->fd[fd] = file;
spin_unlock(&files->file_lock);
}
@@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd)
{
struct file * filp;
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
- if (fd >= files->max_fds)
+ fdt = files_fdtable(files);
+ if (fd >= fdt->max_fds)
goto out_unlock;
- filp = files->fd[fd];
+ filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
- files->fd[fd] = NULL;
- FD_CLR(fd, files->close_on_exec);
+ fdt->fd[fd] = NULL;
+ FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
return filp_close(filp, files);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998a..d88d518d30f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
{
struct group_info *group_info;
int g;
+ struct fdtable *fdt = NULL;
read_lock(&tasklist_lock);
buffer += sprintf(buffer,
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
p->gid, p->egid, p->sgid, p->fsgid);
read_unlock(&tasklist_lock);
task_lock(p);
+ if (p->files)
+ fdt = files_fdtable(p->files);
buffer += sprintf(buffer,
"FDSize:\t%d\n"
"Groups:\t",
- p->files ? p->files->max_fds : 0);
+ fdt ? fdt->max_fds : 0);
group_info = p->group_info;
get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 84751f3f52d..d0087a0b024 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1039,6 +1039,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
int retval;
char buf[NUMBUF];
struct files_struct * files;
+ struct fdtable *fdt;
retval = -ENOENT;
if (!pid_alive(p))
@@ -1062,8 +1063,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
if (!files)
goto out;
spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
for (fd = filp->f_pos-2;
- fd < files->max_fds;
+ fd < fdt->max_fds;
fd++, filp->f_pos++) {
unsigned int i,j;
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0..2e56325c73c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -132,11 +132,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
unsigned long *open_fds;
unsigned long set;
int max;
+ struct fdtable *fdt;
/* handle last in-complete long-word first */
set = ~(~0UL << (n & (__NFDBITS-1)));
n /= __NFDBITS;
- open_fds = current->files->open_fds->fds_bits+n;
+ fdt = files_fdtable(current->files);
+ open_fds = fdt->open_fds->fds_bits+n;
max = 0;
if (set) {
set &= BITS(fds, n);
@@ -299,6 +301,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
char *bits;
long timeout;
int ret, size, max_fdset;
+ struct fdtable *fdt;
timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
@@ -326,7 +329,8 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
goto out_nofds;
/* max_fdset can increase, so grab it once to avoid race */
- max_fdset = current->files->max_fdset;
+ fdt = files_fdtable(current->files);
+ max_fdset = fdt->max_fdset;
if (n > max_fdset)
n = max_fdset;
@@ -464,9 +468,11 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
unsigned int i;
struct poll_list *head;
struct poll_list *walk;
+ struct fdtable *fdt;
/* Do a sanity check on nfds ... */
- if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
+ fdt = files_fdtable(current->files);
+ if (nfds > fdt->max_fdset && nfds > OPEN_MAX)
return -EINVAL;
if (timeout) {