From ab2af1f5005069321c5d130f09cce577b03f43ef Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 9 Sep 2005 13:04:13 -0700 Subject: [PATCH] files: files struct with RCU Patch to eliminate struct files_struct.file_lock spinlock on the reader side and use rcu refcounting rcuref_xxx api for the f_count refcounter. The updates to the fdtable are done by allocating a new fdtable structure and setting files->fdt to point to the new structure. The fdtable structure is protected by RCU thereby allowing lock-free lookup. For fd arrays/sets that are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A global list of fdtable to be freed is not scalable, so we use a per-cpu list. If keventd is already handling the current cpu's work, we use a timer to defer queueing of that work. Since the last publication, this patch has been re-written to avoid using explicit memory barriers and use rcu_assign_pointer(), rcu_dereference() premitives instead. This required that the fd information is kept in a separate structure (fdtable) and updated atomically. Signed-off-by: Dipankar Sarma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 15 ++++++++------- kernel/fork.c | 23 +++++++++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 83beb1e93b1..6d2089a1bce 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -411,15 +411,16 @@ void fastcall put_files_struct(struct files_struct *files) close_files(files); /* * Free the fd and fdset arrays if we expanded them. + * If the fdtable was embedded, pass files for freeing + * at the end of the RCU grace period. Otherwise, + * you can free files immediately. */ fdt = files_fdtable(files); - if (fdt->fd != &files->fd_array[0]) - free_fd_array(fdt->fd, fdt->max_fds); - if (fdt->max_fdset > __FD_SETSIZE) { - free_fdset(fdt->open_fds, fdt->max_fdset); - free_fdset(fdt->close_on_exec, fdt->max_fdset); - } - kmem_cache_free(files_cachep, files); + if (fdt == &files->fdtab) + fdt->free_files = files; + else + kmem_cache_free(files_cachep, files); + free_fdtable(fdt); } } diff --git a/kernel/fork.c b/kernel/fork.c index ecc694debb5..8149f360288 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -565,13 +566,12 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) return 0; } -static int count_open_files(struct files_struct *files, int size) +static int count_open_files(struct fdtable *fdt) { + int size = fdt->max_fdset; int i; - struct fdtable *fdt; /* Find the last open fd */ - fdt = files_fdtable(files); for (i = size/(8*sizeof(long)); i > 0; ) { if (fdt->open_fds->fds_bits[--i]) break; @@ -592,13 +592,17 @@ static struct files_struct *alloc_files(void) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); - fdt = files_fdtable(newf); + fdt = &newf->fdtab; fdt->next_fd = 0; fdt->max_fds = NR_OPEN_DEFAULT; fdt->max_fdset = __FD_SETSIZE; fdt->close_on_exec = &newf->close_on_exec_init; fdt->open_fds = &newf->open_fds_init; fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&fdt->rcu); + fdt->free_files = NULL; + fdt->next = NULL; + rcu_assign_pointer(newf->fdt, fdt); out: return newf; } @@ -637,7 +641,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) old_fdt = files_fdtable(oldf); new_fdt = files_fdtable(newf); size = old_fdt->max_fdset; - open_files = count_open_files(oldf, old_fdt->max_fdset); + open_files = count_open_files(old_fdt); expand = 0; /* @@ -661,7 +665,14 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) spin_unlock(&newf->file_lock); if (error < 0) goto out_release; + new_fdt = files_fdtable(newf); + /* + * Reacquire the oldf lock and a pointer to its fd table + * who knows it may have a new bigger fd table. We need + * the latest pointer. + */ spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); } old_fds = old_fdt->fd; @@ -683,7 +694,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) */ FD_CLR(open_files - i, new_fdt->open_fds); } - *new_fds++ = f; + rcu_assign_pointer(*new_fds++, f); } spin_unlock(&oldf->file_lock); -- cgit v1.2.3