diff options
Diffstat (limited to 'fs/aio.c')
| -rw-r--r-- | fs/aio.c | 119 | 
1 files changed, 49 insertions, 70 deletions
| @@ -160,7 +160,7 @@ static int aio_setup_ring(struct kioctx *ctx)  	info->nr = nr_events;		/* trusted copy */ -	ring = kmap_atomic(info->ring_pages[0], KM_USER0); +	ring = kmap_atomic(info->ring_pages[0]);  	ring->nr = nr_events;	/* user copy */  	ring->id = ctx->user_id;  	ring->head = ring->tail = 0; @@ -168,47 +168,38 @@ static int aio_setup_ring(struct kioctx *ctx)  	ring->compat_features = AIO_RING_COMPAT_FEATURES;  	ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;  	ring->header_length = sizeof(struct aio_ring); -	kunmap_atomic(ring, KM_USER0); +	kunmap_atomic(ring);  	return 0;  }  /* aio_ring_event: returns a pointer to the event at the given index from - * kmap_atomic(, km).  Release the pointer with put_aio_ring_event(); + * kmap_atomic().  Release the pointer with put_aio_ring_event();   */  #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE / sizeof(struct io_event))  #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))  #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -#define aio_ring_event(info, nr, km) ({					\ +#define aio_ring_event(info, nr) ({					\  	unsigned pos = (nr) + AIO_EVENTS_OFFSET;			\  	struct io_event *__event;					\  	__event = kmap_atomic(						\ -			(info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \ +			(info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \  	__event += pos % AIO_EVENTS_PER_PAGE;				\  	__event;							\  }) -#define put_aio_ring_event(event, km) do {	\ +#define put_aio_ring_event(event) do {		\  	struct io_event *__event = (event);	\  	(void)__event;				\ -	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ +	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \  } while(0)  static void ctx_rcu_free(struct rcu_head *head)  {  	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); -	unsigned nr_events = ctx->max_reqs; -  	kmem_cache_free(kioctx_cachep, ctx); - -	if (nr_events) { -		spin_lock(&aio_nr_lock); -		BUG_ON(aio_nr - nr_events > aio_nr); -		aio_nr -= nr_events; -		spin_unlock(&aio_nr_lock); -	}  }  /* __put_ioctx @@ -217,23 +208,23 @@ static void ctx_rcu_free(struct rcu_head *head)   */  static void __put_ioctx(struct kioctx *ctx)  { +	unsigned nr_events = ctx->max_reqs;  	BUG_ON(ctx->reqs_active); -	cancel_delayed_work(&ctx->wq); -	cancel_work_sync(&ctx->wq.work); +	cancel_delayed_work_sync(&ctx->wq);  	aio_free_ring(ctx);  	mmdrop(ctx->mm);  	ctx->mm = NULL; +	if (nr_events) { +		spin_lock(&aio_nr_lock); +		BUG_ON(aio_nr - nr_events > aio_nr); +		aio_nr -= nr_events; +		spin_unlock(&aio_nr_lock); +	}  	pr_debug("__put_ioctx: freeing %p\n", ctx);  	call_rcu(&ctx->rcu_head, ctx_rcu_free);  } -static inline void get_ioctx(struct kioctx *kioctx) -{ -	BUG_ON(atomic_read(&kioctx->users) <= 0); -	atomic_inc(&kioctx->users); -} -  static inline int try_get_ioctx(struct kioctx *kioctx)  {  	return atomic_inc_not_zero(&kioctx->users); @@ -253,7 +244,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)  {  	struct mm_struct *mm;  	struct kioctx *ctx; -	int did_sync = 0; +	int err = -ENOMEM;  	/* Prevent overflows */  	if ((nr_events > (0x10000000U / sizeof(struct io_event))) || @@ -262,7 +253,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)  		return ERR_PTR(-EINVAL);  	} -	if ((unsigned long)nr_events > aio_max_nr) +	if (!nr_events || (unsigned long)nr_events > aio_max_nr)  		return ERR_PTR(-EAGAIN);  	ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); @@ -273,7 +264,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)  	mm = ctx->mm = current->mm;  	atomic_inc(&mm->mm_count); -	atomic_set(&ctx->users, 1); +	atomic_set(&ctx->users, 2);  	spin_lock_init(&ctx->ctx_lock);  	spin_lock_init(&ctx->ring_info.ring_lock);  	init_waitqueue_head(&ctx->wait); @@ -286,25 +277,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)  		goto out_freectx;  	/* limit the number of system wide aios */ -	do { -		spin_lock_bh(&aio_nr_lock); -		if (aio_nr + nr_events > aio_max_nr || -		    aio_nr + nr_events < aio_nr) -			ctx->max_reqs = 0; -		else -			aio_nr += ctx->max_reqs; -		spin_unlock_bh(&aio_nr_lock); -		if (ctx->max_reqs || did_sync) -			break; - -		/* wait for rcu callbacks to have completed before giving up */ -		synchronize_rcu(); -		did_sync = 1; -		ctx->max_reqs = nr_events; -	} while (1); - -	if (ctx->max_reqs == 0) +	spin_lock(&aio_nr_lock); +	if (aio_nr + nr_events > aio_max_nr || +	    aio_nr + nr_events < aio_nr) { +		spin_unlock(&aio_nr_lock);  		goto out_cleanup; +	} +	aio_nr += ctx->max_reqs; +	spin_unlock(&aio_nr_lock);  	/* now link into global list. */  	spin_lock(&mm->ioctx_lock); @@ -316,16 +296,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)  	return ctx;  out_cleanup: -	__put_ioctx(ctx); -	return ERR_PTR(-EAGAIN); - +	err = -EAGAIN; +	aio_free_ring(ctx);  out_freectx:  	mmdrop(mm);  	kmem_cache_free(kioctx_cachep, ctx); -	ctx = ERR_PTR(-ENOMEM); - -	dprintk("aio: error allocating ioctx %p\n", ctx); -	return ctx; +	dprintk("aio: error allocating ioctx %d\n", err); +	return ERR_PTR(err);  }  /* aio_cancel_all @@ -413,10 +390,6 @@ void exit_aio(struct mm_struct *mm)  		aio_cancel_all(ctx);  		wait_for_all_aios(ctx); -		/* -		 * Ensure we don't leave the ctx on the aio_wq -		 */ -		cancel_work_sync(&ctx->wq.work);  		if (1 != atomic_read(&ctx->users))  			printk(KERN_DEBUG @@ -490,6 +463,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)  		kmem_cache_free(kiocb_cachep, req);  		ctx->reqs_active--;  	} +	if (unlikely(!ctx->reqs_active && ctx->dead)) +		wake_up_all(&ctx->wait);  	spin_unlock_irq(&ctx->ctx_lock);  } @@ -607,11 +582,16 @@ static void aio_fput_routine(struct work_struct *data)  			fput(req->ki_filp);  		/* Link the iocb into the context's free list */ +		rcu_read_lock();  		spin_lock_irq(&ctx->ctx_lock);  		really_put_req(ctx, req); +		/* +		 * at that point ctx might've been killed, but actual +		 * freeing is RCU'd +		 */  		spin_unlock_irq(&ctx->ctx_lock); +		rcu_read_unlock(); -		put_ioctx(ctx);  		spin_lock_irq(&fput_lock);  	}  	spin_unlock_irq(&fput_lock); @@ -642,7 +622,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)  	 * this function will be executed w/out any aio kthread wakeup.  	 */  	if (unlikely(!fput_atomic(req->ki_filp))) { -		get_ioctx(ctx);  		spin_lock(&fput_lock);  		list_add(&req->ki_list, &fput_head);  		spin_unlock(&fput_lock); @@ -920,7 +899,7 @@ static void aio_kick_handler(struct work_struct *work)   	unuse_mm(mm);  	set_fs(oldfs);  	/* -	 * we're in a worker thread already, don't use queue_delayed_work, +	 * we're in a worker thread already; no point using non-zero delay  	 */  	if (requeue)  		queue_delayed_work(aio_wq, &ctx->wq, 0); @@ -1019,10 +998,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)  	if (kiocbIsCancelled(iocb))  		goto put_rq; -	ring = kmap_atomic(info->ring_pages[0], KM_IRQ1); +	ring = kmap_atomic(info->ring_pages[0]);  	tail = info->tail; -	event = aio_ring_event(info, tail, KM_IRQ0); +	event = aio_ring_event(info, tail);  	if (++tail >= info->nr)  		tail = 0; @@ -1043,8 +1022,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2)  	info->tail = tail;  	ring->tail = tail; -	put_aio_ring_event(event, KM_IRQ0); -	kunmap_atomic(ring, KM_IRQ1); +	put_aio_ring_event(event); +	kunmap_atomic(ring);  	pr_debug("added to ring %p at [%lu]\n", iocb, tail); @@ -1089,7 +1068,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)  	unsigned long head;  	int ret = 0; -	ring = kmap_atomic(info->ring_pages[0], KM_USER0); +	ring = kmap_atomic(info->ring_pages[0]);  	dprintk("in aio_read_evt h%lu t%lu m%lu\n",  		 (unsigned long)ring->head, (unsigned long)ring->tail,  		 (unsigned long)ring->nr); @@ -1101,18 +1080,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)  	head = ring->head % info->nr;  	if (head != ring->tail) { -		struct io_event *evp = aio_ring_event(info, head, KM_USER1); +		struct io_event *evp = aio_ring_event(info, head);  		*ent = *evp;  		head = (head + 1) % info->nr;  		smp_mb(); /* finish reading the event before updatng the head */  		ring->head = head;  		ret = 1; -		put_aio_ring_event(evp, KM_USER1); +		put_aio_ring_event(evp);  	}  	spin_unlock(&info->ring_lock);  out: -	kunmap_atomic(ring, KM_USER0); +	kunmap_atomic(ring);  	dprintk("leaving aio_read_evt: %d  h%lu t%lu\n", ret,  		 (unsigned long)ring->head, (unsigned long)ring->tail);  	return ret; @@ -1336,10 +1315,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)  	ret = PTR_ERR(ioctx);  	if (!IS_ERR(ioctx)) {  		ret = put_user(ioctx->user_id, ctxp); -		if (!ret) +		if (!ret) { +			put_ioctx(ioctx);  			return 0; - -		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ +		}  		io_destroy(ioctx);  	} | 
