summaryrefslogtreecommitdiff
path: root/rt/aio_misc.c
diff options
context:
space:
mode:
Diffstat (limited to 'rt/aio_misc.c')
-rw-r--r--rt/aio_misc.c588
1 files changed, 414 insertions, 174 deletions
diff --git a/rt/aio_misc.c b/rt/aio_misc.c
index e4bb12c500..6ea30c2158 100644
--- a/rt/aio_misc.c
+++ b/rt/aio_misc.c
@@ -21,7 +21,6 @@
#include <aio.h>
#include <errno.h>
#include <pthread.h>
-#include <semaphore.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
@@ -29,40 +28,199 @@
#include "aio_misc.h"
-/* We need a list of pending operations. This is sorted according to
- the priority given in the aio_reqprio member. */
-aiocb_union *__aio_requests;
+/* Pool of request list entries. */
+static struct requestlist **pool;
-/* Since the list is global we need a semaphore protecting it. */
-sem_t __aio_requests_sema;
+/* Number of total and allocated pool entries. */
+static size_t pool_tab_size;
+static size_t pool_size;
+/* We implement a two dimensional array but allocate each row separately.
+ The macro below determines how many entries should be used per row.
+ It should better be a power of two. */
+#define ENTRIES_PER_ROW 16
-/* The initialization function. It gets automatically called if any
- aio_* function is used in the program. */
-static void
-__attribute__ ((unused))
-aio_initialize (void)
+/* The row table is incremented in units of this. */
+#define ROW_STEP 8
+
+/* List of available entries. */
+static struct requestlist *freelist;
+
+/* List of requests waiting to be processed. */
+static struct requestlist *runlist;
+
+/* Structure list of all currently processed requests. */
+static struct requestlist *requests;
+
+/* Number of threads currently running. */
+static int nthreads;
+
+
+/* These are the values used to optimize the use of AIO. The user can
+ overwrite them by using the `aio_init' function. */
+static struct aioinit optim =
+{
+ 20, /* int aio_threads; Maximal number of threads. */
+  256, /* int aio_num; Number of expected simultaneous requests. */
+ 0,
+ 0,
+ 0,
+ 0,
+ { 0, }
+};
+
+
+/* Since the list is global we need a mutex protecting it. */
+pthread_mutex_t __aio_requests_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+
+
+/* Functions to handle request list pool. */
+static struct requestlist *
+get_elem (void)
{
- /* Initialize the semaphore. We allow exactly one user at a time. */
- sem_init (&__aio_requests_sema, 0, 1);
+ struct requestlist *result;
+
+ if (freelist == NULL)
+ {
+ struct requestlist *new_row;
+ size_t new_size;
+
+ /* Compute new size. */
+ new_size = pool_size ? pool_size + ENTRIES_PER_ROW : optim.aio_num;
+
+ if ((new_size / ENTRIES_PER_ROW) >= pool_tab_size)
+ {
+ size_t new_tab_size = new_size / ENTRIES_PER_ROW;
+ struct requestlist **new_tab;
+
+ new_tab = (struct requestlist **)
+ realloc (pool, (new_tab_size * sizeof (struct requestlist *)));
+
+ if (new_tab == NULL)
+ return NULL;
+
+ pool_tab_size = new_tab_size;
+ pool = new_tab;
+ }
+
+ if (pool_size == 0)
+ {
+ size_t cnt;
+
+ new_row = (struct requestlist *)
+ calloc (new_size, sizeof (struct requestlist));
+
+ if (new_row == NULL)
+ return NULL;
+
+ for (cnt = 0; cnt < new_size / ENTRIES_PER_ROW; ++cnt)
+ pool[cnt] = &new_row[cnt * ENTRIES_PER_ROW];
+ }
+ else
+ {
+ /* Allocate one new row. */
+ new_row = (struct requestlist *)
+ calloc (ENTRIES_PER_ROW, sizeof (struct requestlist));
+ if (new_row == NULL)
+ return NULL;
+
+ pool[new_size / ENTRIES_PER_ROW] = new_row;
+ }
+
+ /* Put all the new entries in the freelist. */
+ do
+ {
+ new_row->next_prio = freelist;
+ freelist = new_row++;
+ }
+ while (++pool_size < new_size);
+ }
+
+ result = freelist;
+ freelist = freelist->next_prio;
+
+ return result;
}
-text_set_element (__libc_subinit, aio_initialize);
+
+void
+__aio_free_req (struct requestlist *elem)
+{
+ elem->running = no;
+ elem->next_prio = freelist;
+ freelist = elem;
+}
+
+
+struct requestlist *
+__aio_find_req (aiocb_union *elem)
+{
+ struct requestlist *runp = requests;
+ int fildes = elem->aiocb.aio_fildes;
+
+ while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
+ runp = runp->next_fd;
+
+ if (runp != NULL)
+ if (runp->aiocbp->aiocb.aio_fildes != fildes)
+ runp = NULL;
+ else
+ while (runp != NULL && runp->aiocbp != elem)
+ runp = runp->next_prio;
+
+ return runp;
+}
+
+
+struct requestlist *
+__aio_find_req_fd (int fildes)
+{
+ struct requestlist *runp = requests;
+
+ while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
+ runp = runp->next_fd;
+
+ return (runp != NULL && runp->aiocbp->aiocb.aio_fildes == fildes
+ ? runp : NULL);
+}
/* The thread handler. */
static void *handle_fildes_io (void *arg);
+/* User optimization. */
+void
+__aio_init (const struct aioinit *init)
+{
+ /* Get the mutex. */
+ pthread_mutex_lock (&__aio_requests_mutex);
+
+ /* Only allow writing new values if the table is not yet allocated. */
+ if (pool == NULL)
+ {
+ optim.aio_threads = init->aio_threads < 1 ? 1 : init->aio_threads;
+ optim.aio_num = (init->aio_num < ENTRIES_PER_ROW
+ ? ENTRIES_PER_ROW
+ : init->aio_num & ~ENTRIES_PER_ROW);
+ }
+
+ /* Release the mutex. */
+ pthread_mutex_unlock (&__aio_requests_mutex);
+}
+weak_alias (__aio_init, aio_init)
+
+
/* The main function of the async I/O handling. It enqueues requests
and if necessary starts and handles threads. */
-int
-__aio_enqueue_request (aiocb_union *aiocbp, int operation, int require_lock)
+struct requestlist *
+__aio_enqueue_request (aiocb_union *aiocbp, int operation)
{
- int result;
+ int result = 0;
int policy, prio;
struct sched_param param;
- aiocb_union *runp;
+ struct requestlist *last, *runp, *newp;
+ int running = no;
if (aiocbp->aiocb.aio_reqprio < 0
|| aiocbp->aiocb.aio_reqprio > AIO_PRIO_DELTA_MAX)
@@ -71,94 +229,160 @@ __aio_enqueue_request (aiocb_union *aiocbp, int operation, int require_lock)
__set_errno (EINVAL);
aiocbp->aiocb.__error_code = EINVAL;
aiocbp->aiocb.__return_value = -1;
- return -1;
- }
-
- if (pthread_getschedparam (pthread_self (), &policy, &param) < 0)
- {
- /* Something went wrong. */
- aiocbp->aiocb.__error_code = errno;
- aiocbp->aiocb.__return_value = -1;
- return -1;
+ return NULL;
}
/* Compute priority for this request. */
+ pthread_getschedparam (pthread_self (), &policy, &param);
prio = param.sched_priority - aiocbp->aiocb.aio_reqprio;
+ /* Get the mutex. */
+ pthread_mutex_lock (&__aio_requests_mutex);
- /* Get the semaphore. */
- if (require_lock)
- sem_wait (&__aio_requests_sema);
-
- runp = __aio_requests;
+ last = NULL;
+ runp = requests;
/* First look whether the current file descriptor is currently
worked with. */
- while (runp != NULL && runp->aiocb.aio_fildes < aiocbp->aiocb.aio_fildes)
- runp = (aiocb_union *) runp->aiocb.__next_fd;
+ while (runp != NULL
+ && runp->aiocbp->aiocb.aio_fildes < aiocbp->aiocb.aio_fildes)
+ {
+ last = runp;
+ runp = runp->next_fd;
+ }
- if (runp != NULL)
+ /* Get a new element for the waiting list. */
+ newp = get_elem ();
+ if (newp == NULL)
+ {
+ __set_errno (EAGAIN);
+ pthread_mutex_unlock (&__aio_requests_mutex);
+ return NULL;
+ }
+ newp->aiocbp = aiocbp;
+ newp->waiting = NULL;
+
+ aiocbp->aiocb.__abs_prio = prio;
+ aiocbp->aiocb.__policy = policy;
+ aiocbp->aiocb.aio_lio_opcode = operation;
+ aiocbp->aiocb.__error_code = EINPROGRESS;
+ aiocbp->aiocb.__return_value = 0;
+
+ if (runp != NULL
+ && runp->aiocbp->aiocb.aio_fildes == aiocbp->aiocb.aio_fildes)
{
/* The current file descriptor is worked on. It makes no sense
- to start another thread since this new thread would have to
- wait for the previous one to terminate. Simply enqueue it
- after the running one according to the priority. */
- while (runp->aiocb.__next_prio != NULL
- && runp->aiocb.__next_prio->__abs_prio >= prio)
- runp = (aiocb_union *) runp->aiocb.__next_prio;
-
- aiocbp->aiocb.__next_prio = runp->aiocb.__next_prio;
- aiocbp->aiocb.__abs_prio = prio;
- aiocbp->aiocb.__policy = policy;
- aiocbp->aiocb.aio_lio_opcode = operation;
- aiocbp->aiocb.__error_code = EINPROGRESS;
- aiocbp->aiocb.__return_value = 0;
- runp->aiocb.__next_prio = (struct aiocb *) aiocbp;
-
- result = 0;
+ to start another thread since this new thread would fight
+ with the running thread for the resources. But we also cannot
+ say that the thread processing this descriptor shall immediately
+ after finishing the current job process this request if there
+ are other threads in the running queue which have a higher
+ priority. */
+
+ /* Simply enqueue it after the running one according to the
+ priority. */
+ while (runp->next_prio != NULL
+ && runp->next_prio->aiocbp->aiocb.__abs_prio >= prio)
+ runp = runp->next_prio;
+
+ newp->next_prio = runp->next_prio;
+ runp->next_prio = newp;
+
+ running = queued;
}
else
{
- /* We create a new thread for this file descriptor. The
+ /* Enqueue this request for a new descriptor. */
+ if (last == NULL)
+ {
+ newp->last_fd = NULL;
+ newp->next_fd = requests;
+ if (requests != NULL)
+ requests->last_fd = newp;
+ requests = newp;
+ }
+ else
+ {
+ newp->next_fd = last->next_fd;
+ newp->last_fd = last;
+ last->next_fd = newp;
+ if (newp->next_fd != NULL)
+ newp->next_fd->last_fd = newp;
+ }
+
+ newp->next_prio = NULL;
+ }
+
+ if (running == no)
+ {
+ /* We try to create a new thread for this file descriptor. The
function which gets called will handle all available requests
for this descriptor and when all are processed it will
- terminate. */
- pthread_t thid;
- pthread_attr_t attr;
-
- /* First enqueue the request (the list is empty). */
- aiocbp->aiocb.__next_fd = NULL;
- aiocbp->aiocb.__last_fd = NULL;
-
- aiocbp->aiocb.__next_prio = NULL;
- aiocbp->aiocb.__abs_prio = prio;
- aiocbp->aiocb.__policy = policy;
- aiocbp->aiocb.aio_lio_opcode = operation;
- aiocbp->aiocb.__error_code = EINPROGRESS;
- aiocbp->aiocb.__return_value = 0;
-
- /* Make sure the thread is created detached. */
- pthread_attr_init (&attr);
- pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
-
- /* Now try to start a thread. */
- if (pthread_create (&thid, &attr, handle_fildes_io, aiocbp) < 0)
+ terminate.
+
+ If no new thread can be created or if the specified limit of
+ threads for AIO is reached we queue the request. */
+
+ /* See if we can create a thread. */
+ if (nthreads < optim.aio_threads)
{
- result = -1;
- aiocbp->aiocb.__error_code = errno;
- aiocbp->aiocb.__return_value = -1;
+ pthread_t thid;
+ pthread_attr_t attr;
+
+ /* Make sure the thread is created detached. */
+ pthread_attr_init (&attr);
+ pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
+
+ /* Now try to start a thread. */
+ if (pthread_create (&thid, &attr, handle_fildes_io, newp) == 0)
+ {
+ /* We managed to enqueue the request. All errors which can
+ happen now can be recognized by calls to `aio_return' and
+ `aio_error'. */
+ running = allocated;
+ ++nthreads;
+ }
+ else if (nthreads == 0)
+     /* We cannot create a thread at the moment and there is
+ also no thread running. This is a problem. `errno' is
+ set to EAGAIN if this is only a temporary problem. */
+ result = -1;
+ }
+ }
+
+ /* Enqueue the request in the run queue if it is not yet running. */
+ if (running < yes && result == 0)
+ {
+ if (runlist == NULL || runlist->aiocbp->aiocb.__abs_prio < prio)
+ {
+ newp->next_run = runlist;
+ runlist = newp;
}
else
- /* We managed to enqueue the request. All errors which can
- happen now can be recognized by calls to `aio_return' and
- `aio_error'. */
- result = 0;
+ {
+ runp = runlist;
+
+ while (runp->next_run != NULL
+ && runp->next_run->aiocbp->aiocb.__abs_prio >= prio)
+ runp = runp->next_run;
+
+ newp->next_run = runp->next_run;
+ runp->next_run = newp;
+ }
}
- /* Release the semaphore. */
- if (require_lock)
- sem_post (&__aio_requests_sema);
+ if (result == 0)
+ newp->running = running;
+ else
+ {
+ /* Something went wrong. */
+ __aio_free_req (newp);
+ newp = NULL;
+ }
- return result;
+ /* Release the mutex. */
+ pthread_mutex_unlock (&__aio_requests_mutex);
+
+ return newp;
}
@@ -167,140 +391,156 @@ handle_fildes_io (void *arg)
{
pthread_t self = pthread_self ();
struct sched_param param;
- aiocb_union *runp = (aiocb_union *) arg;
+ struct requestlist *runp = (struct requestlist *) arg;
+ aiocb_union *aiocbp = runp->aiocbp;
int policy;
- int fildes = runp->aiocb.aio_fildes; /* This is always the same. */
+ int fildes = runp->aiocbp->aiocb.aio_fildes;
pthread_getschedparam (self, &policy, &param);
do
{
/* Change the priority to the requested value (if necessary). */
- if (runp->aiocb.__abs_prio != param.sched_priority
- || runp->aiocb.__policy != policy)
+ if (aiocbp->aiocb.__abs_prio != param.sched_priority
+ || aiocbp->aiocb.__policy != policy)
{
- param.sched_priority = runp->aiocb.__abs_prio;
- policy = runp->aiocb.__policy;
+ param.sched_priority = aiocbp->aiocb.__abs_prio;
+ policy = aiocbp->aiocb.__policy;
pthread_setschedparam (self, policy, &param);
}
/* Process request pointed to by RUNP. We must not be disturbed
by signals. */
- if ((runp->aiocb.aio_lio_opcode & 127) == LIO_READ)
+ if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_READ)
{
- if (runp->aiocb.aio_lio_opcode & 128)
- runp->aiocb.__return_value =
+ if (aiocbp->aiocb.aio_lio_opcode & 128)
+ aiocbp->aiocb.__return_value =
TEMP_FAILURE_RETRY (__pread64 (fildes,
- (void *) runp->aiocb64.aio_buf,
- runp->aiocb64.aio_nbytes,
- runp->aiocb64.aio_offset));
+ (void *) aiocbp->aiocb64.aio_buf,
+ aiocbp->aiocb64.aio_nbytes,
+ aiocbp->aiocb64.aio_offset));
else
- runp->aiocb.__return_value =
+ aiocbp->aiocb.__return_value =
TEMP_FAILURE_RETRY (__pread (fildes,
- (void *) runp->aiocb.aio_buf,
- runp->aiocb.aio_nbytes,
- runp->aiocb.aio_offset));
+ (void *) aiocbp->aiocb.aio_buf,
+ aiocbp->aiocb.aio_nbytes,
+ aiocbp->aiocb.aio_offset));
}
- else if ((runp->aiocb.aio_lio_opcode & 127) == LIO_WRITE)
+ else if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_WRITE)
{
- if (runp->aiocb.aio_lio_opcode & 128)
- runp->aiocb.__return_value =
+ if (aiocbp->aiocb.aio_lio_opcode & 128)
+ aiocbp->aiocb.__return_value =
TEMP_FAILURE_RETRY (__pwrite64 (fildes,
- (const void *) runp->aiocb64.aio_buf,
- runp->aiocb64.aio_nbytes,
- runp->aiocb64.aio_offset));
+ (const void *) aiocbp->aiocb64.aio_buf,
+ aiocbp->aiocb64.aio_nbytes,
+ aiocbp->aiocb64.aio_offset));
else
- runp->aiocb.__return_value =
+ aiocbp->aiocb.__return_value =
TEMP_FAILURE_RETRY (__pwrite (fildes,
- (const void *) runp->aiocb.aio_buf,
- runp->aiocb.aio_nbytes,
- runp->aiocb.aio_offset));
+ (const void *) aiocbp->aiocb.aio_buf,
+ aiocbp->aiocb.aio_nbytes,
+ aiocbp->aiocb.aio_offset));
}
- else if (runp->aiocb.aio_lio_opcode == __LIO_DSYNC)
- runp->aiocb.__return_value = TEMP_FAILURE_RETRY (fdatasync (fildes));
- else if (runp->aiocb.aio_lio_opcode == __LIO_SYNC)
- runp->aiocb.__return_value = TEMP_FAILURE_RETRY (fsync (fildes));
+ else if (aiocbp->aiocb.aio_lio_opcode == LIO_DSYNC)
+ aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (fdatasync (fildes));
+ else if (aiocbp->aiocb.aio_lio_opcode == LIO_SYNC)
+ aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (fsync (fildes));
else
{
/* This is an invalid opcode. */
- runp->aiocb.__return_value = -1;
+ aiocbp->aiocb.__return_value = -1;
__set_errno (EINVAL);
}
- if (runp->aiocb.__return_value == -1)
- runp->aiocb.__error_code = errno;
+ /* Get the mutex. */
+ pthread_mutex_lock (&__aio_requests_mutex);
+
+ if (aiocbp->aiocb.__return_value == -1)
+ aiocbp->aiocb.__error_code = errno;
else
- runp->aiocb.__error_code = 0;
+ aiocbp->aiocb.__error_code = 0;
/* Send the signal to notify about finished processing of the
request. */
- if (runp->aiocb.aio_sigevent.sigev_notify == SIGEV_THREAD)
+ __aio_notify (runp);
+
+ /* Now dequeue the current request. */
+ if (runp->next_prio == NULL)
{
- /* We have to start a thread. */
- pthread_t tid;
- pthread_attr_t attr, *pattr;
+ /* No outstanding request for this descriptor. Process the
+ runlist if necessary. */
+ if (runp->next_fd != NULL)
+ runp->next_fd->last_fd = runp->last_fd;
+ if (runp->last_fd != NULL)
+ runp->last_fd->next_fd = runp->next_fd;
+ }
+ else
+ {
+ runp->next_prio->last_fd = runp->last_fd;
+ runp->next_prio->next_fd = runp->next_fd;
+ runp->next_prio->running = yes;
+ if (runp->next_fd != NULL)
+ runp->next_fd->last_fd = runp->next_prio;
+ if (runp->last_fd != NULL)
+ runp->last_fd->next_fd = runp->next_prio;
+ }
+
+ /* Free the old element. */
+ __aio_free_req (runp);
- pattr = (pthread_attr_t *)
- runp->aiocb.aio_sigevent.sigev_notify_attributes;
- if (pattr == NULL)
+ runp = freelist;
+ if (runp != NULL)
+ {
+ /* We must not run requests which are not marked `running'. */
+ if (runp->running == yes)
{
- pthread_attr_init (&attr);
- pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
- pattr = &attr;
+ freelist = runp->next_run;
+ runp->running = allocated;
}
-
- if (pthread_create (&tid,
- (pthread_attr_t *)
- runp->aiocb.aio_sigevent.sigev_notify_attributes,
- (void *(*) (void *))
- runp->aiocb.aio_sigevent.sigev_notify_function,
- runp->aiocb.aio_sigevent.sigev_value.sival_ptr)
- < 0)
+ else
{
- /* XXX What shall we do if already an error is set by
- read/write/fsync? */
- runp->aiocb.__error_code = errno;
- runp->aiocb.__return_value = -1;
+ struct requestlist *old;
+
+ do
+ {
+ old = runp;
+ runp = runp->next_run;
+ }
+ while (runp != NULL && runp->running != yes);
+
+ if (runp != NULL)
+ old->next_run = runp->next_run;
}
}
- else if (runp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL)
- /* We have to send a signal. */
- if (__aio_sigqueue (runp->aiocb.aio_sigevent.sigev_signo,
- runp->aiocb.aio_sigevent.sigev_value) < 0)
- {
- /* XXX What shall we do if already an error is set by
- read/write/fsync? */
- runp->aiocb.__error_code = errno;
- runp->aiocb.__return_value = -1;
- }
-
- /* Get the semaphore. */
- sem_wait (&__aio_requests_sema);
- /* Now dequeue the current request. */
- if (runp->aiocb.__next_prio == NULL)
- {
- if (runp->aiocb.__next_fd != NULL)
- runp->aiocb.__next_fd->__last_fd = runp->aiocb.__last_fd;
- if (runp->aiocb.__last_fd != NULL)
- runp->aiocb.__last_fd->__next_fd = runp->aiocb.__next_fd;
- runp = NULL;
- }
- else
- {
- runp->aiocb.__next_prio->__last_fd = runp->aiocb.__last_fd;
- runp->aiocb.__next_prio->__next_fd = runp->aiocb.__next_fd;
- if (runp->aiocb.__next_fd != NULL)
- runp->aiocb.__next_fd->__last_fd = runp->aiocb.__next_prio;
- if (runp->aiocb.__last_fd != NULL)
- runp->aiocb.__last_fd->__next_fd = runp->aiocb.__next_prio;
- runp = (aiocb_union *) runp->aiocb.__next_prio;
- }
+ /* If no request to work on we will stop the thread. */
+ if (runp == NULL)
+ --nthreads;
- /* Release the semaphore. */
- sem_post (&__aio_requests_sema);
+ /* Release the mutex. */
+ pthread_mutex_unlock (&__aio_requests_mutex);
}
while (runp != NULL);
pthread_exit (NULL);
}
+
+
+/* Free allocated resources. */
+static void
+__attribute__ ((unused))
+free_res (void)
+{
+ size_t row;
+
+ /* The first block of rows as specified in OPTIM is allocated in
+ one chunk. */
+ free (pool[0]);
+
+ for (row = optim.aio_num / ENTRIES_PER_ROW; row < pool_tab_size; ++row)
+ free (pool[row]);
+
+ free (pool);
+}
+
+text_set_element (__libc_subfreeres, free_res);