From 75956694f3f80a1c32389c95069641f52c236c8b Mon Sep 17 00:00:00 2001 From: Dinakar Guniguntala Date: Sun, 13 Dec 2009 11:50:16 -0800 Subject: Add Requeue-PI support for x86 arch. --- nptl/ChangeLog | 11 ++++ .../sysv/linux/i386/i486/pthread_cond_broadcast.S | 30 +++++++-- .../sysv/linux/i386/i486/pthread_cond_signal.S | 43 ++++++++++++- .../sysv/linux/i386/i486/pthread_cond_timedwait.S | 67 +++++++++++++++++--- .../unix/sysv/linux/i386/i486/pthread_cond_wait.S | 73 ++++++++++++++++++---- nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h | 2 + 6 files changed, 198 insertions(+), 28 deletions(-) (limited to 'nptl') diff --git a/nptl/ChangeLog b/nptl/ChangeLog index 3d9a60afc5..b05f6e4a03 100644 --- a/nptl/ChangeLog +++ b/nptl/ChangeLog @@ -1,3 +1,14 @@ +2009-12-01 Dinakar Guniguntala + + * sysdeps/unix/sysv/linux/i386/lowlevellock.h: Define + FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI. + * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S: If mutex + is a non robust PI mutex, then use FUTEX_CMP_REQUEUE_PI. + * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S: If mutex + is a non robust PI mutex, then use FUTEX_WAIT_REQUEUE_PI. + * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S: Likewise. + 2009-12-12 Ulrich Drepper * sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S (sem_timedwait): diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S index 40fb04b31b..a7ca78f78d 100644 --- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S +++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S @@ -91,12 +91,17 @@ __pthread_cond_broadcast: 8: cmpl $-1, %edi je 9f - /* XXX: The kernel so far doesn't support requeue to PI futex. */ - /* XXX: The kernel only supports FUTEX_CMP_REQUEUE to the same - type of futex (private resp. shared). 
*/ - testl $(PI_BIT | PS_BIT), MUTEX_KIND(%edi) + /* Do not use requeue for pshared condvars. */ + testl $PS_BIT, MUTEX_KIND(%edi) jne 9f + /* Requeue to a non-robust PI mutex if the PI bit is set and + the robust bit is not set. */ + movl MUTEX_KIND(%edi), %eax + andl $(ROBUST_BIT|PI_BIT), %eax + cmpl $PI_BIT, %eax + je 81f + /* Wake up all threads. */ #ifdef __ASSUME_PRIVATE_FUTEX movl $(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %ecx @@ -138,6 +143,23 @@ __pthread_cond_broadcast: cfi_restore_state +81: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx + movl $SYS_futex, %eax + movl $0x7fffffff, %esi + movl $1, %edx + /* Get the address of the futex involved. */ +# if MUTEX_FUTEX != 0 + addl $MUTEX_FUTEX, %edi +# endif + int $0x80 + + /* For any kind of error, which mainly is EAGAIN, we try again + with WAKE. The general test also covers running on old + kernels. */ + cmpl $0xfffff001, %eax + jb 6b + jmp 9f + /* Initial locking failed. */ 1: #if cond_lock == 0 diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S index 013fcc303f..9fc2cbfeaf 100644 --- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S +++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -86,7 +87,17 @@ __pthread_cond_signal: #endif cmpl $-1, dep_mutex-cond_futex(%ebx) sete %cl - subl $1, %ecx + je 8f + + movl dep_mutex-cond_futex(%ebx), %edx + /* Requeue to a non-robust PI mutex if the PI bit is set and + the robust bit is not set. */ + movl MUTEX_KIND(%edx), %eax + andl $(ROBUST_BIT|PI_BIT), %eax + cmpl $PI_BIT, %eax + je 9f + +8: subl $1, %ecx #ifdef __ASSUME_PRIVATE_FUTEX andl $FUTEX_PRIVATE_FLAG, %ecx #else @@ -124,8 +135,34 @@ __pthread_cond_signal: cfi_restore_state -7: /* %ecx should be either FUTEX_WAKE_OP or - FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG from the previous syscall. 
*/ +9: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx + movl $SYS_futex, %eax + movl $1, %edx + xorl %esi, %esi + movl dep_mutex-cond_futex(%ebx), %edi + movl (%ebx), %ebp + /* FIXME: Until Ingo fixes 4G/4G vDSO, 6 arg syscalls are broken for + sysenter. + ENTER_KERNEL */ + int $0x80 + popl %ebp + popl %esi + + leal -cond_futex(%ebx), %edi + + /* For any kind of error, we try again with WAKE. + The general test also covers running on old kernels. */ + cmpl $-4095, %eax + jb 4f + +7: +#ifdef __ASSUME_PRIVATE_FUTEX + andl $FUTEX_PRIVATE_FLAG, %ecx +#else + andl %gs:PRIVATE_FUTEX, %ecx +#endif + orl $FUTEX_WAKE, %ecx + xorl $(FUTEX_WAKE ^ FUTEX_WAKE_OP), %ecx movl $SYS_futex, %eax /* %edx should be 1 already from $FUTEX_WAKE_OP syscall. diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S index 8f5088a460..7faf4415c6 100644 --- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S +++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -95,7 +96,7 @@ __pthread_cond_timedwait: addl $1, cond_futex(%ebx) addl $(1 << nwaiters_shift), cond_nwaiters(%ebx) -#define FRAME_SIZE 24 +#define FRAME_SIZE 32 subl $FRAME_SIZE, %esp cfi_adjust_cfa_offset(FRAME_SIZE) @@ -107,8 +108,10 @@ __pthread_cond_timedwait: movl %edx, 16(%esp) movl %eax, 20(%esp) + /* Reset the pi-requeued flag. */ +8: movl $0, 24(%esp) /* Get the current time. */ -8: movl %ebx, %edx + movl %ebx, %edx #ifdef __NR_clock_gettime /* Get the clock number. */ movl cond_nwaiters(%ebx), %ebx @@ -158,6 +161,7 @@ __pthread_cond_timedwait: movl %edx, 8(%esp) movl cond_futex(%ebx), %edi + movl %edi, 28(%esp) /* Unlock. 
*/ LOCK @@ -172,13 +176,50 @@ __pthread_cond_timedwait: 4: call __pthread_enable_asynccancel movl %eax, (%esp) - leal 4(%esp), %esi #if FUTEX_PRIVATE_FLAG > 255 xorl %ecx, %ecx #endif cmpl $-1, dep_mutex(%ebx) sete %cl - subl $1, %ecx + je 40f + + movl dep_mutex(%ebx), %edi + /* Requeue to a non-robust PI mutex if the PI bit is set and + the robust bit is not set. */ + movl MUTEX_KIND(%edi), %eax + andl $(ROBUST_BIT|PI_BIT), %eax + cmpl $PI_BIT, %eax + jne 40f + + movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx + /* The following only works like this because we only support + two clocks, represented using a single bit. */ + testl $1, cond_nwaiters(%ebx) + /* XXX Need to implement using sete instead of a jump. */ + jne 42f + orl $FUTEX_CLOCK_REALTIME, %ecx + + /* Requeue-PI uses absolute timeout */ +42: leal (%ebp), %esi + movl 28(%esp), %edx + addl $cond_futex, %ebx + movl $SYS_futex, %eax + ENTER_KERNEL + subl $cond_futex, %ebx + movl %eax, %esi + /* Set the pi-requeued flag only if the kernel has returned 0. The + kernel does not hold the mutex on ETIMEDOUT or any other error. */ + cmpl $0, %eax + sete 24(%esp) + je 41f + + /* Normal and PI futexes dont mix. Use normal futex functions only + if the kernel does not support the PI futex functions. */ + cmpl $-ENOSYS, %eax + jne 41f + xorl %ecx, %ecx + +40: subl $1, %ecx #ifdef __ASSUME_PRIVATE_FUTEX andl $FUTEX_PRIVATE_FLAG, %ecx #else @@ -187,7 +228,8 @@ __pthread_cond_timedwait: #if FUTEX_WAIT != 0 addl $FUTEX_WAIT, %ecx #endif - movl %edi, %edx + leal 4(%esp), %esi + movl 28(%esp), %edx addl $cond_futex, %ebx .Ladd_cond_futex: movl $SYS_futex, %eax @@ -196,7 +238,7 @@ __pthread_cond_timedwait: .Lsub_cond_futex: movl %eax, %esi - movl (%esp), %eax +41: movl (%esp), %eax call __pthread_disable_asynccancel .LcleanupEND: @@ -284,10 +326,16 @@ __pthread_cond_timedwait: #endif jne 10f +11: xorl %eax, %eax + /* With requeue_pi, the mutex lock is held in the kernel. 
*/ + movl 24(%esp), %ecx + testl %ecx, %ecx + jnz 26f + /* Remove cancellation handler. */ -11: movl 24+FRAME_SIZE(%esp), %eax + movl 24+FRAME_SIZE(%esp), %eax call __pthread_mutex_cond_lock - addl $FRAME_SIZE, %esp +26: addl $FRAME_SIZE, %esp cfi_adjust_cfa_offset(-FRAME_SIZE); /* We return the result of the mutex_lock operation if it failed. */ @@ -317,6 +365,9 @@ __pthread_cond_timedwait: cfi_restore_state +27: call __pthread_mutex_cond_lock_adjust + jmp 26b + /* Initial locking failed. */ 1: #if cond_lock == 0 diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S index 776b95e40c..a60aac3377 100644 --- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S +++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include @@ -43,6 +45,9 @@ __pthread_cond_wait: cfi_lsda(DW_EH_PE_udata4, .LexceptSTART) #endif + pushl %ebp + cfi_adjust_cfa_offset(4) + cfi_rel_offset(%ebp, 0) pushl %edi cfi_adjust_cfa_offset(4) cfi_rel_offset(%edi, 0) @@ -55,7 +60,7 @@ __pthread_cond_wait: cfi_remember_state xorl %esi, %esi - movl 16(%esp), %ebx + movl 20(%esp), %ebx /* Get internal lock. */ movl $1, %edx @@ -71,7 +76,7 @@ __pthread_cond_wait: /* Store the reference to the mutex. If there is already a different value in there this is a bad user bug. */ 2: cmpl $-1, dep_mutex(%ebx) - movl 20(%esp), %eax + movl 24(%esp), %eax je 15f movl %eax, dep_mutex(%ebx) @@ -87,7 +92,7 @@ __pthread_cond_wait: addl $1, cond_futex(%ebx) addl $(1 << nwaiters_shift), cond_nwaiters(%ebx) -#define FRAME_SIZE 16 +#define FRAME_SIZE 20 subl $FRAME_SIZE, %esp cfi_adjust_cfa_offset(FRAME_SIZE) @@ -99,8 +104,10 @@ __pthread_cond_wait: movl %edx, 8(%esp) movl %eax, 12(%esp) -8: movl cond_futex(%ebx), %edi - + /* Reset the pi-requeued flag. */ +8: movl $0, 16(%esp) + movl cond_futex(%ebx), %ebp + /* Unlock. 
*/ LOCK #if cond_lock == 0 @@ -114,12 +121,39 @@ __pthread_cond_wait: 4: call __pthread_enable_asynccancel movl %eax, (%esp) -#if FUTEX_PRIVATE_FLAG > 255 xorl %ecx, %ecx -#endif cmpl $-1, dep_mutex(%ebx) sete %cl - subl $1, %ecx + je 18f + + movl dep_mutex(%ebx), %edi + /* Requeue to a non-robust PI mutex if the PI bit is set and + the robust bit is not set. */ + movl MUTEX_KIND(%edi), %eax + andl $(ROBUST_BIT|PI_BIT), %eax + cmpl $PI_BIT, %eax + jne 18f + + movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx + movl %ebp, %edx + xorl %esi, %esi + addl $cond_futex, %ebx + movl $SYS_futex, %eax + ENTER_KERNEL + subl $cond_futex, %ebx + /* Set the pi-requeued flag only if the kernel has returned 0. The + kernel does not hold the mutex on error. */ + cmpl $0, %eax + sete 16(%esp) + je 19f + + /* Normal and PI futexes dont mix. Use normal futex functions only + if the kernel does not support the PI futex functions. */ + cmpl $-ENOSYS, %eax + jne 19f + xorl %ecx, %ecx + +18: subl $1, %ecx #ifdef __ASSUME_PRIVATE_FUTEX andl $FUTEX_PRIVATE_FLAG, %ecx #else @@ -128,7 +162,7 @@ __pthread_cond_wait: #if FUTEX_WAIT != 0 addl $FUTEX_WAIT, %ecx #endif - movl %edi, %edx + movl %ebp, %edx addl $cond_futex, %ebx .Ladd_cond_futex: movl $SYS_futex, %eax @@ -136,7 +170,7 @@ __pthread_cond_wait: subl $cond_futex, %ebx .Lsub_cond_futex: - movl (%esp), %eax +19: movl (%esp), %eax call __pthread_disable_asynccancel .LcleanupEND: @@ -212,9 +246,15 @@ __pthread_cond_wait: #endif jne 10f -11: movl 20+FRAME_SIZE(%esp), %eax + /* With requeue_pi, the mutex lock is held in the kernel. 
*/ +11: xorl %eax, %eax + movl 16(%esp), %ecx + testl %ecx, %ecx + jnz 20f + + movl 24+FRAME_SIZE(%esp), %eax call __pthread_mutex_cond_lock - addl $FRAME_SIZE, %esp +20: addl $FRAME_SIZE, %esp cfi_adjust_cfa_offset(-FRAME_SIZE); 14: popl %ebx @@ -226,12 +266,19 @@ __pthread_cond_wait: popl %edi cfi_adjust_cfa_offset(-4) cfi_restore(%edi) + popl %ebp + cfi_adjust_cfa_offset(-4) + cfi_restore(%ebp) /* We return the result of the mutex_lock operation. */ ret cfi_restore_state +21: call __pthread_mutex_cond_lock_adjust + xorl %eax, %eax + jmp 20b + /* Initial locking failed. */ 1: #if cond_lock == 0 @@ -484,7 +531,7 @@ __condvar_w_cleanup: movl $0x7fffffff, %edx ENTER_KERNEL -5: movl 20+FRAME_SIZE(%esp), %eax +5: movl 24+FRAME_SIZE(%esp), %eax call __pthread_mutex_cond_lock movl %esi, (%esp) diff --git a/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h b/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h index 66e0e628fd..4bb585af3b 100644 --- a/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h +++ b/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h @@ -54,6 +54,8 @@ #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 -- cgit v1.2.3