summary | refs | log | tree | commit | diff
path: root/nptl
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2009-08-08 17:48:09 -0700
committer Ulrich Drepper <drepper@redhat.com> 2009-08-08 17:48:09 -0700
commit efa0569d2bfdbb7367fce42b1c99821b85d2d3ba (patch)
tree 73e7beba2f7ee1b08281621bc288ac47c3e55934 /nptl
parent 5d368296eafdf38a81228d118e772134734d6bb3 (diff)
Optimize x86-64 version of sem_timedwait.
Diffstat (limited to 'nptl')
-rw-r--r-- nptl/ChangeLog 3
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S 210
2 files changed, 148 insertions, 65 deletions
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 48fcc0fe09..5be464e2f2 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,5 +1,8 @@
2009-08-08 Ulrich Drepper <drepper@redhat.com>
+ * sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait):
+ Optimize code path used when FUTEX_CLOCK_REALTIME is supported.
+
* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
(__pthread_cond_wait): Optimize by avoiding use of callee-safe
register.
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
index 95762834d3..0291beb169 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
@@ -65,34 +65,9 @@ sem_timedwait:
retq
/* Check whether the timeout value is valid. */
-1: pushq %r12
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r12, 0)
- pushq %r13
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r13, 0)
- pushq %r14
- cfi_adjust_cfa_offset(8)
- cfi_rel_offset(%r14, 0)
-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
-# define STACKFRAME 8
-#else
-# define STACKFRAME 24
-#endif
- subq $STACKFRAME, %rsp
- cfi_adjust_cfa_offset(STACKFRAME)
-
- movq %rdi, %r12
- movq %rsi, %r13
-
- /* Check for invalid nanosecond field. */
- cmpq $1000000000, 8(%r13)
- movl $EINVAL, %r14d
+1: cmpq $1000000000, 8(%rsi)
jae 6f
- LOCK
- addq $1, NWAITERS(%r12)
-
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
# ifdef PIC
cmpl $0, __have_futex_clock_realtime(%rip)
@@ -102,15 +77,22 @@ sem_timedwait:
je .Lreltmo
#endif
+ /* This push is only needed to store the sem_t pointer for the
+ exception handler. */
+ pushq %rdi
+ cfi_adjust_cfa_offset(8)
+
+ movq %rsi, %r10
+
+ LOCK
+ addq $1, NWAITERS(%rdi)
+
.LcleanupSTART:
13: call __pthread_enable_asynccancel
- movl %eax, (%rsp)
+ movl %eax, %r8d
- movq %r13, %r10
-#if VALUE == 0
- movq %r12, %rdi
-#else
- leaq VALUE(%r12), %rdi
+#if VALUE != 0
+ leaq VALUE(%rdi), %rdi
#endif
movl $0xffffffff, %r9d
movl $FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi
@@ -118,22 +100,26 @@ sem_timedwait:
movl $SYS_futex, %eax
xorl %edx, %edx
syscall
- movq %rax, %r14
+ movq %rax, %r9
+#if VALUE != 0
+ leaq -VALUE(%rdi), %rdi
+#endif
- movl (%rsp), %edi
+ xchgq %r8, %rdi
call __pthread_disable_asynccancel
.LcleanupEND:
+ movq %r8, %rdi
- testq %r14, %r14
+ testq %r9, %r9
je 11f
- cmpq $-EWOULDBLOCK, %r14
+ cmpq $-EWOULDBLOCK, %r9
jne 3f
11:
#if VALUE == 0
- movl (%r12), %eax
+ movl (%rdi), %eax
#else
- movl VALUE(%r12), %eax
+ movl VALUE(%rdi), %eax
#endif
14: testl %eax, %eax
je 13b
@@ -141,49 +127,74 @@ sem_timedwait:
leaq -1(%rax), %rcx
LOCK
#if VALUE == 0
- cmpxchgl %ecx, (%r12)
+ cmpxchgl %ecx, (%rdi)
#else
- cmpxchgl %ecx, VALUE(%r12)
+ cmpxchgl %ecx, VALUE(%rdi)
#endif
jne 14b
-10: xorl %eax, %eax
+ xorl %eax, %eax
15: LOCK
- subq $1, NWAITERS(%r12)
+ subq $1, NWAITERS(%rdi)
- addq $STACKFRAME, %rsp
- cfi_adjust_cfa_offset(-STACKFRAME)
- popq %r14
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r14)
- popq %r13
+ leaq 8(%rsp), %rsp
cfi_adjust_cfa_offset(-8)
- cfi_restore(%r13)
- popq %r12
- cfi_adjust_cfa_offset(-8)
- cfi_restore(%r12)
retq
- cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
- cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
- cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
- cfi_rel_offset(%r14, STACKFRAME)
-3: negq %r14
-6:
+ cfi_adjust_cfa_offset(8)
+3: negq %r9
#if USE___THREAD
movq errno@gottpoff(%rip), %rdx
- movl %r14d, %fs:(%rdx)
+ movl %r9d, %fs:(%rdx)
#else
callq __errno_location@plt
- movl %r14d, (%rax)
+ movl %r9d, (%rax)
#endif
orl $-1, %eax
jmp 15b
+ cfi_adjust_cfa_offset(-8)
+6:
+#if USE___THREAD
+ movq errno@gottpoff(%rip), %rdx
+ movl $EINVAL, %fs:(%rdx)
+#else
+ callq __errno_location@plt
+ movl $EINVAL, (%rax)
+#endif
+
+ orl $-1, %eax
+
+ retq
+
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.Lreltmo:
+ pushq %r12
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r12, 0)
+ pushq %r13
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r13, 0)
+ pushq %r14
+ cfi_adjust_cfa_offset(8)
+ cfi_rel_offset(%r14, 0)
+
+#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
+# define STACKFRAME 8
+#else
+# define STACKFRAME 24
+#endif
+ subq $STACKFRAME, %rsp
+ cfi_adjust_cfa_offset(STACKFRAME)
+
+ movq %rdi, %r12
+ movq %rsi, %r13
+
+ LOCK
+ addq $1, NWAITERS(%r12)
+
7: xorl %esi, %esi
movq %rsp, %rdi
movq $VSYSCALL_ADDR_vgettimeofday, %rax
@@ -202,7 +213,7 @@ sem_timedwait:
decq %rdi
5: testq %rdi, %rdi
movl $ETIMEDOUT, %r14d
- js 6b /* Time is already up. */
+ js 36f /* Time is already up. */
movq %rdi, (%rsp) /* Store relative timeout. */
movq %rsi, 8(%rsp)
@@ -235,7 +246,7 @@ sem_timedwait:
testq %r14, %r14
je 9f
cmpq $-EWOULDBLOCK, %r14
- jne 3b
+ jne 33f
9:
# if VALUE == 0
@@ -254,15 +265,54 @@ sem_timedwait:
cmpxchgl %ecx, VALUE(%r12)
# endif
jne 8b
- jmp 10b
+
+ xorl %eax, %eax
+
+45: LOCK
+ subq $1, NWAITERS(%r12)
+
+ addq $STACKFRAME, %rsp
+ cfi_adjust_cfa_offset(-STACKFRAME)
+ popq %r14
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r14)
+ popq %r13
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r13)
+ popq %r12
+ cfi_adjust_cfa_offset(-8)
+ cfi_restore(%r12)
+ retq
+
+ cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+ cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+ cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+ cfi_rel_offset(%r14, STACKFRAME)
+33: negq %r14
+36:
+#if USE___THREAD
+ movq errno@gottpoff(%rip), %rdx
+ movl %r14d, %fs:(%rdx)
+#else
+ callq __errno_location@plt
+ movl %r14d, (%rax)
#endif
+
+ orl $-1, %eax
+ jmp 45b
+#endif
+ cfi_endproc
.size sem_timedwait,.-sem_timedwait
.type sem_timedwait_cleanup,@function
sem_timedwait_cleanup:
+ cfi_startproc
+ cfi_adjust_cfa_offset(8)
+
+ movq (%rsp), %rdi
LOCK
- subq $1, NWAITERS(%r12)
+ subq $1, NWAITERS(%rdi)
movq %rax, %rdi
.LcallUR:
call _Unwind_Resume@PLT
@@ -272,6 +322,30 @@ sem_timedwait_cleanup:
.size sem_timedwait_cleanup,.-sem_timedwait_cleanup
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+ .type sem_timedwait_cleanup2,@function
+sem_timedwait_cleanup2:
+ cfi_startproc
+ cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+ cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+ cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+ cfi_rel_offset(%r14, STACKFRAME)
+
+ LOCK
+ subq $1, NWAITERS(%r12)
+ movq %rax, %rdi
+ movq STACKFRAME(%rsp), %r14
+ movq STACKFRAME+8(%rsp), %r13
+ movq STACKFRAME+16(%rsp), %r12
+.LcallUR2:
+ call _Unwind_Resume@PLT
+ hlt
+.LENDCODE2:
+ cfi_endproc
+ .size sem_timedwait_cleanup2,.-sem_timedwait_cleanup2
+#endif
+
+
.section .gcc_except_table,"a",@progbits
.LexceptSTART:
.byte DW_EH_PE_omit # @LPStart format
@@ -286,13 +360,19 @@ sem_timedwait_cleanup:
#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
.uleb128 .LcleanupSTART2-.LSTARTCODE
.uleb128 .LcleanupEND2-.LcleanupSTART2
- .uleb128 sem_timedwait_cleanup-.LSTARTCODE
+ .uleb128 sem_timedwait_cleanup2-.LSTARTCODE
.uleb128 0
#endif
.uleb128 .LcallUR-.LSTARTCODE
.uleb128 .LENDCODE-.LcallUR
.uleb128 0
.uleb128 0
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+ .uleb128 .LcallUR2-.LSTARTCODE
+ .uleb128 .LENDCODE2-.LcallUR2
+ .uleb128 0
+ .uleb128 0
+#endif
.Lcstend: