From 421665c40ae002f74130eb4e0b19cb22d97860cf Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Tue, 4 Aug 2009 11:03:46 -0700
Subject: Optimize x86-64 syscall cancellation handling.

The syscall wrappers had to save and restore the syscall parameter
values and the return value when the functions to enable and disable
cancellation were called.  Not anymore.  The called functions are
special and don't modify any unexpected registers.
---
 nptl/ChangeLog                                     |  12 +++
 nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S | 114 +++++++++++++++++++++
 .../unix/sysv/linux/x86_64/libc-cancellation.S     |  22 ++++
 .../unix/sysv/linux/x86_64/librt-cancellation.S    |  22 ++++
 .../sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h |  55 ++--------
 nptl/sysdeps/x86_64/tcb-offsets.sym                |  10 ++
 6 files changed, 191 insertions(+), 44 deletions(-)
 create mode 100644 nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
 create mode 100644 nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
 create mode 100644 nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S

diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 3ebe841452..3a74b5fecd 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,15 @@
+2009-08-04  Ulrich Drepper
+
+	* sysdeps/unix/sysv/linux/x86_64/cancellation.S: New file.
+	* sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S: New file.
+	* sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S: New file.
+	* sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h (PSEUDO): Optimize
+	since we can assume the special __*_{en,dis}able_asynccancel
+	functions.
+	(PUSHARGS_*, POPARGS_*, SAVESTK_*, RESTSTK_*): Removed.
+	* sysdeps/x86_64/tcb-offsets.sym: Add cancellation-related bits
+	and PTHREAD_CANCELED.
+
 2009-07-31  Ulrich Drepper
 
 	* descr.h: Better definition of *_BITMASK macros for cancellation.
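What the wrappers implement is ordinary POSIX cancellation semantics:
asynchronous cancelability is switched on only for the duration of the
blocking system call.  A minimal user-level C sketch of that pattern,
using the public API rather than the internal __pthread_*_asynccancel
entry points:

    #include <pthread.h>
    #include <unistd.h>

    /* Sketch of the pattern the cancellable syscall wrappers implement:
       allow asynchronous cancellation only while the thread may block
       in the kernel, then restore the previous cancellation type.  */
    static ssize_t
    cancellable_read (int fd, void *buf, size_t len)
    {
      int oldtype;
      pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, &oldtype);
      ssize_t n = read (fd, buf, len);  /* may act on pending cancellation */
      pthread_setcanceltype (oldtype, NULL);
      return n;
    }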
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
new file mode 100644
index 0000000000..a51df3eacf
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
@@ -0,0 +1,114 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper, 2009.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <tcb-offsets.h>
+#include <kernel-features.h>
+
+#ifdef IS_IN_libpthread
+# ifdef SHARED
+#  define __pthread_unwind __GI___pthread_unwind
+# endif
+#else
+# ifndef SHARED
+	.weak __pthread_unwind
+# endif
+#endif
+
+
+#ifdef __ASSUME_PRIVATE_FUTEX
+# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	$(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
+#else
+# if FUTEX_WAIT == 0
+#  define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	%fs:PRIVATE_FUTEX, reg
+# else
+#  define LOAD_PRIVATE_FUTEX_WAIT(reg) \
+	movl	%fs:PRIVATE_FUTEX, reg ; \
+	orl	$FUTEX_WAIT, reg
+# endif
+#endif
+
+/* It is crucial that the functions in this file don't modify registers
+   other than %rax and %r11.  The syscall wrapper code depends on this
+   because it doesn't explicitly save the other registers which hold
+   relevant values.  */
+	.text
+
+	.hidden __pthread_enable_asynccancel
+ENTRY(__pthread_enable_asynccancel)
+	movl	%fs:CANCELHANDLING, %eax
+2:	movl	%eax, %r11d
+	orl	$TCB_CANCELTYPE_BITMASK, %r11d
+	cmpl	%eax, %r11d
+	je	1f
+
+	lock
+	cmpxchgl %r11d, %fs:CANCELHANDLING
+	jnz	2b
+
+	andl	$(TCB_CANCELSTATE_BITMASK|TCB_CANCELTYPE_BITMASK|TCB_CANCELED_BITMASK|TCB_EXITING_BITMASK|TCB_CANCEL_RESTMASK|TCB_TERMINATED_BITMASK), %r11d
+	cmpl	$(TCB_CANCELTYPE_BITMASK|TCB_CANCELED_BITMASK), %r11d
+	je	3f
+
+1:	ret
+
+3:	movq	$TCB_PTHREAD_CANCELED, %fs:RESULT
+	lock
+	orl	$TCB_EXITING_BITMASK, %fs:CANCELHANDLING
+	movq	%fs:CLEANUP_JMP_BUF, %rdi
+#ifdef SHARED
+	call	__pthread_unwind@PLT
+#else
+	call	__pthread_unwind
+#endif
+	hlt
+END(__pthread_enable_asynccancel)
+
+
+	.hidden __pthread_disable_asynccancel
+ENTRY(__pthread_disable_asynccancel)
+	testl	$TCB_CANCELTYPE_BITMASK, %edi
+	jnz	1f
+
+	movl	%fs:CANCELHANDLING, %eax
+2:	movl	%eax, %r11d
+	andl	$~TCB_CANCELTYPE_BITMASK, %r11d
+	lock
+	cmpxchgl %r11d, %fs:CANCELHANDLING
+	jnz	2b
+
+3:	movl	%r11d, %eax
+	andl	$(TCB_CANCELING_BITMASK|TCB_CANCELED_BITMASK), %eax
+	cmpl	$TCB_CANCELING_BITMASK, %eax
+	je	4f
+1:	ret
+
+	/* Performance doesn't matter in this loop.  We will
+	   delay until the thread is canceled.  And it is unlikely
+	   that we enter the loop twice.  */
+4:	movq	%fs:0, %rdi
+	movl	$__NR_futex, %eax
+	xorq	%r10, %r10
+	addq	$CANCELHANDLING, %rdi
+	LOAD_PRIVATE_FUTEX_WAIT (%esi)
+	syscall
+	jmp	3b
+END(__pthread_disable_asynccancel)
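At its core, __pthread_enable_asynccancel is a compare-and-swap loop
that sets the async-cancel-type bit in the TCB's cancelhandling word
and hands the old value back for the matching disable call.  In
C11-atomics terms the loop looks roughly like this (the bit name and
layout are illustrative stand-ins, not the actual glibc definitions):

    #include <stdatomic.h>

    #define CANCELTYPE_BIT (1 << 1)   /* illustrative bit position */

    /* Sketch of the enable path: atomically OR in the bit, retrying if
       another thread changed the word meanwhile, and return the previous
       value (the asm keeps it in %eax for the later disable call).  */
    static int
    enable_asynccancel (_Atomic int *cancelhandling)
    {
      int old = atomic_load (cancelhandling);
      int new;
      do
        new = old | CANCELTYPE_BIT;
      while (new != old
             && !atomic_compare_exchange_weak (cancelhandling, &old, new));
      return old;
    }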
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
new file mode 100644
index 0000000000..1100588502
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
@@ -0,0 +1,22 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper, 2009.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#define __pthread_enable_asynccancel __libc_enable_asynccancel
+#define __pthread_disable_asynccancel __libc_disable_asynccancel
+#include "cancellation.S"
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S
new file mode 100644
index 0000000000..ce4192b5d3
--- /dev/null
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S
@@ -0,0 +1,22 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper, 2009.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#define __pthread_enable_asynccancel __librt_enable_asynccancel
+#define __pthread_disable_asynccancel __librt_disable_asynccancel
+#include "cancellation.S"
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h b/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
index 3e741da794..1e92de1dcc 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sysdep-cancel.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2006, 2009 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Jakub Jelinek, 2002.
 
@@ -25,6 +25,10 @@
 
 #if !defined NOT_IN_libc || defined IS_IN_libpthread || defined IS_IN_librt
 
+/* The code to disable cancellation depends on the fact that the called
+   functions are special.  They don't modify registers other than %rax
+   and %r11 if they return.  Therefore we don't have to preserve other
+   registers around these calls.  */
 # undef PSEUDO
 # define PSEUDO(name, syscall_name, args) \
   .text; \
@@ -40,60 +44,23 @@
     ret; \
   .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
   L(pseudo_cancel): \
-    /* Save registers that might get destroyed.  */ \
-    SAVESTK_##args \
-    PUSHARGS_##args \
+    /* We always have to align the stack before calling a function.  */ \
+    subq $8, %rsp; cfi_adjust_cfa_offset (8); \
     CENABLE \
-    /* Restore registers.  */ \
-    POPARGS_##args \
     /* The return value from CENABLE is argument for CDISABLE.  */ \
     movq %rax, (%rsp); \
-    movl $SYS_ify (syscall_name), %eax; \
-    syscall; \
+    DO_CALL (syscall_name, args); \
    movq (%rsp), %rdi; \
     /* Save %rax since it's the error code from the syscall.  */ \
-    movq %rax, 8(%rsp); \
+    movq %rax, %rdx; \
     CDISABLE \
-    movq 8(%rsp), %rax; \
-    RESTSTK_##args \
+    movq %rdx, %rax; \
+    addq $8,%rsp; cfi_adjust_cfa_offset (-8); \
    cmpq $-4095, %rax; \
    jae SYSCALL_ERROR_LABEL; \
  L(pseudo_end):
 
-# define PUSHARGS_0	/* Nothing.  */
-# define PUSHARGS_1	PUSHARGS_0 movq %rdi, 8(%rsp);
-# define PUSHARGS_2	PUSHARGS_1 movq %rsi, 16(%rsp);
-# define PUSHARGS_3	PUSHARGS_2 movq %rdx, 24(%rsp);
-# define PUSHARGS_4	PUSHARGS_3 movq %rcx, 32(%rsp);
-# define PUSHARGS_5	PUSHARGS_4 movq %r8, 40(%rsp);
-# define PUSHARGS_6	PUSHARGS_5 movq %r9, 48(%rsp);
-
-# define POPARGS_0	/* Nothing.  */
-# define POPARGS_1	POPARGS_0 movq 8(%rsp), %rdi;
-# define POPARGS_2	POPARGS_1 movq 16(%rsp), %rsi;
-# define POPARGS_3	POPARGS_2 movq 24(%rsp), %rdx;
-# define POPARGS_4	POPARGS_3 movq 32(%rsp), %r10;
-# define POPARGS_5	POPARGS_4 movq 40(%rsp), %r8;
-# define POPARGS_6	POPARGS_5 movq 48(%rsp), %r9;
-
-/* We always have to align the stack before calling a function.  */
-# define SAVESTK_0	subq $24, %rsp; cfi_adjust_cfa_offset (24);
-# define SAVESTK_1	SAVESTK_0
-# define SAVESTK_2	SAVESTK_1
-# define SAVESTK_3	subq $40, %rsp; cfi_adjust_cfa_offset (40);
-# define SAVESTK_4	SAVESTK_3
-# define SAVESTK_5	subq $56, %rsp; cfi_adjust_cfa_offset (56);
-# define SAVESTK_6	SAVESTK_5
-
-# define RESTSTK_0	addq $24,%rsp; cfi_adjust_cfa_offset (-24);
-# define RESTSTK_1	RESTSTK_0
-# define RESTSTK_2	RESTSTK_1
-# define RESTSTK_3	addq $40, %rsp; cfi_adjust_cfa_offset (-40);
-# define RESTSTK_4	RESTSTK_3
-# define RESTSTK_5	addq $56, %rsp; cfi_adjust_cfa_offset (-56);
-# define RESTSTK_6	RESTSTK_5
-
 # ifdef IS_IN_libpthread
 #  define CENABLE	call __pthread_enable_asynccancel;
 #  define CDISABLE	call __pthread_disable_asynccancel;
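In C terms, the stub the rewritten PSEUDO macro emits for a cancellable
syscall now has this shape (an illustrative sketch only; the names
enable_asynccancel, disable_asynccancel and do_syscall stand in for the
real CENABLE/CDISABLE/DO_CALL macro expansions):

    /* The single 8-byte stack slot serves two purposes: it restores the
       16-byte stack alignment the ABI requires at call sites (on entry
       %rsp is offset by the 8-byte return address), and it stores the
       old cancellation state across the system call.  */
    extern int  enable_asynccancel (void);
    extern void disable_asynccancel (int oldtype);
    extern long do_syscall (long nr, long a1, long a2, long a3);

    long
    cancellable_stub (long nr, long a1, long a2, long a3)
    {
      int oldtype = enable_asynccancel ();  /* clobbers only %rax/%r11 */
      long result = do_syscall (nr, a1, a2, a3);
      /* The asm parks the result in %rdx across this call, because the
         disable helper is guaranteed to preserve it.  */
      disable_asynccancel (oldtype);
      return result;   /* values in -4095..-1 signal an errno */
    }

Because the helpers touch only %rax and %r11, the syscall argument
registers survive both calls, which is what makes the old
PUSHARGS_*/POPARGS_* spill code removable.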
diff --git a/nptl/sysdeps/x86_64/tcb-offsets.sym b/nptl/sysdeps/x86_64/tcb-offsets.sym
index 51f35c61cf..cf863752ee 100644
--- a/nptl/sysdeps/x86_64/tcb-offsets.sym
+++ b/nptl/sysdeps/x86_64/tcb-offsets.sym
@@ -16,3 +16,13 @@ VGETCPU_CACHE_OFFSET	offsetof (tcbhead_t, vgetcpu_cache)
 PRIVATE_FUTEX		offsetof (tcbhead_t, private_futex)
 #endif
 RTLD_SAVESPACE_SSE	offsetof (tcbhead_t, rtld_savespace_sse)
+
+-- Not strictly offsets, but these values are also used in the TCB.
+TCB_CANCELSTATE_BITMASK	CANCELSTATE_BITMASK
+TCB_CANCELTYPE_BITMASK	CANCELTYPE_BITMASK
+TCB_CANCELING_BITMASK	CANCELING_BITMASK
+TCB_CANCELED_BITMASK	CANCELED_BITMASK
+TCB_EXITING_BITMASK	EXITING_BITMASK
+TCB_CANCEL_RESTMASK	CANCEL_RESTMASK
+TCB_TERMINATED_BITMASK	TERMINATED_BITMASK
+TCB_PTHREAD_CANCELED	PTHREAD_CANCELED

From 9083bcc5dc747eeec24c2bb8bf5b7055b3bd4dbf Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Fri, 7 Aug 2009 20:36:53 -0700
Subject: Small optimizations to cancellable x86-64 code.
---
 nptl/ChangeLog                                     |  9 ++++
 nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S |  1 +
 .../sysv/linux/x86_64/pthread_cond_timedwait.S     |  2 -
 .../unix/sysv/linux/x86_64/pthread_cond_wait.S     | 40 ++++++---------
 nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S     | 58 +++++++++-------------
 5 files changed, 49 insertions(+), 61 deletions(-)

diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 3a74b5fecd..0f5c231765 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,12 @@
+2009-08-07  Ulrich Drepper
+
+	* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Little optimizations
+	enabled by the special *_asynccancel functions.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
+
+	* sysdeps/unix/sysv/linux/x86_64/cancellation.S: Include lowlevellock.h.
+
 2009-08-04  Ulrich Drepper
 
 	* sysdeps/unix/sysv/linux/x86_64/cancellation.S: New file.
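All of the files touched here block in the kernel via futex(FUTEX_WAIT).
For reference, this is the call the assembly sequences issue, sketched
with the raw syscall(2) interface (there is no glibc wrapper for futex):

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <errno.h>

    /* Sleep until *addr is observed to differ from 'expected' (or a
       signal arrives); mirrors the FUTEX_WAIT syscalls in these files.  */
    static int
    futex_wait (int *addr, int expected)
    {
      long r = syscall (SYS_futex, addr, FUTEX_WAIT, expected,
                        (void *) 0 /* no timeout */, 0, 0);
      return r == -1 ? -errno : 0;
    }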
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
index a51df3eacf..0d48ec6fcd 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
@@ -20,6 +20,7 @@
 #include <sysdep.h>
 #include <tcb-offsets.h>
 #include <kernel-features.h>
+#include "lowlevellock.h"
 
 #ifdef IS_IN_libpthread
 # ifdef SHARED
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 4913beb8af..86bdac1b1b 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -157,7 +157,6 @@ __pthread_cond_timedwait:
 .LcleanupSTART1:
 34:	callq	__pthread_enable_asynccancel
 	movl	%eax, (%rsp)
-	movq	8(%rsp), %rdi
 
 	movq	%r13, %r10
 	movl	$FUTEX_WAIT_BITSET, %esi
@@ -511,7 +510,6 @@ __pthread_cond_timedwait:
 .LcleanupSTART2:
 4:	callq	__pthread_enable_asynccancel
 	movl	%eax, (%rsp)
-	movq	8(%rsp), %rdi
 
 	leaq	32(%rsp), %r10
 	cmpq	$-1, dep_mutex(%rdi)
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index a66523eab6..45116b1ab0 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -45,14 +45,11 @@ __pthread_cond_wait:
 	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
 #endif
 
-	pushq	%r12
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
 	pushq	%r13
 	cfi_adjust_cfa_offset(8)
 	cfi_rel_offset(%r13, 0)
 #define FRAME_SIZE 32
-	subq	$FRAME_SIZE, %rsp
+	leaq	-FRAME_SIZE(%rsp), %rsp
 	cfi_adjust_cfa_offset(FRAME_SIZE)
 
 	/* Stack frame:
@@ -112,7 +109,7 @@ __pthread_cond_wait:
 	movl	%edx, 4(%rsp)
 
 	/* Unlock.  */
-8:	movl	cond_futex(%rdi), %r12d
+8:	movl	cond_futex(%rdi), %edx
 	LOCK
 #if cond_lock == 0
 	decl	(%rdi)
@@ -125,9 +122,7 @@ __pthread_cond_wait:
 
 4:	callq	__pthread_enable_asynccancel
 	movl	%eax, (%rsp)
-	movq	8(%rsp), %rdi
 
 	xorq	%r10, %r10
-	movq	%r12, %rdx
 	cmpq	$-1, dep_mutex(%rdi)
 	leaq	cond_futex(%rdi), %rdi
 	movl	$FUTEX_WAIT, %esi
@@ -243,21 +238,14 @@ __pthread_cond_wait:
 
 	callq	__pthread_mutex_cond_lock
 
-14:	addq	$FRAME_SIZE, %rsp
-	cfi_adjust_cfa_offset(-FRAME_SIZE)
-
-	popq	%r13
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r13)
-	popq	%r12
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
+14:	movq	FRAME_SIZE(%rsp), %r13
+	leaq	FRAME_SIZE+8(%rsp), %rsp
+	cfi_adjust_cfa_offset(-(FRAME_SIZE + 8))
 
 	/* We return the result of the mutex_lock operation.  */
 	retq
 
-	cfi_adjust_cfa_offset(16 + FRAME_SIZE)
-	cfi_rel_offset(%r12, FRAME_SIZE + 8)
+	cfi_adjust_cfa_offset(8 + FRAME_SIZE)
 	cfi_rel_offset(%r13, FRAME_SIZE)
 
 18:	callq	__pthread_mutex_cond_lock_adjust
@@ -285,7 +273,11 @@ __pthread_cond_wait:
 	movl	$LLL_PRIVATE, %eax
 	movl	$LLL_SHARED, %esi
 	cmovne	%eax, %esi
+	/* The call preserves %rdx.  */
 	callq	__lll_unlock_wake
+#if cond_lock != 0
+	subq	$cond_lock, %rdi
+#endif
 	jmp	4b
 
 	/* Locking in loop failed.  */
@@ -349,9 +341,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 __condvar_cleanup1:
 	/* Stack frame:
 
-	    rsp + 48
-		     +--------------------------+
-	    rsp + 40 | %r12                     |
+	    rsp + 40
 		     +--------------------------+
 	    rsp + 32 | %r13                     |
 		     +--------------------------+
@@ -410,7 +400,7 @@ __condvar_cleanup1:
 3:	subl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
 
 	/* Wake up a thread which wants to destroy the condvar object.  */
-	xorq	%r12, %r12
+	xorl	%ecx, %ecx
 	cmpq	$0xffffffffffffffff, total_seq(%rdi)
 	jne	4f
 	movl	cond_nwaiters(%rdi), %eax
@@ -433,7 +423,7 @@ __condvar_cleanup1:
 	movl	$SYS_futex, %eax
 	syscall
 	subq	$cond_nwaiters, %rdi
-	movl	$1, %r12d
+	movl	$1, %ecx
 
 4:	LOCK
 #if cond_lock == 0
@@ -449,10 +439,11 @@ __condvar_cleanup1:
 	movl	$LLL_PRIVATE, %eax
 	movl	$LLL_SHARED, %esi
 	cmovne	%eax, %esi
+	/* The call preserves %rcx.  */
 	callq	__lll_unlock_wake
 
 	/* Wake up all waiters to make sure no signal gets lost.  */
-2:	testq	%r12, %r12
+2:	testl	%ecx, %ecx
 	jnz	5f
 	addq	$cond_futex, %rdi
 	cmpq	$-1, dep_mutex-cond_futex(%rdi)
@@ -474,7 +465,6 @@ __condvar_cleanup1:
 	callq	__pthread_mutex_cond_lock
 
 	movq	24(%rsp), %rdi
-	movq	40(%rsp), %r12
 	movq	32(%rsp), %r13
 .LcallUR:
 	call	_Unwind_Resume@PLT
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
index a01d745a17..2cf6ec10a4 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S
@@ -61,16 +61,13 @@ sem_wait:
 	xorl	%eax, %eax
 	retq
 
-1:	pushq	%r12
+	/* This push is only needed to store the sem_t pointer for the
+	   exception handler.  */
+1:	pushq	%rdi
 	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
-	pushq	%r13
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r13, 0)
-	movq	%rdi, %r13
 
 	LOCK
-	addq	$1, NWAITERS(%r13)
+	addq	$1, NWAITERS(%rdi)
 
 .LcleanupSTART:
 6:	call	__pthread_enable_asynccancel
 	movl	%eax, %r8d
 
 	xorq	%r10, %r10
 	movl	$SYS_futex, %eax
-	movq	%r13, %rdi
 #if FUTEX_WAIT == 0
 	movl	PRIVATE(%rdi), %esi
 #else
 	movl	$FUTEX_WAIT, %esi
 	orl	PRIVATE(%rdi), %esi
 #endif
 	xorl	%edx, %edx
 	syscall
-	movq	%rax, %r12
+	movq	%rax, %rcx
 
-	movl	%r8d, %edi
+	xchgq	%r8, %rdi
 	call	__pthread_disable_asynccancel
 .LcleanupEND:
+	movq	%r8, %rdi
 
-	testq	%r12, %r12
+	testq	%rcx, %rcx
 	je	3f
-	cmpq	$-EWOULDBLOCK, %r12
+	cmpq	$-EWOULDBLOCK, %rcx
 	jne	4f
 
 3:
 #if VALUE == 0
-	movl	(%r13), %eax
+	movl	(%rdi), %eax
 #else
-	movl	VALUE(%r13), %eax
+	movl	VALUE(%rdi), %eax
 #endif
 5:	testl	%eax, %eax
 	je	6b
 
 	leal	-1(%rax), %edx
 	LOCK
 #if VALUE == 0
-	cmpxchgl %edx, (%r13)
+	cmpxchgl %edx, (%rdi)
 #else
-	cmpxchgl %edx, VALUE(%r13)
+	cmpxchgl %edx, VALUE(%rdi)
 #endif
 	jne	5b
 
-	LOCK
-	subq	$1, NWAITERS(%r13)
-
 	xorl	%eax, %eax
 
-9:	popq	%r13
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r13)
-	popq	%r12
+9:	LOCK
+	subq	$1, NWAITERS(%rdi)
+
+	leaq	8(%rsp), %rsp
 	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
 	retq
 
-	cfi_adjust_cfa_offset(2 * 8)
-	cfi_rel_offset(%r12, 8)
-	cfi_rel_offset(%r13, 0)
-4:	negq	%r12
+	cfi_adjust_cfa_offset(8)
+4:	negq	%rcx
 #if USE___THREAD
 	movq	errno@gottpoff(%rip), %rdx
-	movl	%r12d, %fs:(%rdx)
+	movl	%ecx, %fs:(%rdx)
 #else
+# error "not supported.  %rcx and %rdi must be preserved"
 	callq	__errno_location@plt
-	movl	%r12d, (%rax)
+	movl	%ecx, (%rax)
 #endif
 	orl	$-1, %eax
-	LOCK
-	subq	$1, NWAITERS(%r13)
 	jmp	9b
 	.size	sem_wait,.-sem_wait
 
 	.type	sem_wait_cleanup,@function
 sem_wait_cleanup:
+	movq	(%rsp), %rdi
 	LOCK
-	subq	$1, NWAITERS(%r13)
+	subq	$1, NWAITERS(%rdi)
 	movq	%rax, %rdi
 .LcallUR:
 	call	_Unwind_Resume@PLT
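__condvar_cleanup1 and sem_wait_cleanup are the hand-written
counterparts of the cleanup handlers a C caller would install with
pthread_cleanup_push; they run when a waiter is canceled while blocked.
A user-level illustration of the same protocol:

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    /* Runs if the thread is canceled inside pthread_cond_wait, which is
       the situation the assembly cleanup routines above handle.  */
    static void
    unlock_on_cancel (void *arg)
    {
      pthread_mutex_unlock (arg);
    }

    static void
    wait_for_signal (void)
    {
      pthread_mutex_lock (&lock);
      pthread_cleanup_push (unlock_on_cancel, &lock);
      pthread_cond_wait (&cond, &lock);   /* cancellation point */
      pthread_cleanup_pop (0);
      pthread_mutex_unlock (&lock);
    }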
From 49eea97b00d367aa3eac3dfd047c259e7bd04732 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Sat, 8 Aug 2009 10:21:46 -0700
Subject: Another minor optimization of x86-64 pthread_cond_wait.
---
 nptl/ChangeLog                                     |  6 ++++++
 .../unix/sysv/linux/x86_64/pthread_cond_wait.S     | 22 +++++++---------------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 0f5c231765..48fcc0fe09 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,9 @@
+2009-08-08  Ulrich Drepper
+
+	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+	(__pthread_cond_wait): Optimize by avoiding use of callee-safe
+	register.
+
 2009-08-07  Ulrich Drepper
 
 	* sysdeps/unix/sysv/linux/x86_64/sem_wait.S: Little optimizations
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index 45116b1ab0..f5b929ea71 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -45,9 +45,6 @@ __pthread_cond_wait:
 	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
 #endif
 
-	pushq	%r13
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r13, 0)
 #define FRAME_SIZE 32
 	leaq	-FRAME_SIZE(%rsp), %rsp
 	cfi_adjust_cfa_offset(FRAME_SIZE)
@@ -140,7 +137,7 @@ __pthread_cond_wait:
 	movl	$SYS_futex, %eax
 	syscall
 
-	movl	$1, %r13d
+	movl	$1, %r8d
 #ifdef __ASSUME_REQUEUE_PI
 	jmp	62f
 #else
@@ -158,7 +155,7 @@ __pthread_cond_wait:
 #else
 	orl	%fs:PRIVATE_FUTEX, %esi
 #endif
-60:	xorl	%r13d, %r13d
+60:	xorl	%r8d, %r8d
 	movl	$SYS_futex, %eax
 	syscall
 
@@ -233,20 +230,18 @@ __pthread_cond_wait:
 	/* If requeue_pi is used the kernel performs the locking of the
 	   mutex.  */
 11:	movq	16(%rsp), %rdi
-	testl	%r13d, %r13d
+	testl	%r8d, %r8d
 	jnz	18f
 
 	callq	__pthread_mutex_cond_lock
 
-14:	movq	FRAME_SIZE(%rsp), %r13
-	leaq	FRAME_SIZE+8(%rsp), %rsp
-	cfi_adjust_cfa_offset(-(FRAME_SIZE + 8))
+14:	leaq	FRAME_SIZE(%rsp), %rsp
+	cfi_adjust_cfa_offset(-FRAME_SIZE)
 
 	/* We return the result of the mutex_lock operation.  */
 	retq
 
-	cfi_adjust_cfa_offset(8 + FRAME_SIZE)
-	cfi_rel_offset(%r13, FRAME_SIZE)
+	cfi_adjust_cfa_offset(FRAME_SIZE)
 
 18:	callq	__pthread_mutex_cond_lock_adjust
 	xorl	%eax, %eax
@@ -341,9 +336,7 @@ versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
 __condvar_cleanup1:
 	/* Stack frame:
 
-	    rsp + 40
-		     +--------------------------+
-	    rsp + 32 | %r13                     |
+	    rsp + 32
 		     +--------------------------+
 	    rsp + 24 | unused                   |
 		     +--------------------------+
@@ -465,7 +458,6 @@ __condvar_cleanup1:
 	callq	__pthread_mutex_cond_lock
 
 	movq	24(%rsp), %rdi
-	movq	32(%rsp), %r13
 .LcallUR:
 	call	_Unwind_Resume@PLT
 	hlt

From efa0569d2bfdbb7367fce42b1c99821b85d2d3ba Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Sat, 8 Aug 2009 17:48:09 -0700
Subject: Optimize x86-64 version of sem_timedwait.
---
 nptl/ChangeLog                                     |   3 +
 .../sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S | 210 ++++++++++++++-------
 2 files changed, 148 insertions(+), 65 deletions(-)

diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index 48fcc0fe09..5be464e2f2 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,5 +1,8 @@
 2009-08-08  Ulrich Drepper
 
+	* sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S (sem_timedwait):
+	Optimize code path used when FUTEX_CLOCK_REALTIME is supported.
+
 	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
 	(__pthread_cond_wait): Optimize by avoiding use of callee-safe
 	register.
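The fast path being optimized here is the one where the kernel accepts
an absolute CLOCK_REALTIME deadline directly
(FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME), so no gettimeofday round trip
is needed.  From the caller's side the interface is unchanged; for
example:

    #include <semaphore.h>
    #include <time.h>
    #include <errno.h>

    /* sem_timedwait takes an absolute CLOCK_REALTIME deadline, which
       the optimized path hands straight to the kernel.  */
    static int
    wait_with_timeout (sem_t *sem, time_t seconds)
    {
      struct timespec deadline;
      clock_gettime (CLOCK_REALTIME, &deadline);
      deadline.tv_sec += seconds;
      while (sem_timedwait (sem, &deadline) != 0)
        {
          if (errno == EINTR)
            continue;           /* interrupted by a signal: retry */
          return errno;         /* ETIMEDOUT, EINVAL, ...  */
        }
      return 0;
    }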
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
index 95762834d3..0291beb169 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/sem_timedwait.S
@@ -65,34 +65,9 @@ sem_timedwait:
 	retq
 
 	/* Check whether the timeout value is valid.  */
-1:	pushq	%r12
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r12, 0)
-	pushq	%r13
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r13, 0)
-	pushq	%r14
-	cfi_adjust_cfa_offset(8)
-	cfi_rel_offset(%r14, 0)
-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
-# define STACKFRAME 8
-#else
-# define STACKFRAME 24
-#endif
-	subq	$STACKFRAME, %rsp
-	cfi_adjust_cfa_offset(STACKFRAME)
-
-	movq	%rdi, %r12
-	movq	%rsi, %r13
-
-	/* Check for invalid nanosecond field.  */
-	cmpq	$1000000000, 8(%r13)
-	movl	$EINVAL, %r14d
+1:	cmpq	$1000000000, 8(%rsi)
 	jae	6f
 
-	LOCK
-	addq	$1, NWAITERS(%r12)
-
 #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
 # ifdef PIC
 	cmpl	$0, __have_futex_clock_realtime(%rip)
 # else
 	cmpl	$0, __have_futex_clock_realtime
 # endif
 	je	.Lreltmo
 #endif
 
+	/* This push is only needed to store the sem_t pointer for the
+	   exception handler.  */
+	pushq	%rdi
+	cfi_adjust_cfa_offset(8)
+
+	movq	%rsi, %r10
+
+	LOCK
+	addq	$1, NWAITERS(%rdi)
+
 .LcleanupSTART:
 13:	call	__pthread_enable_asynccancel
-	movl	%eax, (%rsp)
+	movl	%eax, %r8d
 
-	movq	%r13, %r10
-#if VALUE == 0
-	movq	%r12, %rdi
-#else
-	leaq	VALUE(%r12), %rdi
+#if VALUE != 0
+	leaq	VALUE(%rdi), %rdi
 #endif
 	movl	$0xffffffff, %r9d
 	movl	$FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, %esi
 	movl	$SYS_futex, %eax
 	xorl	%edx, %edx
 	syscall
-	movq	%rax, %r14
+	movq	%rax, %r9
+#if VALUE != 0
+	leaq	-VALUE(%rdi), %rdi
+#endif
 
-	movl	(%rsp), %edi
+	xchgq	%r8, %rdi
 	call	__pthread_disable_asynccancel
 .LcleanupEND:
+	movq	%r8, %rdi
 
-	testq	%r14, %r14
-	je	11f
-	cmpq	$-EWOULDBLOCK, %r14
+	testq	%r9, %r9
+	je	11f
+	cmpq	$-EWOULDBLOCK, %r9
 	jne	3f
 
 11:
 #if VALUE == 0
-	movl	(%r12), %eax
+	movl	(%rdi), %eax
 #else
-	movl	VALUE(%r12), %eax
+	movl	VALUE(%rdi), %eax
 #endif
 14:	testl	%eax, %eax
 	je	13b
 
 	leaq	-1(%rax), %rcx
 	LOCK
 #if VALUE == 0
-	cmpxchgl %ecx, (%r12)
+	cmpxchgl %ecx, (%rdi)
 #else
-	cmpxchgl %ecx, VALUE(%r12)
+	cmpxchgl %ecx, VALUE(%rdi)
 #endif
 	jne	14b
 
-10:	xorl	%eax, %eax
+	xorl	%eax, %eax
 
 15:	LOCK
-	subq	$1, NWAITERS(%r12)
+	subq	$1, NWAITERS(%rdi)
 
-	addq	$STACKFRAME, %rsp
-	cfi_adjust_cfa_offset(-STACKFRAME)
-	popq	%r14
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r14)
-	popq	%r13
+	leaq	8(%rsp), %rsp
 	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r13)
-	popq	%r12
-	cfi_adjust_cfa_offset(-8)
-	cfi_restore(%r12)
 	retq
 
-	cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
-	cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
-	cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
-	cfi_rel_offset(%r14, STACKFRAME)
-3:	negq	%r14
-6:
+	cfi_adjust_cfa_offset(8)
+3:	negq	%r9
 #if USE___THREAD
 	movq	errno@gottpoff(%rip), %rdx
-	movl	%r14d, %fs:(%rdx)
+	movl	%r9d, %fs:(%rdx)
 #else
 	callq	__errno_location@plt
-	movl	%r14d, (%rax)
+	movl	%r9d, (%rax)
 #endif
 
 	orl	$-1, %eax
 	jmp	15b
 
+	cfi_adjust_cfa_offset(-8)
+6:
+#if USE___THREAD
+	movq	errno@gottpoff(%rip), %rdx
+	movl	$EINVAL, %fs:(%rdx)
+#else
+	callq	__errno_location@plt
+	movl	$EINVAL, (%rax)
+#endif
+
+	orl	$-1, %eax
+
+	retq
+
 #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
 .Lreltmo:
+	pushq	%r12
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r12, 0)
+	pushq	%r13
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r13, 0)
+	pushq	%r14
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(%r14, 0)
+
+#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
+# define STACKFRAME 8
+#else
+# define STACKFRAME 24
+#endif
+	subq	$STACKFRAME, %rsp
+	cfi_adjust_cfa_offset(STACKFRAME)
+
+	movq	%rdi, %r12
+	movq	%rsi, %r13
+
+	LOCK
+	addq	$1, NWAITERS(%r12)
+
 7:	xorl	%esi, %esi
 	movq	%rsp, %rdi
 	movq	$VSYSCALL_ADDR_vgettimeofday, %rax
@@ -202,7 +213,7 @@ sem_timedwait:
 	decq	%rdi
 5:	testq	%rdi, %rdi
 	movl	$ETIMEDOUT, %r14d
-	js	6b		/* Time is already up.  */
+	js	36f		/* Time is already up.  */
 
 	movq	%rdi, (%rsp)	/* Store relative timeout.  */
 	movq	%rsi, 8(%rsp)
@@ -235,7 +246,7 @@ sem_timedwait:
 	testq	%r14, %r14
 	je	9f
 	cmpq	$-EWOULDBLOCK, %r14
-	jne	3b
+	jne	33f
 
 9:
 # if VALUE == 0
@@ -254,15 +265,54 @@ sem_timedwait:
 	cmpxchgl %ecx, VALUE(%r12)
 # endif
 	jne	8b
-	jmp	10b
+
+	xorl	%eax, %eax
+
+45:	LOCK
+	subq	$1, NWAITERS(%r12)
+
+	addq	$STACKFRAME, %rsp
+	cfi_adjust_cfa_offset(-STACKFRAME)
+	popq	%r14
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r14)
+	popq	%r13
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r13)
+	popq	%r12
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(%r12)
+	retq
+
+	cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+	cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+	cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+	cfi_rel_offset(%r14, STACKFRAME)
+33:	negq	%r14
+36:
+#if USE___THREAD
+	movq	errno@gottpoff(%rip), %rdx
+	movl	%r14d, %fs:(%rdx)
+#else
+	callq	__errno_location@plt
+	movl	%r14d, (%rax)
 #endif
+
+	orl	$-1, %eax
+	jmp	45b
+#endif
+
 	cfi_endproc
 	.size	sem_timedwait,.-sem_timedwait
 
 	.type	sem_timedwait_cleanup,@function
 sem_timedwait_cleanup:
+	cfi_startproc
+	cfi_adjust_cfa_offset(8)
+
+	movq	(%rsp), %rdi
 	LOCK
-	subq	$1, NWAITERS(%r12)
+	subq	$1, NWAITERS(%rdi)
 	movq	%rax, %rdi
 .LcallUR:
 	call	_Unwind_Resume@PLT
@@ -272,6 +322,30 @@ sem_timedwait_cleanup:
 	.size	sem_timedwait_cleanup,.-sem_timedwait_cleanup
 
 
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+	.type	sem_timedwait_cleanup2,@function
+sem_timedwait_cleanup2:
+	cfi_startproc
+	cfi_adjust_cfa_offset(STACKFRAME + 3 * 8)
+	cfi_rel_offset(%r12, STACKFRAME + 2 * 8)
+	cfi_rel_offset(%r13, STACKFRAME + 1 * 8)
+	cfi_rel_offset(%r14, STACKFRAME)
+
+	LOCK
+	subq	$1, NWAITERS(%r12)
+	movq	%rax, %rdi
+	movq	STACKFRAME(%rsp), %r14
+	movq	STACKFRAME+8(%rsp), %r13
+	movq	STACKFRAME+16(%rsp), %r12
+.LcallUR2:
+	call	_Unwind_Resume@PLT
+	hlt
+.LENDCODE2:
+	cfi_endproc
+	.size	sem_timedwait_cleanup2,.-sem_timedwait_cleanup2
+#endif
+
+
 	.section .gcc_except_table,"a",@progbits
 .LexceptSTART:
 	.byte	DW_EH_PE_omit			# @LPStart format
@@ -286,13 +360,19 @@ sem_timedwait_cleanup:
 #ifndef __ASSUME_FUTEX_CLOCK_REALTIME
 	.uleb128 .LcleanupSTART2-.LSTARTCODE
 	.uleb128 .LcleanupEND2-.LcleanupSTART2
-	.uleb128 sem_timedwait_cleanup-.LSTARTCODE
+	.uleb128 sem_timedwait_cleanup2-.LSTARTCODE
 	.uleb128 0
 #endif
 	.uleb128 .LcallUR-.LSTARTCODE
 	.uleb128 .LENDCODE-.LcallUR
 	.uleb128 0
 	.uleb128 0
+#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
+	.uleb128 .LcallUR2-.LSTARTCODE
+	.uleb128 .LENDCODE2-.LcallUR2
+	.uleb128 0
+	.uleb128 0
+#endif
 .Lcstend: