summary | refs | log | tree | commit | diff
path: root/nptl
diff options
context:
space:
mode:
author Andreas Schwab <schwab@redhat.com> 2009-07-30 14:18:37 +0200
committer Andreas Schwab <schwab@redhat.com> 2009-07-30 14:18:37 +0200
commit b870de510d54108c7c839abc17ea1559085e55a3 (patch)
tree d59aca63c9713ac51b929e388187f6ec0bb1273e /nptl
parent ca2a37b64e0347b400e58da9ca238c9320a55edb (diff)
parent 78c4ef475d47a2289635f74b726f52defedb4651 (diff)
Merge commit 'origin/master' into fedora/master
Diffstat (limited to 'nptl')
-rw-r--r-- nptl/ChangeLog 29
-rw-r--r-- nptl/pthreadP.h 2
-rw-r--r-- nptl/pthread_mutex_lock.c 21
-rw-r--r-- nptl/pthread_mutex_unlock.c 7
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym 1
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S 6
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S 6
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S 24
-rw-r--r-- nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S 23
-rw-r--r-- nptl/sysdeps/x86_64/tcb-offsets.sym 1
-rw-r--r-- nptl/sysdeps/x86_64/tls.h 80
11 files changed, 157 insertions, 43 deletions
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index e5fc474916..0046b20608 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,3 +1,32 @@
+2009-07-29 Ulrich Drepper <drepper@redhat.com>
+
+ * sysdeps/x86_64/tls.h (TLS_TCB_ALIGN): Define explicitly to 32.
+
+ * sysdeps/x86_64/tls.h (tcbhead_t): Add room for SSE registers the
+ dynamic linker might have to save.
+ Define RTLD_CHECK_FOREIGN_CALL, RTLD_ENABLE_FOREIGN_CALL,
+ RTLD_PREPARE_FOREIGN_CALL, and RTLD_FINALIZE_FOREIGN_CALL. Pretty
+ printing.
+
+ * sysdeps/x86_64/tcb-offsets.sym: Add RTLD_SAVESPACE_SSE.
+
+2009-07-28 Ulrich Drepper <drepper@redhat.com>
+
+ * pthread_mutex_lock.c [NO_INCR] (__pthread_mutex_cond_lock_adjust):
+ New function.
+ * pthreadP.h: Declare __pthread_mutex_cond_lock_adjust.
+ * sysdeps/unix/sysv/linux/pthread-pi-defines.sym: Add ROBUST_BIT.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Don't use
+ requeue_pi for robust mutexes.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise.
+ Don't just skip __pthread_mutex_cond_lock; call
+ __pthread_mutex_cond_lock_adjust instead.
+ * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise.
+
+ * pthread_mutex_unlock.c (__pthread_mutex_unlock_full): Minor
+ optimization of PI mutex handling.
+
2009-07-27 Ulrich Drepper <drepper@redhat.com>
[BZ #10418]
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index ed9fc625ba..43ca44c829 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -418,6 +418,8 @@ extern int __pthread_mutex_lock_internal (pthread_mutex_t *__mutex)
attribute_hidden;
extern int __pthread_mutex_cond_lock (pthread_mutex_t *__mutex)
attribute_hidden internal_function;
+extern void __pthread_mutex_cond_lock_adjust (pthread_mutex_t *__mutex)
+ attribute_hidden internal_function;
extern int __pthread_mutex_unlock (pthread_mutex_t *__mutex);
extern int __pthread_mutex_unlock_internal (pthread_mutex_t *__mutex)
attribute_hidden;
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 406e588fdb..50dc18803d 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -473,3 +473,22 @@ __pthread_mutex_lock_full (pthread_mutex_t *mutex)
strong_alias (__pthread_mutex_lock, pthread_mutex_lock)
strong_alias (__pthread_mutex_lock, __pthread_mutex_lock_internal)
#endif
+
+
+#ifdef NO_INCR
+void
+__pthread_mutex_cond_lock_adjust (mutex)
+ pthread_mutex_t *mutex;
+{
+ assert ((mutex->__data.__kind & PTHREAD_MUTEX_PRIO_INHERIT_NP) != 0);
+ assert ((mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) == 0);
+ assert ((mutex->__data.__kind & PTHREAD_MUTEX_PSHARED_BIT) == 0);
+
+ /* Record the ownership. */
+ pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
+ mutex->__data.__owner = id;
+
+ if (mutex->__data.__kind == PTHREAD_MUTEX_PI_RECURSIVE_NP)
+ ++mutex->__data.__count;
+}
+#endif
diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
index fbe8274a55..f9fe10b0f2 100644
--- a/nptl/pthread_mutex_unlock.c
+++ b/nptl/pthread_mutex_unlock.c
@@ -150,7 +150,7 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
if (--mutex->__data.__count != 0)
/* We still hold the mutex. */
return 0;
- goto continue_pi;
+ goto continue_pi_non_robust;
case PTHREAD_MUTEX_PI_ROBUST_RECURSIVE_NP:
/* Recursive mutex. */
@@ -173,7 +173,7 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
/* We still hold the mutex. */
return 0;
- goto continue_pi;
+ goto continue_pi_robust;
case PTHREAD_MUTEX_PI_ERRORCHECK_NP:
case PTHREAD_MUTEX_PI_NORMAL_NP:
@@ -195,9 +195,9 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
pi_notrecoverable:
newowner = PTHREAD_MUTEX_NOTRECOVERABLE;
- continue_pi:
if ((mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP) != 0)
{
+ continue_pi_robust:
/* Remove mutex from the list.
Note: robust PI futexes are signaled by setting bit 0. */
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
@@ -206,6 +206,7 @@ __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
DEQUEUE_MUTEX (mutex);
}
+ continue_pi_non_robust:
mutex->__data.__owner = newowner;
if (decr)
/* One less user. */
diff --git a/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym b/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym
index d985c6a79b..46fbd0de74 100644
--- a/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym
+++ b/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym
@@ -3,5 +3,6 @@
-- These PI macros are used by assembly code.
MUTEX_KIND offsetof (pthread_mutex_t, __data.__kind)
+ROBUST_BIT PTHREAD_MUTEX_ROBUST_NORMAL_NP
PI_BIT PTHREAD_MUTEX_PRIO_INHERIT_NP
PS_BIT PTHREAD_MUTEX_PSHARED_BIT
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
index 0f10ec910c..224a56088e 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S
@@ -75,8 +75,10 @@ __pthread_cond_broadcast:
jne 9f
/* Requeue to a PI mutex if the PI bit is set. */
- testl $PI_BIT, MUTEX_KIND(%r8)
- jne 81f
+ movl MUTEX_KIND(%r8), %eax
+ andl $(ROBUST_BIT|PI_BIT), %eax
+ cmpl $PI_BIT, %eax
+ je 81f
/* Wake up all threads. */
#ifdef __ASSUME_PRIVATE_FUTEX
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
index f1050fea7c..4d001eec7f 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S
@@ -64,8 +64,10 @@ __pthread_cond_signal:
/* Get the address of the mutex used. */
movq dep_mutex(%r8), %rcx
- testl $PI_BIT, MUTEX_KIND(%rcx)
- jne 9f
+ movl MUTEX_KIND(%rcx), %eax
+ andl $(ROBUST_BIT|PI_BIT), %eax
+ cmpl $PI_BIT, %eax
+ je 9f
#ifdef __ASSUME_PRIVATE_FUTEX
movl $(FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG), %esi
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 7486825d5f..4913beb8af 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -165,9 +165,12 @@ __pthread_cond_timedwait:
je 60f
movq dep_mutex(%rdi), %r8
- /* Requeue to a PI mutex if the PI bit is set. */
- testl $PI_BIT, MUTEX_KIND(%r8)
- je 61f
+ /* Requeue to a non-robust PI mutex if the PI bit is set and
+ the robust bit is not set. */
+ movl MUTEX_KIND(%r8), %eax
+ andl $(ROBUST_BIT|PI_BIT), %eax
+ cmpl $PI_BIT, %eax
+ jne 61f
movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
xorl %eax, %eax
@@ -289,11 +292,10 @@ __pthread_cond_timedwait:
/* If requeue_pi is used the kernel performs the locking of the
mutex. */
-41: xorl %eax, %eax
+41: movq 16(%rsp), %rdi
testl %r15d, %r15d
- jnz 63f
+ jnz 64f
- movq 16(%rsp), %rdi
callq __pthread_mutex_cond_lock
63: testq %rax, %rax
@@ -316,12 +318,18 @@ __pthread_cond_timedwait:
retq
- /* Initial locking failed. */
-31: cfi_adjust_cfa_offset(4 * 8 + FRAME_SIZE)
+ cfi_adjust_cfa_offset(4 * 8 + FRAME_SIZE)
cfi_rel_offset(%r12, FRAME_SIZE + 24)
cfi_rel_offset(%r13, FRAME_SIZE + 16)
cfi_rel_offset(%r14, FRAME_SIZE + 8)
cfi_rel_offset(%r15, FRAME_SIZE)
+
+64: callq __pthread_mutex_cond_lock_adjust
+ movq %r14, %rax
+ jmp 48b
+
+ /* Initial locking failed. */
+31:
#if cond_lock != 0
addq $cond_lock, %rdi
#endif
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index 2fab38e277..a66523eab6 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -134,9 +134,12 @@ __pthread_cond_wait:
je 60f
movq dep_mutex-cond_futex(%rdi), %r8
- /* Requeue to a PI mutex if the PI bit is set. */
- testl $PI_BIT, MUTEX_KIND(%r8)
- je 61f
+ /* Requeue to a non-robust PI mutex if the PI bit is set and
+ the robust bit is not set. */
+ movl MUTEX_KIND(%r8), %eax
+ andl $(ROBUST_BIT|PI_BIT), %eax
+ cmpl $PI_BIT, %eax
+ jne 61f
movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi
movl $SYS_futex, %eax
@@ -234,11 +237,10 @@ __pthread_cond_wait:
/* If requeue_pi is used the kernel performs the locking of the
mutex. */
-11: xorl %eax, %eax
+11: movq 16(%rsp), %rdi
testl %r13d, %r13d
- jnz 14f
+ jnz 18f
- movq 16(%rsp), %rdi
callq __pthread_mutex_cond_lock
14: addq $FRAME_SIZE, %rsp
@@ -254,11 +256,16 @@ __pthread_cond_wait:
/* We return the result of the mutex_lock operation. */
retq
- /* Initial locking failed. */
-1:
cfi_adjust_cfa_offset(16 + FRAME_SIZE)
cfi_rel_offset(%r12, FRAME_SIZE + 8)
cfi_rel_offset(%r13, FRAME_SIZE)
+
+18: callq __pthread_mutex_cond_lock_adjust
+ xorl %eax, %eax
+ jmp 14b
+
+ /* Initial locking failed. */
+1:
#if cond_lock != 0
addq $cond_lock, %rdi
#endif
diff --git a/nptl/sysdeps/x86_64/tcb-offsets.sym b/nptl/sysdeps/x86_64/tcb-offsets.sym
index 1c70c6bde7..51f35c61cf 100644
--- a/nptl/sysdeps/x86_64/tcb-offsets.sym
+++ b/nptl/sysdeps/x86_64/tcb-offsets.sym
@@ -15,3 +15,4 @@ VGETCPU_CACHE_OFFSET offsetof (tcbhead_t, vgetcpu_cache)
#ifndef __ASSUME_PRIVATE_FUTEX
PRIVATE_FUTEX offsetof (tcbhead_t, private_futex)
#endif
+RTLD_SAVESPACE_SSE offsetof (tcbhead_t, rtld_savespace_sse)
diff --git a/nptl/sysdeps/x86_64/tls.h b/nptl/sysdeps/x86_64/tls.h
index ea89f3b1a2..4212038ab5 100644
--- a/nptl/sysdeps/x86_64/tls.h
+++ b/nptl/sysdeps/x86_64/tls.h
@@ -29,6 +29,7 @@
# include <sysdep.h>
# include <kernel-features.h>
# include <bits/wordsize.h>
+# include <xmmintrin.h>
/* Type for the dtv. */
@@ -55,16 +56,23 @@ typedef struct
uintptr_t stack_guard;
uintptr_t pointer_guard;
unsigned long int vgetcpu_cache[2];
-#ifndef __ASSUME_PRIVATE_FUTEX
+# ifndef __ASSUME_PRIVATE_FUTEX
int private_futex;
-#else
+# else
int __unused1;
-#endif
-#if __WORDSIZE == 64
- int __pad1;
-#endif
+# endif
+# if __WORDSIZE == 64
+ int rtld_must_xmm_save;
+# endif
/* Reservation of some values for the TM ABI. */
void *__private_tm[5];
+# if __WORDSIZE == 64
+ long int __unused2;
+ /* Have space for the post-AVX register size. */
+ __m128 rtld_savespace_sse[8][4];
+
+ void *__padding[8];
+# endif
} tcbhead_t;
#else /* __ASSEMBLER__ */
@@ -109,7 +117,12 @@ typedef struct
# define TLS_TCB_SIZE sizeof (struct pthread)
/* Alignment requirements for the TCB. */
-# define TLS_TCB_ALIGN __alignof__ (struct pthread)
+//# define TLS_TCB_ALIGN __alignof__ (struct pthread)
+// Normally the above would be correct. But we have to store post-AVX
+// vector registers in the TCB and we want the storage to be aligned.
+// Unfortunately there isn't yet a type for these values and hence no
+// 32-byte alignment requirement. Make this explicit, for now.
+# define TLS_TCB_ALIGN 32
/* The TCB can have any size and the memory following the address the
thread pointer points to is unspecified. Allocate the TCB there. */
@@ -298,7 +311,7 @@ typedef struct
/* Atomic compare and exchange on TLS, returning old value. */
-#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
({ __typeof (descr->member) __ret; \
__typeof (oldval) __old = (oldval); \
if (sizeof (descr->member) == 4) \
@@ -313,7 +326,7 @@ typedef struct
/* Atomic logical and. */
-#define THREAD_ATOMIC_AND(descr, member, val) \
+# define THREAD_ATOMIC_AND(descr, member, val) \
(void) ({ if (sizeof ((descr)->member) == 4) \
asm volatile (LOCK_PREFIX "andl %1, %%fs:%P0" \
:: "i" (offsetof (struct pthread, member)), \
@@ -324,7 +337,7 @@ typedef struct
/* Atomic set bit. */
-#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
(void) ({ if (sizeof ((descr)->member) == 4) \
asm volatile (LOCK_PREFIX "orl %1, %%fs:%P0" \
:: "i" (offsetof (struct pthread, member)), \
@@ -334,7 +347,7 @@ typedef struct
abort (); })
-#define CALL_THREAD_FCT(descr) \
+# define CALL_THREAD_FCT(descr) \
({ void *__res; \
asm volatile ("movq %%fs:%P2, %%rdi\n\t" \
"callq *%%fs:%P1" \
@@ -355,18 +368,18 @@ typedef struct
/* Set the pointer guard field in the TCB head. */
-#define THREAD_SET_POINTER_GUARD(value) \
+# define THREAD_SET_POINTER_GUARD(value) \
THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
-#define THREAD_COPY_POINTER_GUARD(descr) \
+# define THREAD_COPY_POINTER_GUARD(descr) \
((descr)->header.pointer_guard \
= THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
/* Get and set the global scope generation counter in the TCB head. */
-#define THREAD_GSCOPE_FLAG_UNUSED 0
-#define THREAD_GSCOPE_FLAG_USED 1
-#define THREAD_GSCOPE_FLAG_WAIT 2
-#define THREAD_GSCOPE_RESET_FLAG() \
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED 1
+# define THREAD_GSCOPE_FLAG_WAIT 2
+# define THREAD_GSCOPE_RESET_FLAG() \
do \
{ int __res; \
asm volatile ("xchgl %0, %%fs:%P1" \
@@ -377,11 +390,40 @@ typedef struct
lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \
} \
while (0)
-#define THREAD_GSCOPE_SET_FLAG() \
+# define THREAD_GSCOPE_SET_FLAG() \
THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-#define THREAD_GSCOPE_WAIT() \
+# define THREAD_GSCOPE_WAIT() \
GL(dl_wait_lookup_done) ()
+
+# ifdef SHARED
+/* Defined in dl-trampoline.S. */
+extern void _dl_x86_64_save_sse (void);
+extern void _dl_x86_64_restore_sse (void);
+
+# define RTLD_CHECK_FOREIGN_CALL \
+ (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
+
+# define RTLD_ENABLE_FOREIGN_CALL \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
+
+# define RTLD_PREPARE_FOREIGN_CALL \
+ do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \
+ { \
+ _dl_x86_64_save_sse (); \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
+ } \
+ while (0)
+
+# define RTLD_FINALIZE_FOREIGN_CALL \
+ do { \
+ if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \
+ _dl_x86_64_restore_sse (); \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
+ } while (0)
+# endif
+
+
#endif /* __ASSEMBLER__ */
#endif /* tls.h */