summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/dl-tlsdesc.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/dl-tlsdesc.S')
-rw-r--r--sysdeps/x86_64/dl-tlsdesc.S245
1 files changed, 245 insertions, 0 deletions
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
new file mode 100644
index 0000000000..5eac1f2a5b
--- /dev/null
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -0,0 +1,245 @@
+/* Thread-local storage handling in the ELF dynamic linker. x86_64 version.
+ Copyright (C) 2004, 2005, 2008 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+ .text
+
+ /* This function is used to compute the TP offset for symbols in
+ Static TLS, i.e., symbols whose TP offset is the same for all
+ threads.
+
+ The incoming %rax points to the TLS descriptor, such that
+ 0(%rax) points to _dl_tlsdesc_return itself, and 8(%rax) holds
+ the TP offset of the symbol the descriptor refers to. That
+ offset is returned in %rax; no other register is written. */
+
+ .hidden _dl_tlsdesc_return
+ .global _dl_tlsdesc_return
+ .type _dl_tlsdesc_return,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_return:
+ movq 8(%rax), %rax /* Replace the descriptor pointer with the word stored at 8(%rax). */
+ ret
+ cfi_endproc
+ .size _dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+ /* This function is used for undefined weak TLS symbols, for
+ which the base address (i.e., disregarding any addend) should
+ resolve to NULL.
+
+ %rax points to the TLS descriptor, such that 0(%rax) points to
+ _dl_tlsdesc_undefweak itself, and 8(%rax) holds the addend.
+ We return the addend minus the TP in %rax, such that, when the
+ caller adds TP, it gets the addend back. If the addend is zero,
+ as usual, the caller ends up with a NULL pointer. */
+
+ .hidden _dl_tlsdesc_undefweak
+ .global _dl_tlsdesc_undefweak
+ .type _dl_tlsdesc_undefweak,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_undefweak:
+ movq 8(%rax), %rax /* %rax = addend stored in the descriptor. */
+ subq %fs:0, %rax /* Subtract the word at %fs:0 (the TP, per the comment above). */
+ ret
+ cfi_endproc
+ .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+ .hidden _dl_tlsdesc_dynamic
+ .global _dl_tlsdesc_dynamic
+ .type _dl_tlsdesc_dynamic,@function
+
+ /* %rax points to the TLS descriptor, such that 0(%rax) points to
+ _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
+ tlsdesc_dynamic_arg object. It must return in %rax the offset
+ between the thread pointer and the object denoted by the
+ argument, without clobbering any registers.
+
+ The assembly code that follows is a rendition of the following
+ C code, hand-optimized a little bit.
+
+ptrdiff_t
+_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
+{
+ struct tlsdesc_dynamic_arg *td = tdp->arg;
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+ if (__builtin_expect (td->gen_count <= dtv[0].counter
+ && (dtv[td->tlsinfo.ti_module].pointer.val
+ != TLS_DTV_UNALLOCATED),
+ 1))
+ return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+ - __thread_pointer;
+
+ return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
+}
+*/
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_dynamic:
+ /* Preserve call-clobbered registers that we modify.
+ We need two scratch regs anyway. */
+ movq %rsi, -16(%rsp) /* Spill %rsi below %rsp; %rsp itself is not moved. */
+ movq %fs:DTV_OFFSET, %rsi /* %rsi = dtv (cf. the C rendition above). */
+ movq %rdi, -8(%rsp) /* Spill %rdi likewise. */
+ movq TLSDESC_ARG(%rax), %rdi /* %rdi = td, the descriptor's argument. */
+ movq (%rsi), %rax /* %rax = first word of the dtv (dtv[0].counter in the C rendition). */
+ cmpq %rax, TLSDESC_GEN_COUNT(%rdi)
+ ja .Lslow /* Generation count in td is above (unsigned) the dtv's: slow path. */
+ movq TLSDESC_MODID(%rdi), %rax /* %rax = module id from td. */
+ salq $4, %rax /* Scale the module id by 16 to index the dtv. */
+ movq (%rax,%rsi), %rax /* %rax = the dtv slot for that module. */
+ cmpq $-1, %rax /* Corresponds to the TLS_DTV_UNALLOCATED test in the C rendition. */
+ je .Lslow
+ addq TLSDESC_MODOFF(%rdi), %rax /* Add the symbol's offset within the module's block. */
+.Lret:
+ movq -16(%rsp), %rsi /* Restore the caller's %rsi. */
+ subq %fs:0, %rax /* Turn the address into an offset from the thread pointer. */
+ movq -8(%rsp), %rdi /* Restore the caller's %rdi. */
+ ret
+.Lslow:
+ /* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
+ r10 and r11. Also, align the stack, that's off by 8 bytes. */
+ subq $72, %rsp /* The frame also covers the %rsi/%rdi spill slots, now at 56(%rsp) and 64(%rsp). */
+ cfi_adjust_cfa_offset (72)
+ movq %rdx, 8(%rsp)
+ movq %rcx, 16(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+ movq %r10, 40(%rsp)
+ movq %r11, 48(%rsp)
+ /* %rdi already points to the tlsinfo data structure. */
+ call __tls_get_addr@PLT /* The result in %rax is consumed at .Lret. */
+ movq 8(%rsp), %rdx
+ movq 16(%rsp), %rcx
+ movq 24(%rsp), %r8
+ movq 32(%rsp), %r9
+ movq 40(%rsp), %r10
+ movq 48(%rsp), %r11
+ addq $72, %rsp /* Back to the entry %rsp, so .Lret finds the spills at -16/-8(%rsp). */
+ cfi_adjust_cfa_offset (-72)
+ jmp .Lret
+ cfi_endproc
+ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+#endif /* SHARED */
+
+ /* This function is a wrapper for a lazy resolver for TLS_DESC
+ RELA relocations. The incoming 0(%rsp) points to the caller's
+ link map, pushed by the dynamic object's internal lazy TLS
+ resolver front-end before tail-calling us. We need to pop it
+ ourselves. %rax points to a TLS descriptor, such that 0(%rax)
+ holds the address of the internal resolver front-end (unless
+ some other thread beat us to resolving it) and 8(%rax) holds a
+ pointer to the relocation.
+
+ When the actual resolver returns, it will have adjusted the
+ TLS descriptor such that we can tail-call it for it to return
+ the TP offset of the symbol. */
+
+ .hidden _dl_tlsdesc_resolve_rela
+ .global _dl_tlsdesc_resolve_rela
+ .type _dl_tlsdesc_resolve_rela,@function
+ cfi_startproc
+ .align 16
+ /* The PLT entry will have pushed the link_map pointer. */
+_dl_tlsdesc_resolve_rela:
+ cfi_adjust_cfa_offset (8) /* Account for the pushed link_map pointer. */
+ /* Save all call-clobbered registers. */
+ subq $72, %rsp /* Nine 8-byte save slots: 0(%rsp) through 64(%rsp). */
+ cfi_adjust_cfa_offset (72)
+ movq %rax, (%rsp) /* Keep the tlsdesc pointer for the final tail call. */
+ movq %rdi, 8(%rsp)
+ movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
+ movq %rsi, 16(%rsp)
+ movq 72(%rsp), %rsi /* Pass link_map* in %rsi; it sat at 0(%rsp) on entry. */
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+ movq %r10, 40(%rsp)
+ movq %r11, 48(%rsp)
+ movq %rdx, 56(%rsp)
+ movq %rcx, 64(%rsp)
+ call _dl_tlsdesc_resolve_rela_fixup
+ movq (%rsp), %rax /* Reload the tlsdesc pointer. */
+ movq 8(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 24(%rsp), %r8
+ movq 32(%rsp), %r9
+ movq 40(%rsp), %r10
+ movq 48(%rsp), %r11
+ movq 56(%rsp), %rdx
+ movq 64(%rsp), %rcx
+ addq $80, %rsp /* Drop the 72-byte frame plus the 8-byte link_map slot. */
+ cfi_adjust_cfa_offset (-80)
+ jmp *(%rax) /* Tail-call whatever entry is now stored at 0(%rax). */
+ cfi_endproc
+ .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
+
+ /* This function is a placeholder for lazy resolving of TLS
+ relocations. Once some thread starts resolving a TLS
+ relocation, it sets up the TLS descriptor to use this
+ resolver, such that other threads that would attempt to
+ resolve it concurrently may skip the call to the original lazy
+ resolver and go straight to a condition wait.
+
+ %rax points to the TLS descriptor on entry. When the actual
+ resolver returns, it will have adjusted the TLS descriptor such
+ that we can tail-call 0(%rax) for it to return the TP offset. */
+
+ .hidden _dl_tlsdesc_resolve_hold
+ .global _dl_tlsdesc_resolve_hold
+ .type _dl_tlsdesc_resolve_hold,@function
+ cfi_startproc
+ .align 16
+_dl_tlsdesc_resolve_hold:
+0:
+ /* Save all call-clobbered registers. */
+ subq $72, %rsp /* Nine 8-byte save slots: 0(%rsp) through 64(%rsp). */
+ cfi_adjust_cfa_offset (72)
+ movq %rax, (%rsp) /* Keep the tlsdesc pointer for the final tail call. */
+ movq %rdi, 8(%rsp)
+ movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
+ movq %rsi, 16(%rsp)
+ /* Pass _dl_tlsdesc_resolve_hold's address in %rsi. NOTE(review): confirm the `. - sym(%rip)' operand assembles to that address. */
+ leaq . - _dl_tlsdesc_resolve_hold(%rip), %rsi
+ movq %r8, 24(%rsp)
+ movq %r9, 32(%rsp)
+ movq %r10, 40(%rsp)
+ movq %r11, 48(%rsp)
+ movq %rdx, 56(%rsp)
+ movq %rcx, 64(%rsp)
+ call _dl_tlsdesc_resolve_hold_fixup
+1:
+ movq (%rsp), %rax /* Reload the tlsdesc pointer. */
+ movq 8(%rsp), %rdi
+ movq 16(%rsp), %rsi
+ movq 24(%rsp), %r8
+ movq 32(%rsp), %r9
+ movq 40(%rsp), %r10
+ movq 48(%rsp), %r11
+ movq 56(%rsp), %rdx
+ movq 64(%rsp), %rcx
+ addq $72, %rsp /* Nothing was pushed for us, so only the frame is dropped. */
+ cfi_adjust_cfa_offset (-72)
+ jmp *(%rax) /* Use the full 64-bit pointer; (%eax) would address through its low 32 bits only. */
+ cfi_endproc
+ .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold