summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/dl-trampoline.h
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/dl-trampoline.h')
-rw-r--r--sysdeps/x86_64/dl-trampoline.h182
1 files changed, 93 insertions, 89 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index f4191833ab..a28b1e73a4 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -1,5 +1,5 @@
/* PLT trampolines. x86-64 version.
- Copyright (C) 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2009-2018 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,40 +16,47 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#undef REGISTER_SAVE_AREA_RAW
-#ifdef __ILP32__
-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
- VEC7. */
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + VEC_SIZE * 8)
-#else
-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
- BND0, BND1, BND2, BND3 and VEC0 to VEC7. */
-# define REGISTER_SAVE_AREA_RAW (8 * 7 + 16 * 4 + VEC_SIZE * 8)
-#endif
+ .text
+#ifdef _dl_runtime_resolve
-#undef REGISTER_SAVE_AREA
-#undef LOCAL_STORAGE_AREA
-#undef BASE
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
-# define REGISTER_SAVE_AREA (REGISTER_SAVE_AREA_RAW + 8)
-/* Local stack area before jumping to function address: RBX. */
-# define LOCAL_STORAGE_AREA 8
-# define BASE rbx
-# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
-# error REGISTER_SAVE_AREA must be multples of VEC_SIZE
+# undef REGISTER_SAVE_AREA
+# undef LOCAL_STORAGE_AREA
+# undef BASE
+
+# if (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multples of 16
+# endif
+
+# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+# error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT
# endif
-#else
-# define REGISTER_SAVE_AREA REGISTER_SAVE_AREA_RAW
+
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+/* Local stack area before jumping to function address: RBX. */
+# define LOCAL_STORAGE_AREA 8
+# define BASE rbx
+# ifdef USE_FXSAVE
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
+# if (REGISTER_SAVE_AREA % 16) != 0
+# error REGISTER_SAVE_AREA must be multples of 16
+# endif
+# endif
+# else
+# ifndef USE_FXSAVE
+# error USE_FXSAVE must be defined
+# endif
+/* Use fxsave to save XMM registers. */
+# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: All saved
registers. */
-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
-# define BASE rsp
-# if (REGISTER_SAVE_AREA % 16) != 8
-# error REGISTER_SAVE_AREA must be odd multples of 8
+# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
+# define BASE rsp
+# if (REGISTER_SAVE_AREA % 16) != 8
+# error REGISTER_SAVE_AREA must be odd multples of 8
+# endif
# endif
-#endif
- .text
.globl _dl_runtime_resolve
.hidden _dl_runtime_resolve
.type _dl_runtime_resolve, @function
@@ -57,19 +64,31 @@
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
-# if LOCAL_STORAGE_AREA != 8
-# error LOCAL_STORAGE_AREA must be 8
-# endif
+ _CET_ENDBR
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# if LOCAL_STORAGE_AREA != 8
+# error LOCAL_STORAGE_AREA must be 8
+# endif
pushq %rbx # push subtracts stack by 8.
cfi_adjust_cfa_offset(8)
cfi_rel_offset(%rbx, 0)
mov %RSP_LP, %RBX_LP
cfi_def_cfa_register(%rbx)
- and $-VEC_SIZE, %RSP_LP
-#endif
+ and $-STATE_SAVE_ALIGNMENT, %RSP_LP
+# endif
+# ifdef REGISTER_SAVE_AREA
sub $REGISTER_SAVE_AREA, %RSP_LP
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+# else
+ # Allocate stack space of the required size to save the state.
+# if IS_IN (rtld)
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+# else
+ sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+# endif
+# endif
# Preserve registers otherwise clobbered.
movq %rax, REGISTER_SAVE_RAX(%rsp)
movq %rcx, REGISTER_SAVE_RCX(%rsp)
@@ -78,59 +97,42 @@ _dl_runtime_resolve:
movq %rdi, REGISTER_SAVE_RDI(%rsp)
movq %r8, REGISTER_SAVE_R8(%rsp)
movq %r9, REGISTER_SAVE_R9(%rsp)
- VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
- VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
- VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
- VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
- VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
- VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
- VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
- VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
-#ifndef __ILP32__
- # We also have to preserve bound registers. These are nops if
- # Intel MPX isn't available or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
- bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
- bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
- bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# ifdef USE_FXSAVE
+ fxsave STATE_SAVE_OFFSET(%rsp)
# else
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1b,0x04,0x24
+ movl $STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ # Clear the XSAVE Header.
+# ifdef USE_XSAVE
+ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
+# endif
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
+ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
+# ifdef USE_XSAVE
+ xsave STATE_SAVE_OFFSET(%rsp)
# else
- .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
+ xsavec STATE_SAVE_OFFSET(%rsp)
# endif
- .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
- .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
# endif
-#endif
# Copy args pushed by PLT in register.
# %rdi: link_map, %rsi: reloc_index
mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
call _dl_fixup # Call resolver.
mov %RAX_LP, %R11_LP # Save return value
-#ifndef __ILP32__
- # Restore bound registers. These are nops if Intel MPX isn't
- # avaiable or disabled.
-# ifdef HAVE_MPX_SUPPORT
- bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
- bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
- bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
- bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+ # Get register content back.
+# ifdef USE_FXSAVE
+ fxrstor STATE_SAVE_OFFSET(%rsp)
# else
- .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
- .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
- .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
-# if REGISTER_SAVE_BND0 == 0
- .byte 0x66,0x0f,0x1a,0x04,0x24
-# else
- .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
-# endif
+ movl $STATE_SAVE_MASK, %eax
+ xorl %edx, %edx
+ xrstor STATE_SAVE_OFFSET(%rsp)
# endif
-#endif
- # Get register content back.
movq REGISTER_SAVE_R9(%rsp), %r9
movq REGISTER_SAVE_R8(%rsp), %r8
movq REGISTER_SAVE_RDI(%rsp), %rdi
@@ -138,20 +140,12 @@ _dl_runtime_resolve:
movq REGISTER_SAVE_RDX(%rsp), %rdx
movq REGISTER_SAVE_RCX(%rsp), %rcx
movq REGISTER_SAVE_RAX(%rsp), %rax
- VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
- VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
mov %RBX_LP, %RSP_LP
cfi_def_cfa_register(%rsp)
movq (%rsp), %rbx
cfi_restore(%rbx)
-#endif
+# endif
# Adjust stack(PLT did 2 pushes)
add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
@@ -160,9 +154,10 @@ _dl_runtime_resolve:
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
+#endif
-#ifndef PROF
+#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
# error LR_VECTOR_OFFSET must be multples of VEC_SIZE
# endif
@@ -174,6 +169,7 @@ _dl_runtime_resolve:
_dl_runtime_profile:
cfi_startproc
cfi_adjust_cfa_offset(16) # Incorporate PLT
+ _CET_ENDBR
/* The La_x86_64_regs data structure pointed to by the
fourth paramater must be VEC_SIZE-byte aligned. This must
be explicitly enforced. We have the set up a dynamically
@@ -446,8 +442,16 @@ _dl_runtime_profile:
# ifdef RESTORE_AVX
/* sizeof(La_x86_64_retval). Need extra space for 2 SSE
registers to detect if xmm0/xmm1 registers are changed
- by audit module. */
- sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP
+ by audit module. Since rsp is aligned to VEC_SIZE, we
+ need to make sure that the address of La_x86_64_retval +
+ LRV_VECTOR0_OFFSET is aligned to VEC_SIZE. */
+# define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
+# define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
+# if LRV_MISALIGNED == 0
+ sub $LRV_SPACE, %RSP_LP
+# else
+ sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
+# endif
# else
sub $LRV_SIZE, %RSP_LP # sizeof(La_x86_64_retval)
# endif