summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIgor Zamyatin <igor.zamyatin@intel.com>2014-04-01 10:16:04 -0700
committerH.J. Lu <hjl.tools@gmail.com>2014-04-09 15:38:09 -0700
commita4c75cfd56e536c2b18556e8a482d88dffa0fffc (patch)
tree209bb53676f4cee8fde2a1f9d038829dd78fe28c
parent27822ce67fbf7f2b204992a410e7da2e8c1e2607 (diff)
Save/restore bound registers in _dl_runtime_resolve
This patch saves and restores bound registers in symbol lookup for x86-64: 1. Branches without BND prefix clear bound registers. 2. x86-64 pass bounds in bound registers as specified in MPX psABI extension on hjl/mpx/master branch at https://github.com/hjl-tools/x86-64-psABI https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc Binutils has been updated to create an alternate PLT to add BND prefix when branching to ld.so. * config.h.in (HAVE_MPX_SUPPORT): New #undef. * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT. * sysdeps/x86_64/configure: Regenerated. * sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New macro. (REGISTER_SAVE_RAX): Likewise. (REGISTER_SAVE_RCX): Likewise. (REGISTER_SAVE_RDX): Likewise. (REGISTER_SAVE_RSI): Likewise. (REGISTER_SAVE_RDI): Likewise. (REGISTER_SAVE_R8): Likewise. (REGISTER_SAVE_R9): Likewise. (REGISTER_SAVE_BND0): Likewise. (REGISTER_SAVE_BND1): Likewise. (REGISTER_SAVE_BND2): Likewise. (_dl_runtime_resolve): Use them. Save and restore Intel MPX bound registers when calling _dl_fixup.
-rw-r--r--ChangeLog21
-rw-r--r--config.h.in3
-rw-r--r--sysdeps/x86_64/configure27
-rw-r--r--sysdeps/x86_64/configure.ac15
-rw-r--r--sysdeps/x86_64/dl-trampoline.S99
5 files changed, 145 insertions, 20 deletions
diff --git a/ChangeLog b/ChangeLog
index 4598b60648..7cc48ce4e8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2014-04-09 Igor Zamyatin <igor.zamyatin@intel.com>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ * config.h.in (HAVE_MPX_SUPPORT): New #undef.
+ * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
+ * sysdeps/x86_64/configure: Regenerated.
+ * sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New
+ macro.
+ (REGISTER_SAVE_RAX): Likewise.
+ (REGISTER_SAVE_RCX): Likewise.
+ (REGISTER_SAVE_RDX): Likewise.
+ (REGISTER_SAVE_RSI): Likewise.
+ (REGISTER_SAVE_RDI): Likewise.
+ (REGISTER_SAVE_R8): Likewise.
+ (REGISTER_SAVE_R9): Likewise.
+ (REGISTER_SAVE_BND0): Likewise.
+ (REGISTER_SAVE_BND1): Likewise.
+ (REGISTER_SAVE_BND2): Likewise.
+ (_dl_runtime_resolve): Use them. Save and restore Intel MPX
+ bound registers when calling _dl_fixup.
+
2014-04-09 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* bits/string.h (_STRING_ARCH_unaligned): Define it to 0.
diff --git a/config.h.in b/config.h.in
index 3fc34bdb1e..b6e3623d92 100644
--- a/config.h.in
+++ b/config.h.in
@@ -104,6 +104,9 @@
/* Define if assembler supports AVX512. */
#undef HAVE_AVX512_ASM_SUPPORT
+/* Define if assembler supports Intel MPX. */
+#undef HAVE_MPX_SUPPORT
+
/* Define if gcc supports FMA4. */
#undef HAVE_FMA4_SUPPORT
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index c1c88c8619..45d868d93d 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -222,6 +222,33 @@ $as_echo "$libc_cv_cc_novzeroupper" >&6; }
config_vars="$config_vars
config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
+$as_echo_n "checking for Intel MPX support... " >&6; }
+if ${libc_cv_asm_mpx+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ bndmov %bnd0,(%rsp)
+EOF
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_asm_mpx=yes
+else
+ libc_cv_asm_mpx=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
+$as_echo "$libc_cv_asm_mpx" >&6; }
+if test $libc_cv_asm_mpx == yes; then
+ $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
+
+fi
+
$as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
# work around problem with autoconf and empty lines at the end of files
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index d34f9a8eec..9138f63a3f 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -75,6 +75,21 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper],
])
LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
+dnl Check whether asm supports Intel MPX.  A one-instruction file using
+dnl bndmov is assembled; success sets libc_cv_asm_mpx to yes, failure to no.
+AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
+cat > conftest.s <<\EOF
+ bndmov %bnd0,(%rsp)
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_asm_mpx=yes
+else
+ libc_cv_asm_mpx=no
+fi
+rm -f conftest*])
+dnl Use "=" for the string comparison: "==" is not a portable test
+dnl operator and fails under some /bin/sh implementations.
+if test $libc_cv_asm_mpx = yes; then
+ AC_DEFINE(HAVE_MPX_SUPPORT)
+fi
+
dnl It is always possible to access static and hidden symbols in an
dnl position independent way.
AC_DEFINE(PI_STATIC_AND_HIDDEN)
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 77c4d0f147..3c0f54274a 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -24,6 +24,30 @@
# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
#endif
+/* Area on stack to save and restore registers used for parameter
+ passing when calling _dl_fixup. REGISTER_SAVE_AREA is the size of
+ the area in bytes; each REGISTER_SAVE_<reg> is the byte offset of
+ that register's slot from %rsp once the area has been allocated. */
+#ifdef __ILP32__
+/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX: seven 8-byte
+ slots (56 bytes), with RAX at offset 0. */
+# define REGISTER_SAVE_AREA (8 * 7)
+# define REGISTER_SAVE_RAX 0
+#else
+/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0,
+ BND1, BND2, BND3: seven 8-byte slots plus four 16-byte slots
+ (120 bytes). */
+# define REGISTER_SAVE_AREA (8 * 7 + 16 * 4)
+/* Align bound register save area to 16 bytes: the bound slots come
+ first, at offsets 0, 16, 32 and 48 from the start of the area, and
+ the RAX slot follows them at offset 64. */
+# define REGISTER_SAVE_BND0 0
+# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
+# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
+# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
+# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
+#endif
+/* The remaining general-purpose slots follow RAX contiguously, 8 bytes
+ apart, in the order RCX, RDX, RSI, RDI, R8, R9. */
+#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
+#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
+#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
+#define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8)
+#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8)
+#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
+
.text
.globl _dl_runtime_resolve
.type _dl_runtime_resolve, @function
@@ -31,28 +55,63 @@
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
- subq $56,%rsp
- cfi_adjust_cfa_offset(56)
- movq %rax,(%rsp) # Preserve registers otherwise clobbered.
- movq %rcx, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rsi, 24(%rsp)
- movq %rdi, 32(%rsp)
- movq %r8, 40(%rsp)
- movq %r9, 48(%rsp)
- movq 64(%rsp), %rsi # Copy args pushed by PLT in register.
- movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
+ subq $REGISTER_SAVE_AREA,%rsp
+ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+ # Preserve registers otherwise clobbered.
+ movq %rax, REGISTER_SAVE_RAX(%rsp)
+ movq %rcx, REGISTER_SAVE_RCX(%rsp)
+ movq %rdx, REGISTER_SAVE_RDX(%rsp)
+ movq %rsi, REGISTER_SAVE_RSI(%rsp)
+ movq %rdi, REGISTER_SAVE_RDI(%rsp)
+ movq %r8, REGISTER_SAVE_R8(%rsp)
+ movq %r9, REGISTER_SAVE_R9(%rsp)
+#ifndef __ILP32__
+ # We also have to preserve bound registers. These are nops if
+ # Intel MPX isn't available or disabled.
+# ifdef HAVE_MPX_SUPPORT
+ bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
+ bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
+ bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
+ bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# else
+ # Byte-encoded form of the four bndmov stores in the other branch.
+ .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
+ .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
+ .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
+ .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
+# endif
+#endif
+ # Copy args pushed by PLT in register.
+ # %rdi: link_map, %rsi: reloc_index
+ movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
+ movq REGISTER_SAVE_AREA(%rsp), %rdi
call _dl_fixup # Call resolver.
movq %rax, %r11 # Save return value
- movq 48(%rsp), %r9 # Get register content back.
- movq 40(%rsp), %r8
- movq 32(%rsp), %rdi
- movq 24(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 8(%rsp), %rcx
- movq (%rsp), %rax
- addq $72, %rsp # Adjust stack(PLT did 2 pushes)
- cfi_adjust_cfa_offset(-72)
+#ifndef __ILP32__
+ # Restore bound registers. These are nops if Intel MPX isn't
+ # available or disabled.
+# ifdef HAVE_MPX_SUPPORT
+ bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
+ bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
+ bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
+ bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+# else
+ # Byte-encoded form of the four bndmov loads in the other branch.
+ .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
+ .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
+ .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
+ .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
+# endif
+#endif
+ # Get register content back.
+ movq REGISTER_SAVE_R9(%rsp), %r9
+ movq REGISTER_SAVE_R8(%rsp), %r8
+ movq REGISTER_SAVE_RDI(%rsp), %rdi
+ movq REGISTER_SAVE_RSI(%rsp), %rsi
+ movq REGISTER_SAVE_RDX(%rsp), %rdx
+ movq REGISTER_SAVE_RCX(%rsp), %rcx
+ movq REGISTER_SAVE_RAX(%rsp), %rax
+ # Adjust stack(PLT did 2 pushes)
+ addq $(REGISTER_SAVE_AREA + 16), %rsp
+ cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve