From a4c75cfd56e536c2b18556e8a482d88dffa0fffc Mon Sep 17 00:00:00 2001 From: Igor Zamyatin Date: Tue, 1 Apr 2014 10:16:04 -0700 Subject: Save/restore bound registers in _dl_runtime_resolve This patch saves and restores bound registers in symbol lookup for x86-64: 1. Branches without BND prefix clear bound registers. 2. x86-64 pass bounds in bound registers as specified in MPX psABI extension on hjl/mpx/master branch at https://github.com/hjl-tools/x86-64-psABI https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc Binutils has been updated to create an alternate PLT to add BND prefix when branching to ld.so. * config.h.in (HAVE_MPX_SUPPORT): New #undef. * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT. * sysdeps/x86_64/configure: Regenerated. * sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New macro. (REGISTER_SAVE_RAX): Likewise. (REGISTER_SAVE_RCX): Likewise. (REGISTER_SAVE_RDX): Likewise. (REGISTER_SAVE_RSI): Likewise. (REGISTER_SAVE_RDI): Likewise. (REGISTER_SAVE_R8): Likewise. (REGISTER_SAVE_R9): Likewise. (REGISTER_SAVE_BND0): Likewise. (REGISTER_SAVE_BND1): Likewise. (REGISTER_SAVE_BND2): Likewise. (_dl_runtime_resolve): Use them. Save and restore Intel MPX bound registers when calling _dl_fixup. --- ChangeLog | 21 +++++++++ config.h.in | 3 ++ sysdeps/x86_64/configure | 27 ++++++++++++ sysdeps/x86_64/configure.ac | 15 +++++++ sysdeps/x86_64/dl-trampoline.S | 99 +++++++++++++++++++++++++++++++++--------- 5 files changed, 145 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4598b60648..7cc48ce4e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +2014-04-09 Igor Zamyatin + H.J. Lu + + * config.h.in (HAVE_MPX_SUPPORT): New #undef. + * sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT. + * sysdeps/x86_64/configure: Regenerated. + * sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New + macro. + (REGISTER_SAVE_RAX): Likewise. + (REGISTER_SAVE_RCX): Likewise. + (REGISTER_SAVE_RDX): Likewise. + (REGISTER_SAVE_RSI): Likewise. + (REGISTER_SAVE_RDI): Likewise. + (REGISTER_SAVE_R8): Likewise. + (REGISTER_SAVE_R9): Likewise. + (REGISTER_SAVE_BND0): Likewise. + (REGISTER_SAVE_BND1): Likewise. + (REGISTER_SAVE_BND2): Likewise. + (_dl_runtime_resolve): Use them. Save and restore Intel MPX + bound registers when calling _dl_fixup. + 2014-04-09 Adhemerval Zanella * bits/string.h (_STRING_ARCH_unaligned): Define it to 0. diff --git a/config.h.in b/config.h.in index 3fc34bdb1e..b6e3623d92 100644 --- a/config.h.in +++ b/config.h.in @@ -104,6 +104,9 @@ /* Define if assembler supports AVX512. */ #undef HAVE_AVX512_ASM_SUPPORT +/* Define if assembler supports Intel MPX. */ +#undef HAVE_MPX_SUPPORT + /* Define if gcc supports FMA4. */ #undef HAVE_FMA4_SUPPORT diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure index c1c88c8619..45d868d93d 100644 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -222,6 +222,33 @@ $as_echo "$libc_cv_cc_novzeroupper" >&6; } config_vars="$config_vars config-cflags-novzeroupper = $libc_cv_cc_novzeroupper" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5 +$as_echo_n "checking for Intel MPX support... " >&6; } +if ${libc_cv_asm_mpx+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat > conftest.s <<\EOF + bndmov %bnd0,(%rsp) +EOF +if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + libc_cv_asm_mpx=yes +else + libc_cv_asm_mpx=no +fi +rm -f conftest* +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5 +$as_echo "$libc_cv_asm_mpx" >&6; } +if test $libc_cv_asm_mpx == yes; then + $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h + +fi + $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h # work around problem with autoconf and empty lines at the end of files diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index d34f9a8eec..9138f63a3f 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -75,6 +75,21 @@ LIBC_TRY_CC_OPTION([-mno-vzeroupper], ]) LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper]) +dnl Check whether asm supports Intel MPX +AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl +cat > conftest.s <<\EOF + bndmov %bnd0,(%rsp) +EOF +if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then + libc_cv_asm_mpx=yes +else + libc_cv_asm_mpx=no +fi +rm -f conftest*]) +if test $libc_cv_asm_mpx == yes; then + AC_DEFINE(HAVE_MPX_SUPPORT) +fi + dnl It is always possible to access static and hidden symbols in an dnl position independent way. AC_DEFINE(PI_STATIC_AND_HIDDEN) diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 77c4d0f147..3c0f54274a 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -24,6 +24,30 @@ # error RTLD_SAVESPACE_SSE must be aligned to 32 bytes #endif +/* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. */ +#ifdef __ILP32__ +/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX. */ +# define REGISTER_SAVE_AREA (8 * 7) +# define REGISTER_SAVE_RAX 0 +#else +/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0, + BND1, BND2, BND3. */ +# define REGISTER_SAVE_AREA (8 * 7 + 16 * 4) +/* Align bound register save area to 16 bytes. */ +# define REGISTER_SAVE_BND0 0 +# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16) +# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16) +# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16) +# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16) +#endif +#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8) +#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) +#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8) +#define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8) +#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8) +#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) + .text .globl _dl_runtime_resolve .type _dl_runtime_resolve, @function @@ -31,28 +55,63 @@ cfi_startproc _dl_runtime_resolve: cfi_adjust_cfa_offset(16) # Incorporate PLT - subq $56,%rsp - cfi_adjust_cfa_offset(56) - movq %rax,(%rsp) # Preserve registers otherwise clobbered. - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - movq 64(%rsp), %rsi # Copy args pushed by PLT in register. - movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index + subq $REGISTER_SAVE_AREA,%rsp + cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) + # Preserve registers otherwise clobbered. + movq %rax, REGISTER_SAVE_RAX(%rsp) + movq %rcx, REGISTER_SAVE_RCX(%rsp) + movq %rdx, REGISTER_SAVE_RDX(%rsp) + movq %rsi, REGISTER_SAVE_RSI(%rsp) + movq %rdi, REGISTER_SAVE_RDI(%rsp) + movq %r8, REGISTER_SAVE_R8(%rsp) + movq %r9, REGISTER_SAVE_R9(%rsp) +#ifndef __ILP32__ + # We also have to preserve bound registers. These are nops if + # Intel MPX isn't available or disabled. +# ifdef HAVE_MPX_SUPPORT + bndmov %bnd0, REGISTER_SAVE_BND0(%rsp) + bndmov %bnd1, REGISTER_SAVE_BND1(%rsp) + bndmov %bnd2, REGISTER_SAVE_BND2(%rsp) + bndmov %bnd3, REGISTER_SAVE_BND3(%rsp) +# else + .byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0 + .byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1 + .byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2 + .byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3 +# endif +#endif + # Copy args pushed by PLT in register. + # %rdi: link_map, %rsi: reloc_index + movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi + movq REGISTER_SAVE_AREA(%rsp), %rdi call _dl_fixup # Call resolver. movq %rax, %r11 # Save return value - movq 48(%rsp), %r9 # Get register content back. - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $72, %rsp # Adjust stack(PLT did 2 pushes) - cfi_adjust_cfa_offset(-72) +#ifndef __ILP32__ + # Restore bound registers. These are nops if Intel MPX isn't + # avaiable or disabled. +# ifdef HAVE_MPX_SUPPORT + bndmov REGISTER_SAVE_BND3(%rsp), %bnd3 + bndmov REGISTER_SAVE_BND2(%rsp), %bnd2 + bndmov REGISTER_SAVE_BND1(%rsp), %bnd1 + bndmov REGISTER_SAVE_BND0(%rsp), %bnd0 +# else + .byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3 + .byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2 + .byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1 + .byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0 +# endif +#endif + # Get register content back. + movq REGISTER_SAVE_R9(%rsp), %r9 + movq REGISTER_SAVE_R8(%rsp), %r8 + movq REGISTER_SAVE_RDI(%rsp), %rdi + movq REGISTER_SAVE_RSI(%rsp), %rsi + movq REGISTER_SAVE_RDX(%rsp), %rdx + movq REGISTER_SAVE_RCX(%rsp), %rcx + movq REGISTER_SAVE_RAX(%rsp), %rax + # Adjust stack(PLT did 2 pushes) + addq $(REGISTER_SAVE_AREA + 16), %rsp + cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16)) jmp *%r11 # Jump to function address. cfi_endproc .size _dl_runtime_resolve, .-_dl_runtime_resolve -- cgit v1.2.3