diff options
author | Andreas Schwab <schwab@redhat.com> | 2009-07-30 14:18:37 +0200 |
---|---|---|
committer | Andreas Schwab <schwab@redhat.com> | 2009-07-30 14:18:37 +0200 |
commit | b870de510d54108c7c839abc17ea1559085e55a3 (patch) | |
tree | d59aca63c9713ac51b929e388187f6ec0bb1273e /sysdeps/x86_64 | |
parent | ca2a37b64e0347b400e58da9ca238c9320a55edb (diff) | |
parent | 78c4ef475d47a2289635f74b726f52defedb4651 (diff) |
Merge commit 'origin/master' into fedora/master
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- | sysdeps/x86_64/dl-trampoline.S | 105 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/Versions | 5 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.c | 10 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/init-arch.h | 22 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/s_fma.c | 43 | ||||
-rw-r--r-- | sysdeps/x86_64/multiarch/s_fmaf.c | 42 | ||||
-rwxr-xr-x | sysdeps/x86_64/tst-xmmymm.sh | 7 |
7 files changed, 218 insertions, 16 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S index 49d239f075..20da6956f1 100644 --- a/sysdeps/x86_64/dl-trampoline.S +++ b/sysdeps/x86_64/dl-trampoline.S @@ -61,6 +61,7 @@ _dl_runtime_resolve: cfi_startproc _dl_runtime_profile: + cfi_adjust_cfa_offset(16) # Incorporate PLT /* The La_x86_64_regs data structure pointed to by the fourth paramater must be 16-byte aligned. This must be explicitly enforced. We have the set up a dynamically @@ -68,7 +69,7 @@ _dl_runtime_profile: has a fixed size and preserves the original stack pointer. */ subq $32, %rsp # Allocate the local storage. - cfi_adjust_cfa_offset(48) # Incorporate PLT + cfi_adjust_cfa_offset(32) movq %rbx, (%rsp) cfi_rel_offset(%rbx, 0) @@ -203,49 +204,49 @@ L(no_avx1): vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0 1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1 1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2 1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3 1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4 1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5 1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6 1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8 vpmovmskb %xmm8, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7 L(no_avx2): @@ -361,13 +362,13 @@ L(no_avx3): vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2 vpmovmskb %xmm2, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0 1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2 vpmovmskb %xmm2, %esi cmpl $0xffff, %esi - je 1f + jne 1f vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1 L(no_avx4): @@ -390,3 +391,85 @@ L(no_avx4): cfi_endproc .size _dl_runtime_profile, .-_dl_runtime_profile #endif + + +#ifdef SHARED + .globl _dl_x86_64_save_sse + .type _dl_x86_64_save_sse, @function + .align 16 + cfi_startproc +_dl_x86_64_save_sse: +# ifdef HAVE_AVX_SUPPORT + cmpl $0, L(have_avx)(%rip) + jne 1f + movq %rbx, %r11 # Save rbx + movl $1, %eax + cpuid + movq %r11,%rbx # Restore rbx + movl $1, %eax + testl $(1 << 28), %ecx + jne 2f + negl %eax +2: movl %eax, L(have_avx)(%rip) + cmpl $0, %eax + +1: js L(no_avx5) + +# define YMM_SIZE 32 + vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE + vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE + vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE + vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE + vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE + vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE + vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE + vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE + ret +L(no_avx5): +# endif +# define YMM_SIZE 16 + movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE + movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE + movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE + movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE + movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE + movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE + movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE + movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE + ret + cfi_endproc + .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse + + + .globl _dl_x86_64_restore_sse + .type _dl_x86_64_restore_sse, @function + .align 16 + cfi_startproc +_dl_x86_64_restore_sse: +# ifdef HAVE_AVX_SUPPORT + cmpl $0, L(have_avx)(%rip) + js L(no_avx6) + + vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 + vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 + vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2 + vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3 + vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4 + vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5 + vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 + vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 + ret +L(no_avx6): +# endif + movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 + movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1 + movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2 + movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3 + movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4 + movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5 + movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6 + movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7 + ret + cfi_endproc + .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse +#endif diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions new file mode 100644 index 0000000000..59b185ac8d --- /dev/null +++ b/sysdeps/x86_64/multiarch/Versions @@ -0,0 +1,5 @@ +libc { + GLIBC_PRIVATE { + __get_cpu_features; + } +} diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 35fd19af0e..49b421eac8 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -86,3 +86,13 @@ __init_cpu_features (void) else __cpu_features.kind = arch_kind_other; } + + +const struct cpu_features * +__get_cpu_features (void) +{ + if (__cpu_features.kind == arch_kind_unknown) + __init_cpu_features (); + + return &__cpu_features; +} diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 48a2127418..0151e8b95b 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -54,10 +54,28 @@ extern void __init_cpu_features (void) attribute_hidden; __init_cpu_features (); \ while (0) +/* Used from outside libc.so to get access to the CPU features structure. */ +extern const struct cpu_features *__get_cpu_features (void) + __attribute__ ((const)); + /* Following are the feature tests used throughout libc. */ -#define HAS_POPCOUNT \ +#ifndef NOT_IN_libc +# define HAS_POPCOUNT \ ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0) -#define HAS_SSE4_2 \ +# define HAS_SSE4_2 \ ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 20)) != 0) + +# define HAS_FMA \ + ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0) +#else +# define HAS_POPCOUNT \ + ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0) + +# define HAS_SSE4_2 \ + ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 20)) != 0) + +# define HAS_FMA \ + ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0) +#endif diff --git a/sysdeps/x86_64/multiarch/s_fma.c b/sysdeps/x86_64/multiarch/s_fma.c new file mode 100644 index 0000000000..40601e9a68 --- /dev/null +++ b/sysdeps/x86_64/multiarch/s_fma.c @@ -0,0 +1,43 @@ +/* FMA version of fma. + Copyright (C) 2009 Free Software Foundation, Inc. + Contributed by Intel Corporation. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <config.h> +#include <math.h> +#include <init-arch.h> + +#ifdef HAVE_AVX_SUPPORT + +extern double __fma_sse2 (double x, double y, double z); + + +double +__fma_fma (double x, double y, double z) +{ + asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); + return x; +} + +libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2); +weak_alias (__fma, fma) + +# define __fma __fma_sse2 +#endif + +#include <math/s_fma.c> diff --git a/sysdeps/x86_64/multiarch/s_fmaf.c b/sysdeps/x86_64/multiarch/s_fmaf.c new file mode 100644 index 0000000000..f3d37f8f4a --- /dev/null +++ b/sysdeps/x86_64/multiarch/s_fmaf.c @@ -0,0 +1,42 @@ +/* FMA version of fmaf. + Copyright (C) 2009 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <config.h> +#include <math.h> +#include <init-arch.h> + +#ifdef HAVE_AVX_SUPPORT + +extern float __fmaf_sse2 (float x, float y, float z); + + +float +__fmaf_fma (float x, float y, float z) +{ + asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z)); + return x; +} + +libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2); +weak_alias (__fmaf, fmaf) + +# define __fmaf __fmaf_sse2 +#endif + +#include <math/s_fmaf.c> diff --git a/sysdeps/x86_64/tst-xmmymm.sh b/sysdeps/x86_64/tst-xmmymm.sh index a576e7da0d..da8af7e686 100755 --- a/sysdeps/x86_64/tst-xmmymm.sh +++ b/sysdeps/x86_64/tst-xmmymm.sh @@ -59,10 +59,11 @@ for f in $tocheck; do objdump -d "$objpfx"../*/"$f" | awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' | while read fct; do - if test "$fct" != "_dl_runtime_profile"; then - echo "function $fct in $f modifies xmm/ymm" >> "$tmp" - result=1 + if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then + continue; fi + echo "function $fct in $f modifies xmm/ymm" >> "$tmp" + result=1 done done |