summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64
diff options
context:
space:
mode:
author Andreas Schwab <schwab@redhat.com> 2009-07-30 14:18:37 +0200
committer Andreas Schwab <schwab@redhat.com> 2009-07-30 14:18:37 +0200
commit b870de510d54108c7c839abc17ea1559085e55a3 (patch)
tree d59aca63c9713ac51b929e388187f6ec0bb1273e /sysdeps/x86_64
parent ca2a37b64e0347b400e58da9ca238c9320a55edb (diff)
parent 78c4ef475d47a2289635f74b726f52defedb4651 (diff)
Merge commit 'origin/master' into fedora/master
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- sysdeps/x86_64/dl-trampoline.S 105
-rw-r--r-- sysdeps/x86_64/multiarch/Versions 5
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.c 10
-rw-r--r-- sysdeps/x86_64/multiarch/init-arch.h 22
-rw-r--r-- sysdeps/x86_64/multiarch/s_fma.c 43
-rw-r--r-- sysdeps/x86_64/multiarch/s_fmaf.c 42
-rwxr-xr-x sysdeps/x86_64/tst-xmmymm.sh 7
7 files changed, 218 insertions, 16 deletions
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 49d239f075..20da6956f1 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -61,6 +61,7 @@ _dl_runtime_resolve:
cfi_startproc
_dl_runtime_profile:
+ cfi_adjust_cfa_offset(16) # Incorporate PLT
/* The La_x86_64_regs data structure pointed to by the
fourth parameter must be 16-byte aligned. This must
be explicitly enforced. We have to set up a dynamically
@@ -68,7 +69,7 @@ _dl_runtime_profile:
has a fixed size and preserves the original stack pointer. */
subq $32, %rsp # Allocate the local storage.
- cfi_adjust_cfa_offset(48) # Incorporate PLT
+ cfi_adjust_cfa_offset(32)
movq %rbx, (%rsp)
cfi_rel_offset(%rbx, 0)
@@ -203,49 +204,49 @@ L(no_avx1):
vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
vpmovmskb %xmm8, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
L(no_avx2):
@@ -361,13 +362,13 @@ L(no_avx3):
vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
vpmovmskb %xmm2, %esi
cmpl $0xffff, %esi
- je 1f
+ jne 1f
vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
L(no_avx4):
@@ -390,3 +391,85 @@ L(no_avx4):
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
#endif
+
+/* Save %xmm0-%xmm7, or %ymm0-%ymm7 when CPUID reports AVX, at %fs:RTLD_SAVESPACE_SSE. */
+#ifdef SHARED
+ .globl _dl_x86_64_save_sse
+ .type _dl_x86_64_save_sse, @function
+ .align 16
+ cfi_startproc
+_dl_x86_64_save_sse:
+# ifdef HAVE_AVX_SUPPORT
+ cmpl $0, L(have_avx)(%rip) # Nonzero: already probed
+ jne 1f # Flags carry the cached sign
+ movq %rbx, %r11 # Save rbx
+ movl $1, %eax # CPUID leaf 1
+ cpuid
+ movq %r11,%rbx # Restore rbx
+ movl $1, %eax # Assume AVX: have_avx = 1
+ testl $(1 << 28), %ecx # CPUID.1:ECX bit 28 = AVX
+ jne 2f
+ negl %eax # No AVX: have_avx = -1
+2: movl %eax, L(have_avx)(%rip)
+ cmpl $0, %eax # Set SF for the js below
+ /* NOTE(review): only the CPUID AVX bit is tested; OS support for YMM state is not checked here. */
+1: js L(no_avx5) # Negative: SSE-only path
+
+# define YMM_SIZE 32
+ vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
+ vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
+ vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
+ vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
+ vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
+ vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
+ vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
+ vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
+ ret
+L(no_avx5):
+# endif
+ /* SSE path uses XMM_SIZE only.  YMM_SIZE is left at 32 so the AVX loads in _dl_x86_64_restore_sse match the stores above. */
+ movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
+ movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
+ movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
+ movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
+ movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
+ movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
+ movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
+ movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
+ ret
+ cfi_endproc
+ .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
+
+/* Reload %xmm0-%xmm7, or %ymm0-%ymm7 when have_avx is non-negative, from %fs:RTLD_SAVESPACE_SSE. */
+ .globl _dl_x86_64_restore_sse
+ .type _dl_x86_64_restore_sse, @function
+ .align 16
+ cfi_startproc
+_dl_x86_64_restore_sse:
+# ifdef HAVE_AVX_SUPPORT
+ cmpl $0, L(have_avx)(%rip) # Flag written by _dl_x86_64_save_sse
+ js L(no_avx6) # Negative: SSE-only path
+ /* NOTE(review): YMM_SIZE must expand to 32 here to match the vmovdqa stores in _dl_x86_64_save_sse; verify it is not redefined in between. */
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
+ vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
+ ret
+L(no_avx6):
+# endif
+ movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
+ movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
+ movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
+ movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
+ movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
+ movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
+ movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
+ movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
+ ret
+ cfi_endproc
+ .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#endif
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
new file mode 100644
index 0000000000..59b185ac8d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/Versions
@@ -0,0 +1,5 @@
+libc {
+ GLIBC_PRIVATE {
+ __get_cpu_features;
+ }
+}
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 35fd19af0e..49b421eac8 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -86,3 +86,13 @@ __init_cpu_features (void)
else
__cpu_features.kind = arch_kind_other;
}
+
+/* Return &__cpu_features, calling __init_cpu_features first if its kind is still arch_kind_unknown.  */
+const struct cpu_features *
+__get_cpu_features (void)
+{
+ if (__cpu_features.kind == arch_kind_unknown)
+ __init_cpu_features ();
+
+ return &__cpu_features;
+}
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 48a2127418..0151e8b95b 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -54,10 +54,28 @@ extern void __init_cpu_features (void) attribute_hidden;
__init_cpu_features (); \
while (0)
+/* Used from outside libc.so to get access to the CPU features structure. */
+extern const struct cpu_features *__get_cpu_features (void)
+ __attribute__ ((const));
+
/* Following are the feature tests used throughout libc. */
-#define HAS_POPCOUNT \
+#ifndef NOT_IN_libc
+# define HAS_POPCOUNT \
((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
-#define HAS_SSE4_2 \
+# define HAS_SSE4_2 \
((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 20)) != 0)
+
+# define HAS_FMA \
+ ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0)
+#else
+# define HAS_POPCOUNT \
+ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 23)) != 0)
+
+# define HAS_SSE4_2 \
+ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 20)) != 0)
+
+# define HAS_FMA \
+ ((__get_cpu_features ()->cpuid[COMMON_CPUID_INDEX_1].ecx & (1 << 12)) != 0)
+#endif
diff --git a/sysdeps/x86_64/multiarch/s_fma.c b/sysdeps/x86_64/multiarch/s_fma.c
new file mode 100644
index 0000000000..40601e9a68
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/s_fma.c
@@ -0,0 +1,43 @@
+/* FMA version of fma.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <config.h>
+#include <math.h>
+#include <init-arch.h>
+
+#ifdef HAVE_AVX_SUPPORT
+
+extern double __fma_sse2 (double x, double y, double z);
+
+/* Return x * y + z from a single vfmadd213sd; x is tied to the output operand ("0" constraint).  */
+double
+__fma_fma (double x, double y, double z)
+{
+ asm ("vfmadd213sd %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
+ return x;
+}
+
+libm_ifunc (__fma, HAS_FMA ? __fma_fma : __fma_sse2);
+weak_alias (__fma, fma)
+
+# define __fma __fma_sse2
+#endif
+
+#include <math/s_fma.c>
diff --git a/sysdeps/x86_64/multiarch/s_fmaf.c b/sysdeps/x86_64/multiarch/s_fmaf.c
new file mode 100644
index 0000000000..f3d37f8f4a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/s_fmaf.c
@@ -0,0 +1,42 @@
+/* FMA version of fmaf.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <config.h>
+#include <math.h>
+#include <init-arch.h>
+
+#ifdef HAVE_AVX_SUPPORT
+
+extern float __fmaf_sse2 (float x, float y, float z);
+
+/* Return x * y + z from a single vfmadd213ss; x is tied to the output operand ("0" constraint).  */
+float
+__fmaf_fma (float x, float y, float z)
+{
+ asm ("vfmadd213ss %3, %2, %0" : "=x" (x) : "0" (x), "x" (y), "xm" (z));
+ return x;
+}
+
+libm_ifunc (__fmaf, HAS_FMA ? __fmaf_fma : __fmaf_sse2);
+weak_alias (__fmaf, fmaf)
+
+# define __fmaf __fmaf_sse2
+#endif
+
+#include <math/s_fmaf.c>
diff --git a/sysdeps/x86_64/tst-xmmymm.sh b/sysdeps/x86_64/tst-xmmymm.sh
index a576e7da0d..da8af7e686 100755
--- a/sysdeps/x86_64/tst-xmmymm.sh
+++ b/sysdeps/x86_64/tst-xmmymm.sh
@@ -59,10 +59,11 @@ for f in $tocheck; do
objdump -d "$objpfx"../*/"$f" |
awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
while read fct; do
- if test "$fct" != "_dl_runtime_profile"; then
- echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
- result=1
+ if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
+ continue;
fi
+ echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
+ result=1
done
done