diff options
Diffstat (limited to 'sysdeps/x86_64/fpu')
417 files changed, 7343 insertions, 3849 deletions
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile index 88742faff1..2b7d69bb50 100644 --- a/sysdeps/x86_64/fpu/Makefile +++ b/sysdeps/x86_64/fpu/Makefile @@ -30,11 +30,171 @@ ifeq ($(subdir),math) ifeq ($(build-mathvec),yes) libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \ float-vlen4 float-vlen8 float-vlen8-avx2 +tests += test-double-libmvec-alias test-double-libmvec-alias-avx \ + test-double-libmvec-alias-avx2 test-double-libmvec-alias-main \ + test-double-libmvec-alias-avx-main test-double-libmvec-alias-avx2-main \ + test-float-libmvec-alias test-float-libmvec-alias-avx \ + test-float-libmvec-alias-avx2 test-float-libmvec-alias-main \ + test-float-libmvec-alias-avx-main test-float-libmvec-alias-avx2-main \ + test-double-libmvec-sincos test-double-libmvec-sincos-avx \ + test-double-libmvec-sincos-avx2 test-float-libmvec-sincosf \ + test-float-libmvec-sincosf-avx test-float-libmvec-sincosf-avx2 +modules-names += test-double-libmvec-alias-mod \ + test-double-libmvec-alias-avx-mod \ + test-double-libmvec-alias-avx2-mod \ + test-float-libmvec-alias-mod \ + test-float-libmvec-alias-avx-mod \ + test-float-libmvec-alias-avx2-mod +modules-names-tests += test-double-libmvec-alias-mod \ + test-double-libmvec-alias-avx-mod \ + test-double-libmvec-alias-avx2-mod \ + test-float-libmvec-alias-mod \ + test-float-libmvec-alias-avx-mod \ + test-float-libmvec-alias-avx2-mod +extra-test-objs += test-double-libmvec-sincos-avx-main.o \ + test-double-libmvec-sincos-avx2-main.o \ + test-double-libmvec-sincos-main.o \ + test-float-libmvec-sincosf-avx-main.o \ + test-float-libmvec-sincosf-avx2-main.o\ + test-float-libmvec-sincosf-main.o +test-double-libmvec-alias-mod.so-no-z-defs = yes +test-double-libmvec-alias-avx-mod.so-no-z-defs = yes +test-double-libmvec-alias-avx2-mod.so-no-z-defs = yes +test-float-libmvec-alias-mod.so-no-z-defs = yes +test-float-libmvec-alias-avx-mod.so-no-z-defs = yes +test-float-libmvec-alias-avx2-mod.so-no-z-defs = yes + 
+$(objpfx)test-double-libmvec-alias: \ + $(objpfx)test-double-libmvec-alias-mod.so +$(objpfx)test-double-libmvec-alias-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-alias-avx: \ + $(objpfx)test-double-libmvec-alias-avx-mod.so +$(objpfx)test-double-libmvec-alias-avx-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-alias-avx2: \ + $(objpfx)test-double-libmvec-alias-avx2-mod.so +$(objpfx)test-double-libmvec-alias-avx2-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-alias-main: \ + $(objpfx)test-double-libmvec-alias-mod.os \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-alias-avx-main: \ + $(objpfx)test-double-libmvec-alias-avx-mod.os \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-alias-avx2-main: \ + $(objpfx)test-double-libmvec-alias-avx2-mod.os \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias: \ + $(objpfx)test-float-libmvec-alias-mod.so +$(objpfx)test-float-libmvec-alias-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias-avx: \ + $(objpfx)test-float-libmvec-alias-avx-mod.so +$(objpfx)test-float-libmvec-alias-avx-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias-avx2: \ + $(objpfx)test-float-libmvec-alias-avx2-mod.so +$(objpfx)test-float-libmvec-alias-avx2-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias-main: \ + $(objpfx)test-float-libmvec-alias-mod.os \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias-avx-main: \ + $(objpfx)test-float-libmvec-alias-avx-mod.os \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias-avx2-main: \ + $(objpfx)test-float-libmvec-alias-avx2-mod.os \ + 
$(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-sincos: \ + $(objpfx)test-double-libmvec-sincos.o \ + $(objpfx)test-double-libmvec-sincos-main.o $(libmvec) + +$(objpfx)test-double-libmvec-sincos-avx: \ + $(objpfx)test-double-libmvec-sincos-avx.o \ + $(objpfx)test-double-libmvec-sincos-avx-main.o $(libmvec) + +$(objpfx)test-double-libmvec-sincos-avx2: \ + $(objpfx)test-double-libmvec-sincos-avx2.o \ + $(objpfx)test-double-libmvec-sincos-avx2-main.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf: \ + $(objpfx)test-float-libmvec-sincosf.o \ + $(objpfx)test-float-libmvec-sincosf-main.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf-avx: \ + $(objpfx)test-float-libmvec-sincosf-avx.o \ + $(objpfx)test-float-libmvec-sincosf-avx-main.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf-avx2: \ + $(objpfx)test-float-libmvec-sincosf-avx2.o \ + $(objpfx)test-float-libmvec-sincosf-avx2-main.o $(libmvec) ifeq (yes,$(config-cflags-avx512)) libmvec-tests += double-vlen8 float-vlen16 +tests += test-double-libmvec-alias-avx512 \ + test-float-libmvec-alias-avx512 \ + test-double-libmvec-alias-avx512-main \ + test-float-libmvec-alias-avx512-main \ + test-double-libmvec-sincos-avx512 \ + test-float-libmvec-sincosf-avx512 +modules-names += test-double-libmvec-alias-avx512-mod \ + test-float-libmvec-alias-avx512-mod +modules-names-tests += test-double-libmvec-alias-avx512-mod \ + test-float-libmvec-alias-avx512-mod +extra-test-objs += test-double-libmvec-sincos-avx512-main.o \ + test-float-libmvec-sincosf-avx512-main.o +test-double-libmvec-alias-avx512-mod.so-no-z-defs = yes +test-float-libmvec-alias-avx512-mod.so-no-z-defs = yes + +$(objpfx)test-double-libmvec-alias-avx512: \ + $(objpfx)test-double-libmvec-alias-avx512-mod.so +$(objpfx)test-double-libmvec-alias-avx512-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-alias-avx512-main: \ + $(objpfx)test-double-libmvec-alias-avx512-mod.os \ + 
$(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias-avx512: \ + $(objpfx)test-float-libmvec-alias-avx512-mod.so +$(objpfx)test-float-libmvec-alias-avx512-mod.so: \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-float-libmvec-alias-avx512-main: \ + $(objpfx)test-float-libmvec-alias-avx512-mod.os \ + $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec) + +$(objpfx)test-double-libmvec-sincos-avx512: \ + $(objpfx)test-double-libmvec-sincos-avx512.o \ + $(objpfx)test-double-libmvec-sincos-avx512-main.o $(libmvec) + +$(objpfx)test-float-libmvec-sincosf-avx512: \ + $(objpfx)test-float-libmvec-sincosf-avx512.o \ + $(objpfx)test-float-libmvec-sincosf-avx512-main.o $(libmvec) endif +double-vlen2-funcs = cos exp log pow sin sincos +double-vlen4-funcs = cos exp log pow sin sincos +double-vlen4-avx2-funcs = cos exp log pow sin sincos +double-vlen8-funcs = cos exp log pow sin sincos +float-vlen4-funcs = cos exp log pow sin sincos +float-vlen8-funcs = cos exp log pow sin sincos +float-vlen8-avx2-funcs = cos exp log pow sin sincos +float-vlen16-funcs = cos exp log pow sin sincos + double-vlen4-arch-ext-cflags = -mavx double-vlen4-arch-ext2-cflags = -mavx2 double-vlen8-arch-ext-cflags = -mavx512f @@ -43,11 +203,37 @@ float-vlen8-arch-ext-cflags = -mavx float-vlen8-arch-ext2-cflags = -mavx2 float-vlen16-arch-ext-cflags = -mavx512f -CFLAGS-test-double-vlen4-avx2.c = $(libm-test-vec-cflags) +libmvec-sincos-cflags = $(libm-test-fast-math-cflags) -fno-inline -fopenmp -Wno-unknown-pragmas +libmvec-alias-cflags = $(libmvec-sincos-cflags) -ffloat-store -ffinite-math-only + +CFLAGS-test-double-libmvec-alias-mod.c = $(libmvec-alias-cflags) +CFLAGS-test-double-libmvec-alias-avx-mod.c = $(double-vlen4-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX +CFLAGS-test-double-libmvec-alias-avx2-mod.c = $(double-vlen4-arch-ext2-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX2 +CFLAGS-test-double-libmvec-alias-avx512-mod.c = 
$(double-vlen8-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX512F + +CFLAGS-test-float-libmvec-alias-mod.c = $(libmvec-alias-cflags) +CFLAGS-test-float-libmvec-alias-avx-mod.c = $(double-vlen4-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX +CFLAGS-test-float-libmvec-alias-avx2-mod.c = $(double-vlen4-arch-ext2-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX2 +CFLAGS-test-float-libmvec-alias-avx512-mod.c = $(double-vlen8-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX512F + CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags) -CFLAGS-test-float-vlen8-avx2.c = $(libm-test-vec-cflags) CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags) +CFLAGS-test-double-libmvec-sincos-main.c = $(libmvec-sincos-cflags) +CFLAGS-test-double-libmvec-sincos-avx.c = -DREQUIRE_AVX +CFLAGS-test-double-libmvec-sincos-avx-main.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext-cflags) +CFLAGS-test-double-libmvec-sincos-avx2.c = -DREQUIRE_AVX2 +CFLAGS-test-double-libmvec-sincos-avx2-main.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext2-cflags) +CFLAGS-test-double-libmvec-sincos-avx512.c = -DREQUIRE_AVX512F +CFLAGS-test-double-libmvec-sincos-avx512-main.c = $(libmvec-sincos-cflags) $(double-vlen8-arch-ext-cflags) + +CFLAGS-test-float-libmvec-sincosf-main.c = $(libmvec-sincos-cflags) +CFLAGS-test-float-libmvec-sincosf-avx.c = -DREQUIRE_AVX +CFLAGS-test-float-libmvec-sincosf-avx-main.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext-cflags) +CFLAGS-test-float-libmvec-sincosf-avx2.c = -DREQUIRE_AVX2 +CFLAGS-test-float-libmvec-sincosf-avx2-main.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext2-cflags) +CFLAGS-test-float-libmvec-sincosf-avx512.c = -DREQUIRE_AVX512F +CFLAGS-test-float-libmvec-sincosf-avx512-main.c = $(libmvec-sincos-cflags) $(float-vlen16-arch-ext-cflags) endif endif diff --git a/sysdeps/x86_64/fpu/dla.h b/sysdeps/x86_64/fpu/dla.h deleted file mode 100644 index 688efa0f5b..0000000000 --- 
a/sysdeps/x86_64/fpu/dla.h +++ /dev/null @@ -1,8 +0,0 @@ -#include <features.h> - -#ifdef __FMA4__ -# define DLA_FMS(x,y,z) \ - __builtin_fma (x, y, -(z)) -#endif - -#include "sysdeps/ieee754/dbl-64/dla.h" diff --git a/sysdeps/x86_64/fpu/e_expf.S b/sysdeps/x86_64/fpu/e_expf.S deleted file mode 100644 index d4b63a8d8e..0000000000 --- a/sysdeps/x86_64/fpu/e_expf.S +++ /dev/null @@ -1,339 +0,0 @@ -/* Optimized __ieee754_expf function. - Copyright (C) 2012-2016 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> - -/* Short algorithm description: - * - * Let K = 64 (table size). - * e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y)) - * where - * x = m*log(2)/K + y, y in [0.0..log(2)/K] - * m = n*K + j, m,n,j - signed integer, j in [0..K-1] - * values of 2^(j/K) are tabulated as T[j]. - * - * P(y) is a minimax polynomial approximation of expf(x)-1 - * on small interval [0.0..log(2)/K]. 
- * - * P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as - * z = y*y; P(y) = (P3*z + P1)*z + (P2*z + P0)*y - * - * Special cases: - * expf(NaN) = NaN - * expf(+INF) = +INF - * expf(-INF) = 0 - * expf(x) = 1 for subnormals - * for finite argument, only expf(0)=1 is exact - * expf(x) overflows if x>88.7228317260742190 - * expf(x) underflows if x<-103.972076416015620 - */ - - .text -ENTRY(__ieee754_expf) - /* Input: single precision x in %xmm0 */ - cvtss2sd %xmm0, %xmm1 /* Convert x to double precision */ - movd %xmm0, %ecx /* Copy x */ - movsd L(DP_KLN2)(%rip), %xmm2 /* DP K/log(2) */ - movsd L(DP_P2)(%rip), %xmm3 /* DP P2 */ - movl %ecx, %eax /* x */ - mulsd %xmm1, %xmm2 /* DP x*K/log(2) */ - andl $0x7fffffff, %ecx /* |x| */ - lea L(DP_T)(%rip), %rsi /* address of table T[j] */ - cmpl $0x42ad496b, %ecx /* |x|<125*log(2) ? */ - movsd L(DP_P3)(%rip), %xmm4 /* DP P3 */ - addsd L(DP_RS)(%rip), %xmm2 /* DP x*K/log(2)+RS */ - jae L(special_paths) - - /* Here if |x|<125*log(2) */ - cmpl $0x31800000, %ecx /* |x|<2^(-28) ? 
*/ - jb L(small_arg) - - /* Main path: here if 2^(-28)<=|x|<125*log(2) */ - cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */ - movd %xmm2, %eax /* bits of n*K+j with trash */ - subss L(SP_RS)(%rip), %xmm2 /* SP t=round(x*K/log(2)) */ - movl %eax, %edx /* n*K+j with trash */ - cvtss2sd %xmm2, %xmm2 /* DP t */ - andl $0x3f, %eax /* bits of j */ - mulsd L(DP_NLN2K)(%rip), %xmm2/* DP -t*log(2)/K */ - andl $0xffffffc0, %edx /* bits of n */ -#ifdef __AVX__ - vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */ - vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */ -#else - addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */ - movaps %xmm2, %xmm0 /* DP y */ - mulsd %xmm2, %xmm2 /* DP z=y*y */ -#endif - mulsd %xmm2, %xmm4 /* DP P3*z */ - addl $0x1fc0, %edx /* bits of n + SP exponent bias */ - mulsd %xmm2, %xmm3 /* DP P2*z */ - shll $17, %edx /* SP 2^n */ - addsd L(DP_P1)(%rip), %xmm4 /* DP P3*z+P1 */ - addsd L(DP_P0)(%rip), %xmm3 /* DP P2*z+P0 */ - movd %edx, %xmm1 /* SP 2^n */ - mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */ - mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */ - addsd %xmm4, %xmm0 /* DP P(y) */ - mulsd (%rsi,%rax,8), %xmm0 /* DP P(y)*T[j] */ - addsd (%rsi,%rax,8), %xmm0 /* DP T[j]*(P(y)+1) */ - cvtsd2ss %xmm0, %xmm0 /* SP T[j]*(P(y)+1) */ - mulss %xmm1, %xmm0 /* SP result=2^n*(T[j]*(P(y)+1)) */ - ret - - .p2align 4 -L(small_arg): - /* Here if 0<=|x|<2^(-28) */ - addss L(SP_ONE)(%rip), %xmm0 /* 1.0 + x */ - /* Return 1.0 with inexact raised, except for x==0 */ - ret - - .p2align 4 -L(special_paths): - /* Here if 125*log(2)<=|x| */ - shrl $31, %eax /* Get sign bit of x, and depending on it: */ - lea L(SP_RANGE)(%rip), %rdx /* load over/underflow bound */ - cmpl (%rdx,%rax,4), %ecx /* |x|<under/overflow bound ? */ - jbe L(near_under_or_overflow) - - /* Here if |x|>under/overflow bound */ - cmpl $0x7f800000, %ecx /* |x| is finite ? */ - jae L(arg_inf_or_nan) - - /* Here if |x|>under/overflow bound, and x is finite */ - testq %rax, %rax /* sign of x nonzero ? 
*/ - je L(res_overflow) - - /* Here if -inf<x<underflow bound (x<0) */ - movss L(SP_SMALL)(%rip), %xmm0/* load small value 2^(-100) */ - mulss %xmm0, %xmm0 /* Return underflowed result (zero or subnormal) */ - ret - - .p2align 4 -L(res_overflow): - /* Here if overflow bound<x<inf (x>0) */ - movss L(SP_LARGE)(%rip), %xmm0/* load large value 2^100 */ - mulss %xmm0, %xmm0 /* Return overflowed result (Inf or max normal) */ - ret - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(arg_nan) /* |x| is Inf ? */ - - /* Here if |x| is Inf */ - lea L(SP_INF_0)(%rip), %rdx /* depending on sign of x: */ - movss (%rdx,%rax,4), %xmm0 /* return zero or Inf */ - ret - - .p2align 4 -L(arg_nan): - /* Here if |x| is NaN */ - addss %xmm0, %xmm0 /* Return x+x (raise invalid) */ - ret - - .p2align 4 -L(near_under_or_overflow): - /* Here if 125*log(2)<=|x|<under/overflow bound */ - cvtsd2ss %xmm2, %xmm2 /* SP x*K/log(2)+RS */ - movd %xmm2, %eax /* bits of n*K+j with trash */ - subss L(SP_RS)(%rip), %xmm2 /* SP t=round(x*K/log(2)) */ - movl %eax, %edx /* n*K+j with trash */ - cvtss2sd %xmm2, %xmm2 /* DP t */ - andl $0x3f, %eax /* bits of j */ - mulsd L(DP_NLN2K)(%rip), %xmm2/* DP -t*log(2)/K */ - andl $0xffffffc0, %edx /* bits of n */ -#ifdef __AVX__ - vaddsd %xmm1, %xmm2, %xmm0 /* DP y=x-t*log(2)/K */ - vmulsd %xmm0, %xmm0, %xmm2 /* DP z=y*y */ -#else - addsd %xmm1, %xmm2 /* DP y=x-t*log(2)/K */ - movaps %xmm2, %xmm0 /* DP y */ - mulsd %xmm2, %xmm2 /* DP z=y*y */ -#endif - mulsd %xmm2, %xmm4 /* DP P3*z */ - addl $0xffc0, %edx /* bits of n + DP exponent bias */ - mulsd %xmm2, %xmm3 /* DP P2*z */ - shlq $46, %rdx /* DP 2^n */ - addsd L(DP_P1)(%rip), %xmm4 /* DP P3*z+P1 */ - addsd L(DP_P0)(%rip), %xmm3 /* DP P2*z+P0 */ - movd %rdx, %xmm1 /* DP 2^n */ - mulsd %xmm2, %xmm4 /* DP (P3*z+P1)*z */ - mulsd %xmm3, %xmm0 /* DP (P2*z+P0)*y */ - addsd %xmm4, %xmm0 /* DP P(y) */ - mulsd (%rsi,%rax,8), %xmm0 /* DP P(y)*T[j] */ - addsd (%rsi,%rax,8), %xmm0 /* DP T[j]*(P(y)+1) */ - 
mulsd %xmm1, %xmm0 /* DP result=2^n*(T[j]*(P(y)+1)) */ - cvtsd2ss %xmm0, %xmm0 /* convert result to single precision */ - ret -END(__ieee754_expf) - - .section .rodata, "a" - .p2align 3 -L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */ - .long 0x00000000, 0x3ff00000 - .long 0x3e778061, 0x3ff02c9a - .long 0xd3158574, 0x3ff059b0 - .long 0x18759bc8, 0x3ff08745 - .long 0x6cf9890f, 0x3ff0b558 - .long 0x32d3d1a2, 0x3ff0e3ec - .long 0xd0125b51, 0x3ff11301 - .long 0xaea92de0, 0x3ff1429a - .long 0x3c7d517b, 0x3ff172b8 - .long 0xeb6fcb75, 0x3ff1a35b - .long 0x3168b9aa, 0x3ff1d487 - .long 0x88628cd6, 0x3ff2063b - .long 0x6e756238, 0x3ff2387a - .long 0x65e27cdd, 0x3ff26b45 - .long 0xf51fdee1, 0x3ff29e9d - .long 0xa6e4030b, 0x3ff2d285 - .long 0x0a31b715, 0x3ff306fe - .long 0xb26416ff, 0x3ff33c08 - .long 0x373aa9cb, 0x3ff371a7 - .long 0x34e59ff7, 0x3ff3a7db - .long 0x4c123422, 0x3ff3dea6 - .long 0x21f72e2a, 0x3ff4160a - .long 0x6061892d, 0x3ff44e08 - .long 0xb5c13cd0, 0x3ff486a2 - .long 0xd5362a27, 0x3ff4bfda - .long 0x769d2ca7, 0x3ff4f9b2 - .long 0x569d4f82, 0x3ff5342b - .long 0x36b527da, 0x3ff56f47 - .long 0xdd485429, 0x3ff5ab07 - .long 0x15ad2148, 0x3ff5e76f - .long 0xb03a5585, 0x3ff6247e - .long 0x82552225, 0x3ff66238 - .long 0x667f3bcd, 0x3ff6a09e - .long 0x3c651a2f, 0x3ff6dfb2 - .long 0xe8ec5f74, 0x3ff71f75 - .long 0x564267c9, 0x3ff75feb - .long 0x73eb0187, 0x3ff7a114 - .long 0x36cf4e62, 0x3ff7e2f3 - .long 0x994cce13, 0x3ff82589 - .long 0x9b4492ed, 0x3ff868d9 - .long 0x422aa0db, 0x3ff8ace5 - .long 0x99157736, 0x3ff8f1ae - .long 0xb0cdc5e5, 0x3ff93737 - .long 0x9fde4e50, 0x3ff97d82 - .long 0x82a3f090, 0x3ff9c491 - .long 0x7b5de565, 0x3ffa0c66 - .long 0xb23e255d, 0x3ffa5503 - .long 0x5579fdbf, 0x3ffa9e6b - .long 0x995ad3ad, 0x3ffae89f - .long 0xb84f15fb, 0x3ffb33a2 - .long 0xf2fb5e47, 0x3ffb7f76 - .long 0x904bc1d2, 0x3ffbcc1e - .long 0xdd85529c, 0x3ffc199b - .long 0x2e57d14b, 0x3ffc67f1 - .long 0xdcef9069, 0x3ffcb720 - .long 0x4a07897c, 0x3ffd072d - 
.long 0xdcfba487, 0x3ffd5818 - .long 0x03db3285, 0x3ffda9e6 - .long 0x337b9b5f, 0x3ffdfc97 - .long 0xe78b3ff6, 0x3ffe502e - .long 0xa2a490da, 0x3ffea4af - .long 0xee615a27, 0x3ffefa1b - .long 0x5b6e4540, 0x3fff5076 - .long 0x819e90d8, 0x3fffa7c1 - .type L(DP_T), @object - ASM_SIZE_DIRECTIVE(L(DP_T)) - - .section .rodata.cst8,"aM",@progbits,8 - .p2align 3 -L(DP_KLN2): /* double precision K/log(2) */ - .long 0x652b82fe, 0x40571547 - .type L(DP_KLN2), @object - ASM_SIZE_DIRECTIVE(L(DP_KLN2)) - - .p2align 3 -L(DP_NLN2K): /* double precision -log(2)/K */ - .long 0xfefa39ef, 0xbf862e42 - .type L(DP_NLN2K), @object - ASM_SIZE_DIRECTIVE(L(DP_NLN2K)) - - .p2align 3 -L(DP_RS): /* double precision 2^23+2^22 */ - .long 0x00000000, 0x41680000 - .type L(DP_RS), @object - ASM_SIZE_DIRECTIVE(L(DP_RS)) - - .p2align 3 -L(DP_P3): /* double precision polynomial coefficient P3 */ - .long 0xeb78fa85, 0x3fa56420 - .type L(DP_P3), @object - ASM_SIZE_DIRECTIVE(L(DP_P3)) - - .p2align 3 -L(DP_P1): /* double precision polynomial coefficient P1 */ - .long 0x008d6118, 0x3fe00000 - .type L(DP_P1), @object - ASM_SIZE_DIRECTIVE(L(DP_P1)) - - .p2align 3 -L(DP_P2): /* double precision polynomial coefficient P2 */ - .long 0xda752d4f, 0x3fc55550 - .type L(DP_P2), @object - ASM_SIZE_DIRECTIVE(L(DP_P2)) - - .p2align 3 -L(DP_P0): /* double precision polynomial coefficient P0 */ - .long 0xffffe7c6, 0x3fefffff - .type L(DP_P0), @object - ASM_SIZE_DIRECTIVE(L(DP_P0)) - - .p2align 2 -L(SP_RANGE): /* single precision overflow/underflow bounds */ - .long 0x42b17217 /* if x>this bound, then result overflows */ - .long 0x42cff1b4 /* if x<this bound, then result underflows */ - .type L(SP_RANGE), @object - ASM_SIZE_DIRECTIVE(L(SP_RANGE)) - - .p2align 2 -L(SP_INF_0): - .long 0x7f800000 /* single precision Inf */ - .long 0 /* single precision zero */ - .type L(SP_INF_0), @object - ASM_SIZE_DIRECTIVE(L(SP_INF_0)) - - .section .rodata.cst4,"aM",@progbits,4 - .p2align 2 -L(SP_RS): /* single precision 2^23+2^22 */ - 
.long 0x4b400000 - .type L(SP_RS), @object - ASM_SIZE_DIRECTIVE(L(SP_RS)) - - .p2align 2 -L(SP_SMALL): /* single precision small value 2^(-100) */ - .long 0x0d800000 - .type L(SP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(SP_SMALL)) - - .p2align 2 -L(SP_LARGE): /* single precision large value 2^100 */ - .long 0x71800000 - .type L(SP_LARGE), @object - ASM_SIZE_DIRECTIVE(L(SP_LARGE)) - - .p2align 2 -L(SP_ONE): /* single precision 1.0 */ - .long 0x3f800000 - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -strong_alias (__ieee754_expf, __expf_finite) diff --git a/sysdeps/x86_64/fpu/e_expl.S b/sysdeps/x86_64/fpu/e_expl.S index 8b3ddaec59..b75a103803 100644 --- a/sysdeps/x86_64/fpu/e_expl.S +++ b/sysdeps/x86_64/fpu/e_expl.S @@ -22,6 +22,7 @@ * -- moshier@na-net.ornl.gov */ +#include <libm-alias-ldouble.h> #include <machine/asm.h> #include <x86_64-math-asm.h> @@ -99,7 +100,7 @@ ENTRY(IEEE754_EXPL) /* Below -64.0 (may be -NaN or -Inf). */ andb %ah, %dh cmpb $0x01, %dh - je 2f /* Is +-NaN, jump. */ + je 6f /* Is +-NaN, jump. */ jmp 1f /* -large, possibly -Inf. */ 4: /* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). */ @@ -141,7 +142,7 @@ ENTRY(IEEE754_EXPL) cmpb $0x05, %dh je 1f /* Is +-Inf, jump. */ cmpb $0x01, %dh - je 2f /* Is +-NaN, jump. */ + je 6f /* Is +-NaN, jump. */ /* Overflow or underflow; saturate. */ fstp %st fldt MO(csat) @@ -207,10 +208,13 @@ ENTRY(IEEE754_EXPL) fldz /* Set result to 0. */ #endif 2: ret +6: /* NaN argument. 
*/ + fadd %st + ret END(IEEE754_EXPL) #ifdef USE_AS_EXPM1L libm_hidden_def (__expm1l) -weak_alias (__expm1l, expm1l) +libm_alias_ldouble (__expm1, expm1) #else strong_alias (IEEE754_EXPL, EXPL_FINITE) #endif diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S index 8fa61644c1..e0cb88e32e 100644 --- a/sysdeps/x86_64/fpu/e_log10l.S +++ b/sysdeps/x86_64/fpu/e_log10l.S @@ -64,6 +64,7 @@ ENTRY(__ieee754_log10l) jnz 4b // in case x is ±Inf fstp %st(1) fstp %st(1) + fadd %st(0) ret END(__ieee754_log10l) diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S index a063255ddd..023ec29164 100644 --- a/sysdeps/x86_64/fpu/e_log2l.S +++ b/sysdeps/x86_64/fpu/e_log2l.S @@ -63,6 +63,7 @@ ENTRY(__ieee754_log2l) jnz 4b // in case x is ±Inf fstp %st(1) fstp %st(1) + fadd %st(0) ret END (__ieee754_log2l) diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S index dbe6fd59dc..0d3576f48b 100644 --- a/sysdeps/x86_64/fpu/e_logl.S +++ b/sysdeps/x86_64/fpu/e_logl.S @@ -66,6 +66,7 @@ ENTRY(__ieee754_logl) jnz 4b // in case x is +-Inf fstp %st(1) fstp %st(1) + fadd %st(0) ret END (__ieee754_logl) diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S index 1f68cf0102..f32228104e 100644 --- a/sysdeps/x86_64/fpu/e_powl.S +++ b/sysdeps/x86_64/fpu/e_powl.S @@ -1,5 +1,5 @@ /* ix87 specific implementation of pow function. - Copyright (C) 1996-2016 Free Software Foundation, Inc. + Copyright (C) 1996-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996. 
@@ -26,9 +26,9 @@ .type one,@object one: .double 1.0 ASM_SIZE_DIRECTIVE(one) - .type p3,@object -p3: .byte 0, 0, 0, 0, 0, 0, 0x20, 0x40 - ASM_SIZE_DIRECTIVE(p3) + .type p2,@object +p2: .byte 0, 0, 0, 0, 0, 0, 0x10, 0x40 + ASM_SIZE_DIRECTIVE(p2) .type p63,@object p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 ASM_SIZE_DIRECTIVE(p63) @@ -136,12 +136,12 @@ ENTRY(__ieee754_powl) jmp 3f 9: /* OK, we have an integer value for y. Unless very small - (we use < 8), use the algorithm for real exponent to avoid + (we use < 4), use the algorithm for real exponent to avoid accumulation of errors. */ - fldl MO(p3) // 8 : y : x - fld %st(1) // y : 8 : y : x - fabs // |y| : 8 : y : x - fcomip %st(1), %st // 8 : y : x + fldl MO(p2) // 4 : y : x + fld %st(1) // y : 4 : y : x + fabs // |y| : 4 : y : x + fcomip %st(1), %st // 4 : y : x fstp %st(0) // y : x jnc 3f mov -8(%rsp),%eax @@ -184,9 +184,15 @@ ENTRY(__ieee754_powl) 30: fldt 8(%rsp) // x : y fldl MO(one) // 1.0 : x : y fucomip %st(1),%st // x : y - je 31f - fxch // y : x -31: fstp %st(1) + je 32f +31: /* At least one argument NaN, and result should be NaN. */ + faddp + ret +32: jc 31b + /* pow (1, NaN); check if the NaN signaling. 
*/ + testb $0x40, 31(%rsp) + jz 31b + fstp %st(1) ret .align ALIGNARG(4) @@ -217,12 +223,24 @@ ENTRY(__ieee754_powl) cfi_adjust_cfa_offset (-40) ret - // pow(x,±0) = 1 + // pow(x,±0) = 1, unless x is sNaN .align ALIGNARG(4) 11: fstp %st(0) // pop y + fldt 8(%rsp) // x + fxam + fnstsw + andb $0x45, %ah + cmpb $0x01, %ah + je 112f // x is NaN +111: fstp %st(0) fldl MO(one) ret +112: testb $0x40, 15(%rsp) + jnz 111b + fadd %st(0) + ret + // y == ±inf .align ALIGNARG(4) 12: fstp %st(0) // pop y @@ -255,6 +273,7 @@ ENTRY(__ieee754_powl) .align ALIGNARG(4) 13: fldt 8(%rsp) // load x == NaN + fadd %st(0) ret .align ALIGNARG(4) diff --git a/sysdeps/x86_64/fpu/e_scalbl.S b/sysdeps/x86_64/fpu/e_scalbl.S index 331bee580c..2982dc3b9e 100644 --- a/sysdeps/x86_64/fpu/e_scalbl.S +++ b/sysdeps/x86_64/fpu/e_scalbl.S @@ -44,7 +44,7 @@ ENTRY(__ieee754_scalbl) fnstsw andl $0x4500, %eax cmpl $0x0100, %eax - je 3f + je 2f fld %st(1) frndint fcomip %st(2), %st @@ -75,15 +75,8 @@ ENTRY(__ieee754_scalbl) #endif ret - /* The result is NaN, but we must not raise an exception. - So use a variable. */ -2: fstp %st - fstp %st - fldl MO(nan) - ret - - /* The first parameter is a NaN. Return it. */ -3: fstp %st(1) + /* The result is NaN; raise an exception for sNaN arguments. */ +2: faddp ret /* Return NaN and raise the invalid exception. */ diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c index 4b86434913..f4c2e5fd7c 100644 --- a/sysdeps/x86_64/fpu/e_sqrt.c +++ b/sysdeps/x86_64/fpu/e_sqrt.c @@ -1,5 +1,5 @@ /* Square root of floating point number. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c index 639137b735..8f76ccb530 100644 --- a/sysdeps/x86_64/fpu/e_sqrtf.c +++ b/sysdeps/x86_64/fpu/e_sqrtf.c @@ -1,5 +1,5 @@ /* Square root of floating point number. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fclrexcpt.c b/sysdeps/x86_64/fpu/fclrexcpt.c index a8e00c0141..e7f6aa341f 100644 --- a/sysdeps/x86_64/fpu/fclrexcpt.c +++ b/sysdeps/x86_64/fpu/fclrexcpt.c @@ -1,5 +1,5 @@ /* Clear given exceptions in current floating-point environment. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fedisblxcpt.c b/sysdeps/x86_64/fpu/fedisblxcpt.c index f1ea6cfa97..9153f997ed 100644 --- a/sysdeps/x86_64/fpu/fedisblxcpt.c +++ b/sysdeps/x86_64/fpu/fedisblxcpt.c @@ -1,5 +1,5 @@ /* Disable floating-point exceptions. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2001. diff --git a/sysdeps/x86_64/fpu/feenablxcpt.c b/sysdeps/x86_64/fpu/feenablxcpt.c index df4c628b8d..7a3e26b3f9 100644 --- a/sysdeps/x86_64/fpu/feenablxcpt.c +++ b/sysdeps/x86_64/fpu/feenablxcpt.c @@ -1,5 +1,5 @@ /* Enable floating-point exceptions. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2001. 
diff --git a/sysdeps/x86_64/fpu/fegetenv.c b/sysdeps/x86_64/fpu/fegetenv.c index a28efb36f3..9461af7575 100644 --- a/sysdeps/x86_64/fpu/fegetenv.c +++ b/sysdeps/x86_64/fpu/fegetenv.c @@ -1,5 +1,5 @@ /* Store current floating-point environment. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fegetexcept.c b/sysdeps/x86_64/fpu/fegetexcept.c index 8acd0382a0..ce54c251ba 100644 --- a/sysdeps/x86_64/fpu/fegetexcept.c +++ b/sysdeps/x86_64/fpu/fegetexcept.c @@ -1,5 +1,5 @@ /* Get enabled floating-point exceptions. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2001. diff --git a/sysdeps/x86_64/fpu/fegetmode.c b/sysdeps/x86_64/fpu/fegetmode.c new file mode 100644 index 0000000000..cc4f12649b --- /dev/null +++ b/sysdeps/x86_64/fpu/fegetmode.c @@ -0,0 +1,28 @@ +/* Store current floating-point control modes. x86_64 version. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <fenv.h> +#include <fpu_control.h> + +int +fegetmode (femode_t *modep) +{ /* Save the current floating-point control state into *MODEP; always returns 0. */ + _FPU_GETCW (modep->__control_word); /* NOTE(review): _FPU_GETCW comes from <fpu_control.h> and is not visible here; presumably it reads the x87 control word into its argument -- confirm. */ + __asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr)); /* stmxcsr writes the whole MXCSR register to the memory operand; no bits are masked off before storing. */ + return 0; +} diff --git a/sysdeps/x86_64/fpu/fegetround.c b/sysdeps/x86_64/fpu/fegetround.c index 296d366560..0f31cafedd 100644 --- a/sysdeps/x86_64/fpu/fegetround.c +++ b/sysdeps/x86_64/fpu/fegetround.c @@ -1,5 +1,5 @@ /* Return current rounding direction. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/feholdexcpt.c b/sysdeps/x86_64/fpu/feholdexcpt.c index a040c3dea5..dec689beb2 100644 --- a/sysdeps/x86_64/fpu/feholdexcpt.c +++ b/sysdeps/x86_64/fpu/feholdexcpt.c @@ -1,5 +1,5 @@ /* Store current floating-point environment and clear exceptions. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fesetenv.c b/sysdeps/x86_64/fpu/fesetenv.c index 355d02aaa6..c12dba5101 100644 --- a/sysdeps/x86_64/fpu/fesetenv.c +++ b/sysdeps/x86_64/fpu/fesetenv.c @@ -1,5 +1,5 @@ /* Install given floating-point environment. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/fpu/fesetexcept.c index c7a3dff747..122a7629dc 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen2.c +++ b/sysdeps/x86_64/fpu/fesetexcept.c @@ -1,5 +1,5 @@ -/* Tests for SSE ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* Set given exception flags. x86_64 version. 
+ Copyright (C) 2016-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,13 +16,16 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include "test-double-vlen2.h" +#include <fenv.h> -#define TEST_VECTOR_cos 1 -#define TEST_VECTOR_sin 1 -#define TEST_VECTOR_sincos 1 -#define TEST_VECTOR_log 1 -#define TEST_VECTOR_exp 1 -#define TEST_VECTOR_pow 1 +int +fesetexcept (int excepts) +{ + unsigned int mxcsr; -#include "libm-test.c" + __asm__ ("stmxcsr %0" : "=m" (*&mxcsr)); + mxcsr |= excepts & FE_ALL_EXCEPT; + __asm__ ("ldmxcsr %0" : : "m" (*&mxcsr)); + + return 0; +} diff --git a/sysdeps/x86_64/fpu/fesetmode.c b/sysdeps/x86_64/fpu/fesetmode.c new file mode 100644 index 0000000000..0771e4c10a --- /dev/null +++ b/sysdeps/x86_64/fpu/fesetmode.c @@ -0,0 +1,50 @@ +/* Install given floating-point control modes. x86_64 version. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fenv.h> +#include <fpu_control.h> + +/* All exceptions, including the x86-specific "denormal operand" + exception. 
*/ +#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM) + +int +fesetmode (const femode_t *modep) +{ + fpu_control_t cw; + unsigned int mxcsr; + __asm__ ("stmxcsr %0" : "=m" (mxcsr)); + /* Preserve SSE exception flags but restore other state in + MXCSR. */ + mxcsr &= FE_ALL_EXCEPT_X86; + if (modep == FE_DFL_MODE) + { + cw = _FPU_DEFAULT; + /* Default MXCSR state has all bits zero except for those + masking exceptions. */ + mxcsr |= FE_ALL_EXCEPT_X86 << 7; + } + else + { + cw = modep->__control_word; + mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86; + } + _FPU_SETCW (cw); + __asm__ ("ldmxcsr %0" : : "m" (mxcsr)); + return 0; +} diff --git a/sysdeps/x86_64/fpu/fesetround.c b/sysdeps/x86_64/fpu/fesetround.c index 475d63f4db..e5afc1d57a 100644 --- a/sysdeps/x86_64/fpu/fesetround.c +++ b/sysdeps/x86_64/fpu/fesetround.c @@ -1,5 +1,5 @@ /* Set current rounding direction. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/feupdateenv.c b/sysdeps/x86_64/fpu/feupdateenv.c index f035d57ca8..00da535e64 100644 --- a/sysdeps/x86_64/fpu/feupdateenv.c +++ b/sysdeps/x86_64/fpu/feupdateenv.c @@ -1,5 +1,5 @@ /* Install given floating-point environment and raise exceptions. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. diff --git a/sysdeps/x86_64/fpu/fgetexcptflg.c b/sysdeps/x86_64/fpu/fgetexcptflg.c index 938cf3e62b..16719ceb5e 100644 --- a/sysdeps/x86_64/fpu/fgetexcptflg.c +++ b/sysdeps/x86_64/fpu/fgetexcptflg.c @@ -1,5 +1,5 @@ /* Store current representation for exceptions. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c index e2abbbec33..ca1c223053 100644 --- a/sysdeps/x86_64/fpu/fraiseexcpt.c +++ b/sysdeps/x86_64/fpu/fraiseexcpt.c @@ -1,5 +1,5 @@ /* Raise given exceptions. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/fsetexcptflg.c b/sysdeps/x86_64/fpu/fsetexcptflg.c index 76f7bad9a8..821dd9d786 100644 --- a/sysdeps/x86_64/fpu/fsetexcptflg.c +++ b/sysdeps/x86_64/fpu/fsetexcptflg.c @@ -1,5 +1,5 @@ /* Set floating-point environment exception handling. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/ftestexcept.c b/sysdeps/x86_64/fpu/ftestexcept.c index c8f2c01c67..63167c68df 100644 --- a/sysdeps/x86_64/fpu/ftestexcept.c +++ b/sysdeps/x86_64/fpu/ftestexcept.c @@ -1,5 +1,5 @@ /* Test exception in current environment. - Copyright (C) 2001-2016 Free Software Foundation, Inc. + Copyright (C) 2001-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/k_rem_pio2l.c b/sysdeps/x86_64/fpu/k_rem_pio2l.c deleted file mode 100644 index eea55a98d2..0000000000 --- a/sysdeps/x86_64/fpu/k_rem_pio2l.c +++ /dev/null @@ -1 +0,0 @@ -/* Not needed. 
*/ diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 445b47527d..912db318b6 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -3,1015 +3,1293 @@ # Maximal error of functions: Function: "acos": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "acos_downward": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "acos_towardzero": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "acos_upward": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "acosh": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "acosh_downward": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: "acosh_towardzero": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: "acosh_upward": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: "asin": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "asin_downward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "asin_towardzero": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "asin_upward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "asinh": double: 1 float: 1 +float128: 3 idouble: 1 ifloat: 1 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "asinh_downward": double: 3 float: 3 +float128: 4 idouble: 3 ifloat: 3 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "asinh_towardzero": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: "asinh_upward": double: 3 float: 3 +float128: 4 
idouble: 3 ifloat: 3 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "atan": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "atan2": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "atan2_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "atan2_towardzero": double: 1 float: 2 +float128: 3 idouble: 1 ifloat: 2 +ifloat128: 3 ildouble: 1 ldouble: 1 Function: "atan2_upward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "atan_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "atan_towardzero": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "atan_upward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "atanh": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "atanh_downward": double: 3 float: 3 +float128: 4 idouble: 3 ifloat: 3 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "atanh_towardzero": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: "atanh_upward": double: 3 float: 3 +float128: 4 idouble: 3 ifloat: 3 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "cabs": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "cabs_downward": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "cabs_towardzero": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "cabs_upward": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of "cacos": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Imaginary part of "cacos": -double: 1 +double: 2 float: 2 -idouble: 1 +float128: 2 
+idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "cacos_downward": -double: 2 +double: 3 float: 2 -idouble: 2 +float128: 3 +idouble: 3 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "cacos_downward": double: 5 float: 3 +float128: 6 idouble: 5 ifloat: 3 -ildouble: 5 -ldouble: 5 +ifloat128: 6 +ildouble: 6 +ldouble: 6 Function: Real part of "cacos_towardzero": -double: 2 +double: 3 float: 2 -idouble: 2 +float128: 3 +idouble: 3 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "cacos_towardzero": double: 5 float: 3 +float128: 5 idouble: 5 ifloat: 3 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: Real part of "cacos_upward": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "cacos_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 +double: 5 +float: 7 +float128: 7 +idouble: 5 +ifloat: 7 +ifloat128: 7 +ildouble: 7 +ldouble: 7 Function: Real part of "cacosh": -double: 1 +double: 2 float: 2 -idouble: 1 +float128: 2 +idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Imaginary part of "cacosh": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "cacosh_downward": double: 5 float: 3 +float128: 5 idouble: 5 ifloat: 3 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: Imaginary part of "cacosh_downward": -double: 2 -float: 2 -idouble: 2 -ifloat: 2 -ildouble: 2 -ldouble: 2 +double: 3 +float: 3 +float128: 4 +idouble: 3 +ifloat: 3 +ifloat128: 4 +ildouble: 3 +ldouble: 3 Function: Real part of "cacosh_towardzero": double: 5 float: 3 +float128: 5 idouble: 5 ifloat: 3 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: Imaginary part of "cacosh_towardzero": -double: 2 +double: 3 float: 2 -idouble: 2 +float128: 3 +idouble: 3 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Real part of "cacosh_upward": double: 4 float: 4 +float128: 
6 idouble: 4 ifloat: 4 +ifloat128: 6 ildouble: 5 ldouble: 5 Function: Imaginary part of "cacosh_upward": -double: 2 +double: 3 float: 2 -idouble: 2 +float128: 4 +idouble: 3 ifloat: 2 -ildouble: 2 -ldouble: 2 +ifloat128: 4 +ildouble: 3 +ldouble: 3 Function: "carg": float: 1 +float128: 2 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "carg_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "carg_towardzero": double: 1 float: 2 +float128: 3 idouble: 1 ifloat: 2 +ifloat128: 3 ildouble: 1 ldouble: 1 Function: "carg_upward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "casin": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Imaginary part of "casin": -double: 1 +double: 2 float: 2 -idouble: 1 +float128: 2 +idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "casin_downward": double: 3 -float: 1 +float: 2 +float128: 3 idouble: 3 -ifloat: 1 -ildouble: 2 -ldouble: 2 +ifloat: 2 +ifloat128: 3 +ildouble: 3 +ldouble: 3 Function: Imaginary part of "casin_downward": double: 5 float: 3 +float128: 6 idouble: 5 ifloat: 3 -ildouble: 5 -ldouble: 5 +ifloat128: 6 +ildouble: 6 +ldouble: 6 Function: Real part of "casin_towardzero": double: 3 float: 1 +float128: 3 idouble: 3 ifloat: 1 -ildouble: 2 -ldouble: 2 +ifloat128: 3 +ildouble: 3 +ldouble: 3 Function: Imaginary part of "casin_towardzero": double: 5 float: 3 +float128: 5 idouble: 5 ifloat: 3 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: Real part of "casin_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 +double: 3 +float: 2 +float128: 3 +idouble: 3 +ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "casin_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 +double: 5 +float: 7 +float128: 7 +idouble: 5 +ifloat: 7 +ifloat128: 7 +ildouble: 7 +ldouble: 7 Function: 
Real part of "casinh": -double: 1 +double: 2 float: 2 -idouble: 1 +float128: 2 +idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Imaginary part of "casinh": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "casinh_downward": double: 5 float: 3 +float128: 6 idouble: 5 ifloat: 3 -ildouble: 5 -ldouble: 5 +ifloat128: 6 +ildouble: 6 +ldouble: 6 Function: Imaginary part of "casinh_downward": double: 3 -float: 1 +float: 2 +float128: 3 idouble: 3 -ifloat: 1 -ildouble: 2 -ldouble: 2 +ifloat: 2 +ifloat128: 3 +ildouble: 3 +ldouble: 3 Function: Real part of "casinh_towardzero": double: 5 float: 3 +float128: 5 idouble: 5 ifloat: 3 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: Imaginary part of "casinh_towardzero": double: 3 float: 1 +float128: 3 idouble: 3 ifloat: 1 -ildouble: 2 -ldouble: 2 +ifloat128: 3 +ildouble: 3 +ldouble: 3 Function: Real part of "casinh_upward": -double: 4 -float: 4 -idouble: 4 -ifloat: 4 -ildouble: 5 -ldouble: 5 +double: 5 +float: 7 +float128: 7 +idouble: 5 +ifloat: 7 +ifloat128: 7 +ildouble: 7 +ldouble: 7 Function: Imaginary part of "casinh_upward": -double: 2 +double: 3 float: 2 -idouble: 2 +float128: 3 +idouble: 3 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Real part of "catan": +double: 1 float: 1 +float128: 1 +idouble: 1 ifloat: 1 +ifloat128: 1 +ildouble: 1 +ldouble: 1 Function: Imaginary part of "catan": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of "catan_downward": double: 1 -float: 1 +float: 2 +float128: 2 idouble: 1 -ifloat: 1 +ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Imaginary part of "catan_downward": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: Real part of "catan_towardzero": double: 1 -float: 1 +float: 2 +float128: 2 idouble: 1 -ifloat: 1 +ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Imaginary 
part of "catan_towardzero": double: 2 -float: 1 +float: 2 +float128: 2 idouble: 2 -ifloat: 1 +ifloat: 2 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: Real part of "catan_upward": +double: 1 float: 1 +float128: 2 +idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Imaginary part of "catan_upward": double: 3 float: 3 +float128: 3 idouble: 3 ifloat: 3 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Real part of "catanh": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Imaginary part of "catanh": +double: 1 float: 1 +float128: 1 +idouble: 1 ifloat: 1 +ifloat128: 1 +ildouble: 1 +ldouble: 1 Function: Real part of "catanh_downward": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: Imaginary part of "catanh_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "catanh_towardzero": double: 2 -float: 1 +float: 2 +float128: 2 idouble: 2 -ifloat: 1 +ifloat: 2 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: Imaginary part of "catanh_towardzero": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "catanh_upward": double: 4 -float: 3 +float: 4 +float128: 4 idouble: 4 -ifloat: 3 +ifloat: 4 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: Imaginary part of "catanh_upward": +double: 1 float: 1 +float128: 2 +idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "cbrt": double: 3 float: 1 +float128: 1 idouble: 3 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "cbrt_downward": double: 4 float: 1 +float128: 1 idouble: 4 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "cbrt_towardzero": double: 3 float: 1 +float128: 1 idouble: 3 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "cbrt_upward": double: 5 float: 1 +float128: 1 idouble: 5 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of 
"ccos": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Imaginary part of "ccos": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of "ccos_downward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "ccos_downward": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "ccos_towardzero": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "ccos_towardzero": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "ccos_upward": double: 1 float: 2 +float128: 3 idouble: 1 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "ccos_upward": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "ccosh": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Imaginary part of "ccosh": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of "ccosh_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "ccosh_downward": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "ccosh_towardzero": double: 1 float: 3 +float128: 2 idouble: 1 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "ccosh_towardzero": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "ccosh_upward": double: 1 float: 2 +float128: 3 idouble: 1 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "ccosh_upward": double: 2 float: 2 
+float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "cexp": double: 2 float: 1 +float128: 1 idouble: 2 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Imaginary part of "cexp": double: 1 float: 2 +float128: 1 idouble: 1 ifloat: 2 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of "cexp_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "cexp_downward": double: 1 float: 3 +float128: 2 idouble: 1 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "cexp_towardzero": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "cexp_towardzero": double: 1 float: 3 +float128: 2 idouble: 1 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "cexp_upward": double: 1 float: 2 +float128: 3 idouble: 1 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "cexp_upward": double: 1 float: 2 +float128: 3 idouble: 1 ifloat: 2 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Real part of "clog": double: 3 float: 3 +float128: 2 idouble: 3 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "clog": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of "clog10": double: 3 float: 4 +float128: 2 idouble: 3 ifloat: 4 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: Imaginary part of "clog10": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "clog10_downward": double: 5 -float: 4 +float: 5 +float128: 3 idouble: 5 -ifloat: 4 +ifloat: 5 +ifloat128: 3 ildouble: 8 ldouble: 8 Function: Imaginary part of "clog10_downward": double: 2 float: 4 +float128: 3 idouble: 2 ifloat: 4 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Real part of "clog10_towardzero": double: 5 float: 5 +float128: 4 idouble: 5 ifloat: 5 +ifloat128: 4 
ildouble: 8 ldouble: 8 Function: Imaginary part of "clog10_towardzero": double: 2 float: 4 +float128: 3 idouble: 2 ifloat: 4 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Real part of "clog10_upward": double: 6 float: 5 +float128: 4 idouble: 6 ifloat: 5 +ifloat128: 4 ildouble: 8 ldouble: 8 Function: Imaginary part of "clog10_upward": double: 2 float: 4 +float128: 3 idouble: 2 ifloat: 4 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Real part of "clog_downward": double: 4 float: 3 +float128: 3 idouble: 4 ifloat: 3 +ifloat128: 3 ildouble: 5 ldouble: 5 Function: Imaginary part of "clog_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "clog_towardzero": double: 4 float: 4 +float128: 3 idouble: 4 ifloat: 4 +ifloat128: 3 ildouble: 5 ldouble: 5 Function: Imaginary part of "clog_towardzero": double: 1 float: 3 +float128: 2 idouble: 1 ifloat: 3 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "clog_upward": double: 4 float: 3 +float128: 4 idouble: 4 ifloat: 3 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: Imaginary part of "clog_upward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "cos": +double: 1 +float128: 1 +idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "cos_downward": double: 1 +float128: 3 idouble: 1 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "cos_towardzero": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "cos_upward": double: 1 +float128: 2 idouble: 1 +ifloat128: 2 ildouble: 2 ldouble: 2 @@ -1029,7 +1307,7 @@ Function: "cos_vlen4_avx2": double: 2 Function: "cos_vlen8": -double: 1 +double: 2 float: 1 Function: "cos_vlen8_avx2": @@ -1038,546 +1316,690 @@ float: 1 Function: "cosh": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "cosh_downward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 
ldouble: 3 Function: "cosh_towardzero": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "cosh_upward": double: 1 float: 2 +float128: 3 idouble: 1 ifloat: 2 +ifloat128: 1 ildouble: 2 ldouble: 3 Function: Real part of "cpow": double: 2 float: 5 +float128: 4 idouble: 2 ifloat: 5 +ifloat128: 4 ildouble: 3 ldouble: 3 Function: Imaginary part of "cpow": float: 2 +float128: 1 ifloat: 2 +ifloat128: 1 ildouble: 4 ldouble: 4 Function: Real part of "cpow_downward": double: 4 float: 8 +float128: 6 idouble: 4 ifloat: 8 +ifloat128: 6 ildouble: 7 ldouble: 7 Function: Imaginary part of "cpow_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "cpow_towardzero": double: 4 float: 8 +float128: 6 idouble: 4 ifloat: 8 +ifloat128: 6 ildouble: 7 ldouble: 7 Function: Imaginary part of "cpow_towardzero": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: Real part of "cpow_upward": double: 4 float: 1 +float128: 3 idouble: 4 ifloat: 1 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "cpow_upward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "csin": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 +Function: Imaginary part of "csin": +float128: 1 +ifloat128: 1 + Function: Real part of "csin_downward": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "csin_downward": double: 1 float: 2 +float128: 2 idouble: 1 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "csin_towardzero": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "csin_towardzero": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: 
Real part of "csin_upward": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "csin_upward": double: 1 float: 3 +float128: 3 idouble: 1 ifloat: 3 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Real part of "csinh": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Imaginary part of "csinh": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: Real part of "csinh_downward": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "csinh_downward": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "csinh_towardzero": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Imaginary part of "csinh_towardzero": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "csinh_upward": double: 1 float: 3 +float128: 3 idouble: 1 ifloat: 3 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Imaginary part of "csinh_upward": double: 2 float: 3 +float128: 2 idouble: 2 ifloat: 3 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: Real part of "csqrt": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Imaginary part of "csqrt": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: Real part of "csqrt_downward": double: 5 float: 4 +float128: 4 idouble: 5 ifloat: 4 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: Imaginary part of "csqrt_downward": double: 4 float: 3 +float128: 3 idouble: 4 ifloat: 3 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: Real part of "csqrt_towardzero": double: 4 float: 3 +float128: 3 idouble: 4 ifloat: 3 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: Imaginary part of "csqrt_towardzero": double: 4 float: 3 
+float128: 3 idouble: 4 ifloat: 3 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: Real part of "csqrt_upward": double: 5 float: 4 +float128: 4 idouble: 5 ifloat: 4 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: Imaginary part of "csqrt_upward": double: 3 float: 3 +float128: 3 idouble: 3 ifloat: 3 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: Real part of "ctan": double: 1 float: 1 +float128: 3 idouble: 1 ifloat: 1 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Imaginary part of "ctan": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 1 ldouble: 1 Function: Real part of "ctan_downward": double: 6 float: 5 +float128: 4 idouble: 6 ifloat: 5 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: Imaginary part of "ctan_downward": double: 2 float: 2 +float128: 5 idouble: 2 ifloat: 2 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: Real part of "ctan_towardzero": double: 5 float: 3 +float128: 4 idouble: 5 ifloat: 3 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: Imaginary part of "ctan_towardzero": double: 2 float: 2 +float128: 5 idouble: 2 ifloat: 2 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: Real part of "ctan_upward": double: 2 float: 4 +float128: 5 idouble: 2 ifloat: 4 +ifloat128: 5 ildouble: 3 ldouble: 3 Function: Imaginary part of "ctan_upward": double: 2 -float: 1 +float: 2 +float128: 5 idouble: 2 -ifloat: 1 +ifloat: 2 +ifloat128: 5 ildouble: 3 ldouble: 3 Function: Real part of "ctanh": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 1 ldouble: 1 Function: Imaginary part of "ctanh": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: Real part of "ctanh_downward": double: 4 float: 2 +float128: 5 idouble: 4 ifloat: 2 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: Imaginary part of "ctanh_downward": double: 6 float: 5 +float128: 4 idouble: 6 ifloat: 5 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: Real part of "ctanh_towardzero": double: 2 float: 2 +float128: 5 idouble: 
2 ifloat: 2 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: Imaginary part of "ctanh_towardzero": double: 5 float: 3 +float128: 3 idouble: 5 ifloat: 3 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: Real part of "ctanh_upward": double: 2 float: 2 +float128: 5 idouble: 2 ifloat: 2 +ifloat128: 5 ildouble: 3 ldouble: 3 Function: Imaginary part of "ctanh_upward": double: 2 float: 3 +float128: 5 idouble: 2 ifloat: 3 +ifloat128: 5 ildouble: 3 ldouble: 3 Function: "erf": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "erf_downward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "erf_towardzero": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "erf_upward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "erfc": double: 3 float: 2 +float128: 2 idouble: 3 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: "erfc_downward": double: 5 float: 6 +float128: 5 idouble: 5 ifloat: 6 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: "erfc_towardzero": double: 3 float: 4 +float128: 4 idouble: 3 ifloat: 4 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: "erfc_upward": double: 5 float: 6 +float128: 5 idouble: 5 ifloat: 6 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: "exp": +float128: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "exp10": double: 2 +float128: 2 idouble: 2 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "exp10_downward": double: 2 float: 1 +float128: 3 idouble: 2 ifloat: 1 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: "exp10_towardzero": double: 2 float: 1 +float128: 3 idouble: 2 ifloat: 1 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: "exp10_upward": double: 2 float: 1 +float128: 3 idouble: 2 ifloat: 1 +ifloat128: 3 ildouble: 2 ldouble: 2 Function: "exp2": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: 
"exp2_downward": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "exp2_towardzero": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "exp2_upward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "exp_downward": double: 1 +float: 1 idouble: 1 +ifloat: 1 ildouble: 1 ldouble: 1 Function: "exp_towardzero": double: 1 +float: 1 idouble: 1 +ifloat: 1 ildouble: 2 ldouble: 2 @@ -1612,32 +2034,40 @@ float: 1 Function: "expm1": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "expm1_downward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: "expm1_towardzero": double: 1 float: 2 +float128: 4 idouble: 1 ifloat: 2 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: "expm1_upward": double: 1 float: 1 +float128: 3 idouble: 1 ifloat: 1 +ifloat128: 3 ildouble: 4 ldouble: 4 @@ -1675,275 +2105,347 @@ ldouble: 6 Function: "hypot": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "hypot_downward": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "hypot_towardzero": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "hypot_upward": double: 1 +float128: 1 idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "j0": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "j0_downward": double: 2 float: 4 +float128: 4 idouble: 2 ifloat: 4 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: "j0_towardzero": double: 3 float: 2 +float128: 2 idouble: 3 ifloat: 2 +ifloat128: 2 ildouble: 5 ldouble: 5 Function: "j0_upward": double: 3 float: 2 +float128: 5 idouble: 3 ifloat: 2 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: "j1": double: 1 float: 2 +float128: 4 idouble: 1 ifloat: 2 +ifloat128: 4 ildouble: 1 ldouble: 
1 Function: "j1_downward": double: 3 float: 3 +float128: 4 idouble: 3 ifloat: 3 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: "j1_towardzero": double: 3 float: 2 +float128: 4 idouble: 3 ifloat: 2 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: "j1_upward": double: 3 float: 5 +float128: 3 idouble: 3 ifloat: 5 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "jn": double: 4 float: 4 +float128: 7 idouble: 4 ifloat: 4 +ifloat128: 7 ildouble: 4 ldouble: 4 Function: "jn_downward": double: 5 float: 5 +float128: 8 idouble: 5 ifloat: 5 +ifloat128: 8 ildouble: 4 ldouble: 4 Function: "jn_towardzero": double: 5 float: 5 +float128: 8 idouble: 5 ifloat: 5 +ifloat128: 8 ildouble: 5 ldouble: 5 Function: "jn_upward": double: 5 float: 5 +float128: 7 idouble: 5 ifloat: 5 +ifloat128: 7 ildouble: 5 ldouble: 5 Function: "lgamma": double: 4 float: 4 +float128: 5 idouble: 4 ifloat: 4 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: "lgamma_downward": double: 5 float: 4 +float128: 8 idouble: 5 ifloat: 4 +ifloat128: 8 ildouble: 7 ldouble: 7 Function: "lgamma_towardzero": double: 5 float: 4 +float128: 5 idouble: 5 ifloat: 4 +ifloat128: 5 ildouble: 7 ldouble: 7 Function: "lgamma_upward": double: 5 float: 5 +float128: 8 idouble: 5 ifloat: 5 +ifloat128: 8 ildouble: 6 ldouble: 6 Function: "log": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "log10": double: 2 float: 2 +float128: 1 idouble: 2 ifloat: 2 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "log10_downward": double: 2 float: 3 +float128: 1 idouble: 2 ifloat: 3 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "log10_towardzero": double: 2 float: 2 +float128: 1 idouble: 2 ifloat: 2 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "log10_upward": double: 2 float: 2 +float128: 1 idouble: 2 ifloat: 2 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "log1p": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "log1p_downward": double: 2 float: 2 +float128: 3 idouble: 2 
ifloat: 2 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: "log1p_towardzero": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: "log1p_upward": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: "log2": double: 2 float: 1 +float128: 2 idouble: 2 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 Function: "log2_downward": double: 3 float: 3 +float128: 3 idouble: 3 ifloat: 3 +ifloat128: 3 ildouble: 1 ldouble: 1 Function: "log2_towardzero": double: 2 float: 2 +float128: 1 idouble: 2 ifloat: 2 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "log2_upward": double: 3 float: 3 +float128: 1 idouble: 3 ifloat: 3 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "log_downward": float: 2 +float128: 1 ifloat: 2 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "log_towardzero": float: 2 +float128: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "log_upward": double: 1 float: 2 +float128: 1 idouble: 1 ifloat: 2 +ifloat128: 1 ildouble: 1 ldouble: 1 @@ -1965,67 +2467,47 @@ double: 1 float: 3 Function: "log_vlen8_avx2": -float: 2 +float: 3 Function: "pow": +double: 1 float: 1 +float128: 2 +idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 1 ldouble: 1 -Function: "pow10": -double: 2 -idouble: 2 -ildouble: 1 -ldouble: 1 - -Function: "pow10_downward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_towardzero": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - -Function: "pow10_upward": -double: 2 -float: 1 -idouble: 2 -ifloat: 1 -ildouble: 2 -ldouble: 2 - Function: "pow_downward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 +ifloat128: 2 ildouble: 4 ldouble: 4 Function: "pow_towardzero": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 -ildouble: 1 -ldouble: 1 +ifloat128: 2 +ildouble: 4 +ldouble: 4 Function: "pow_upward": double: 1 float: 1 +float128: 2 idouble: 1 ifloat: 1 -ildouble: 2 -ldouble: 2 +ifloat128: 2 
+ildouble: 4 +ldouble: 4 Function: "pow_vlen16": float: 3 @@ -2048,24 +2530,34 @@ Function: "pow_vlen8_avx2": float: 3 Function: "sin": +double: 1 +float128: 1 +idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "sin_downward": double: 1 +float128: 3 idouble: 1 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "sin_towardzero": double: 1 +float128: 2 idouble: 1 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "sin_upward": double: 1 +float128: 3 idouble: 1 +ifloat128: 3 ildouble: 3 ldouble: 3 @@ -2090,24 +2582,34 @@ Function: "sin_vlen8_avx2": float: 1 Function: "sincos": +double: 1 +float128: 1 +idouble: 1 +ifloat128: 1 ildouble: 1 ldouble: 1 Function: "sincos_downward": double: 1 +float128: 3 idouble: 1 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "sincos_towardzero": double: 1 +float128: 2 idouble: 1 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "sincos_upward": double: 1 +float128: 3 idouble: 1 +ifloat128: 3 ildouble: 3 ldouble: 3 @@ -2125,7 +2627,7 @@ Function: "sincos_vlen4_avx2": double: 2 Function: "sincos_vlen8": -double: 1 +double: 2 float: 1 Function: "sincos_vlen8_avx2": @@ -2134,222 +2636,278 @@ float: 1 Function: "sinh": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "sinh_downward": double: 3 float: 3 +float128: 3 idouble: 3 ifloat: 3 +ifloat128: 3 ildouble: 5 ldouble: 5 Function: "sinh_towardzero": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: "sinh_upward": double: 3 float: 3 +float128: 4 idouble: 3 ifloat: 3 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "tan": float: 1 +float128: 1 ifloat: 1 +ifloat128: 1 ildouble: 2 ldouble: 2 Function: "tan_downward": double: 1 float: 2 +float128: 1 idouble: 1 ifloat: 2 +ifloat128: 1 ildouble: 3 ldouble: 3 Function: "tan_towardzero": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 +ifloat128: 1 ildouble: 3 ldouble: 3 Function: "tan_upward": double: 1 float: 1 +float128: 1 idouble: 1 ifloat: 1 
+ifloat128: 1 ildouble: 2 ldouble: 2 Function: "tanh": double: 2 float: 2 +float128: 2 idouble: 2 ifloat: 2 +ifloat128: 2 ildouble: 3 ldouble: 3 Function: "tanh_downward": double: 3 float: 3 +float128: 4 idouble: 3 ifloat: 3 +ifloat128: 4 ildouble: 4 ldouble: 4 Function: "tanh_towardzero": double: 2 float: 2 +float128: 3 idouble: 2 ifloat: 2 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "tanh_upward": double: 3 float: 3 +float128: 3 idouble: 3 ifloat: 3 +ifloat128: 3 ildouble: 4 ldouble: 4 Function: "tgamma": double: 5 float: 5 +float128: 4 idouble: 5 ifloat: 5 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "tgamma_downward": double: 5 float: 5 +float128: 5 idouble: 5 ifloat: 5 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: "tgamma_towardzero": double: 5 float: 5 +float128: 5 idouble: 5 ifloat: 5 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: "tgamma_upward": double: 5 float: 5 +float128: 4 idouble: 5 ifloat: 5 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "y0": double: 2 float: 1 +float128: 3 idouble: 2 ifloat: 1 +ifloat128: 3 ildouble: 1 ldouble: 1 Function: "y0_downward": double: 3 float: 4 +float128: 4 idouble: 3 ifloat: 4 +ifloat128: 4 ildouble: 5 ldouble: 5 Function: "y0_towardzero": double: 3 float: 3 +float128: 3 idouble: 3 ifloat: 3 +ifloat128: 3 ildouble: 5 ldouble: 5 Function: "y0_upward": double: 3 float: 5 +float128: 3 idouble: 3 ifloat: 5 +ifloat128: 3 ildouble: 3 ldouble: 3 Function: "y1": double: 3 float: 2 +float128: 2 idouble: 3 ifloat: 2 +ifloat128: 2 ildouble: 2 ldouble: 2 Function: "y1_downward": double: 3 float: 2 +float128: 4 idouble: 3 ifloat: 2 +ifloat128: 4 ildouble: 7 ldouble: 7 Function: "y1_towardzero": double: 3 float: 2 +float128: 2 idouble: 3 ifloat: 2 +ifloat128: 2 ildouble: 5 ldouble: 5 Function: "y1_upward": double: 7 float: 2 +float128: 5 idouble: 7 ifloat: 2 +ifloat128: 5 ildouble: 7 ldouble: 7 Function: "yn": double: 3 float: 3 +float128: 5 idouble: 3 ifloat: 3 +ifloat128: 5 ildouble: 4 ldouble: 4 Function: "yn_downward": 
double: 3 float: 4 +float128: 5 idouble: 3 ifloat: 4 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: "yn_towardzero": double: 3 float: 3 +float128: 5 idouble: 3 ifloat: 3 +ifloat128: 5 ildouble: 5 ldouble: 5 Function: "yn_upward": double: 4 float: 5 +float128: 5 idouble: 4 ifloat: 5 +ifloat128: 5 ildouble: 4 ldouble: 4 diff --git a/sysdeps/x86_64/fpu/libm-test-ulps-name b/sysdeps/x86_64/fpu/libm-test-ulps-name new file mode 100644 index 0000000000..1c09346681 --- /dev/null +++ b/sysdeps/x86_64/fpu/libm-test-ulps-name @@ -0,0 +1 @@ +x86_64 diff --git a/sysdeps/x86_64/fpu/math-tests-arch.h b/sysdeps/x86_64/fpu/math-tests-arch.h index 867152046e..a5df133292 100644 --- a/sysdeps/x86_64/fpu/math-tests-arch.h +++ b/sysdeps/x86_64/fpu/math-tests-arch.h @@ -1,5 +1,5 @@ /* Runtime architecture check for math tests. x86_64 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,11 +16,11 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#include <cpu-features.h> + #if defined REQUIRE_AVX -# include <init-arch.h> # define INIT_ARCH_EXT - # define CHECK_ARCH_EXT \ do \ { \ @@ -29,10 +29,8 @@ while (0) #elif defined REQUIRE_AVX2 -# include <init-arch.h> # define INIT_ARCH_EXT - # define CHECK_ARCH_EXT \ do \ { \ @@ -41,10 +39,8 @@ while (0) #elif defined REQUIRE_AVX512F -# include <init-arch.h> # define INIT_ARCH_EXT - # define CHECK_ARCH_EXT \ do \ { \ diff --git a/sysdeps/x86_64/fpu/math_ldbl.h b/sysdeps/x86_64/fpu/math_ldbl.h index b9ff8dadaf..27f8fce904 100644 --- a/sysdeps/x86_64/fpu/math_ldbl.h +++ b/sysdeps/x86_64/fpu/math_ldbl.h @@ -1,6 +1,25 @@ -#ifndef _MATH_PRIVATE_H_ -#error "Never use <math_ldbl.h> directly; include <math_private.h> instead." -#endif +/* Manipulation of the bit representation of 'long double' quantities. 
+ Copyright (C) 2001-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _MATH_LDBL_H_ +#define _MATH_LDBL_H_ 1 + +#include <stdint.h> /* A union which permits us to convert between a long double and three 32 bit ints. */ @@ -10,8 +29,8 @@ typedef union long double value; struct { - u_int32_t lsw; - u_int32_t msw; + uint32_t lsw; + uint32_t msw; int sign_exponent:16; unsigned int empty1:16; unsigned int empty0:32; @@ -77,3 +96,5 @@ do { \ se_u.parts.sign_exponent = (exp); \ (d) = se_u.value; \ } while (0) + +#endif /* math_ldbl.h */ diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h index 027a6a3a4d..13052893ef 100644 --- a/sysdeps/x86_64/fpu/math_private.h +++ b/sysdeps/x86_64/fpu/math_private.h @@ -48,38 +48,6 @@ #include <sysdeps/i386/fpu/fenv_private.h> #include_next <math_private.h> -extern __always_inline double -__ieee754_sqrt (double d) -{ - double res; -#if defined __AVX__ || defined SSE2AVX - asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d)); -#else - asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d)); -#endif - return res; -} - -extern __always_inline float -__ieee754_sqrtf (float d) -{ - float res; -#if defined __AVX__ || defined SSE2AVX - asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d)); -#else - asm ("sqrtss %1, %0" : 
"=x" (res) : "xm" (d)); -#endif - return res; -} - -extern __always_inline long double -__ieee754_sqrtl (long double d) -{ - long double res; - asm ("fsqrt" : "=t" (res) : "0" (d)); - return res; -} - #ifdef __SSE4_1__ extern __always_inline double __rint (double d) diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 34542155aa..9f387248aa 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -1,12 +1,54 @@ ifeq ($(subdir),math) libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \ - s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c + s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \ + s_trunc-c s_truncf-c + +libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \ + s_floorf-sse4_1 s_nearbyint-sse4_1 \ + s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \ + s_trunc-sse4_1 s_truncf-sse4_1 + +libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \ + e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \ + mpa-fma \ + sincos32-fma doasin-fma dosincos-fma \ + mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma + +CFLAGS-doasin-fma.c = -mfma -mavx2 +CFLAGS-dosincos-fma.c = -mfma -mavx2 +CFLAGS-e_asin-fma.c = -mfma -mavx2 +CFLAGS-e_atan2-fma.c = -mfma -mavx2 +CFLAGS-e_exp-fma.c = -mfma -mavx2 +CFLAGS-e_log-fma.c = -mfma -mavx2 +CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma) +CFLAGS-mpa-fma.c = -mfma -mavx2 +CFLAGS-mpatan-fma.c = -mfma -mavx2 +CFLAGS-mpatan2-fma.c = -mfma -mavx2 +CFLAGS-mpsqrt-fma.c = -mfma -mavx2 +CFLAGS-mptan-fma.c = -mfma -mavx2 +CFLAGS-s_atan-fma.c = -mfma -mavx2 +CFLAGS-sincos32-fma.c = -mfma -mavx2 +CFLAGS-s_sin-fma.c = -mfma -mavx2 +CFLAGS-s_tan-fma.c = -mfma -mavx2 + +libm-sysdep_routines += s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2 + +libm-sysdep_routines += e_exp2f-fma e_expf-fma e_log2f-fma e_logf-fma \ + e_powf-fma s_sinf-fma s_cosf-fma s_sincosf-fma + +CFLAGS-e_exp2f-fma.c = -mfma -mavx2 +CFLAGS-e_expf-fma.c = -mfma -mavx2 
+CFLAGS-e_log2f-fma.c = -mfma -mavx2 +CFLAGS-e_logf-fma.c = -mfma -mavx2 +CFLAGS-e_powf-fma.c = -mfma -mavx2 +CFLAGS-s_sinf-fma.c = -mfma -mavx2 +CFLAGS-s_cosf-fma.c = -mfma -mavx2 +CFLAGS-s_sincosf-fma.c = -mfma -mavx2 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \ e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \ - mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \ + mpa-fma4 \ sincos32-fma4 doasin-fma4 dosincos-fma4 \ - halfulp-fma4 mpexp-fma4 \ mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4 CFLAGS-doasin-fma4.c = -mfma4 @@ -16,35 +58,26 @@ CFLAGS-e_atan2-fma4.c = -mfma4 CFLAGS-e_exp-fma4.c = -mfma4 CFLAGS-e_log-fma4.c = -mfma4 CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma) -CFLAGS-halfulp-fma4.c = -mfma4 CFLAGS-mpa-fma4.c = -mfma4 CFLAGS-mpatan-fma4.c = -mfma4 CFLAGS-mpatan2-fma4.c = -mfma4 -CFLAGS-mpexp-fma4.c = -mfma4 -CFLAGS-mplog-fma4.c = -mfma4 CFLAGS-mpsqrt-fma4.c = -mfma4 CFLAGS-mptan-fma4.c = -mfma4 CFLAGS-s_atan-fma4.c = -mfma4 CFLAGS-sincos32-fma4.c = -mfma4 -CFLAGS-slowexp-fma4.c = -mfma4 -CFLAGS-slowpow-fma4.c = -mfma4 CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \ e_atan2-avx s_sin-avx s_tan-avx \ - mplog-avx mpa-avx slowexp-avx \ - mpexp-avx + mpa-avx CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX -CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX -CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX -CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX endif @@ -66,5 +99,35 @@ libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \ svml_d_pow4_core_avx2 svml_d_pow8_core_avx512 \ svml_s_powf4_core_sse4 svml_s_powf8_core_avx2 \ svml_s_powf16_core_avx512 svml_s_sincosf4_core_sse4 \ - svml_s_sincosf8_core_avx2 
svml_s_sincosf16_core_avx512 + svml_s_sincosf8_core_avx2 \ + svml_s_sincosf16_core_avx512 \ + svml_d_cos2_core-sse2 svml_d_cos4_core-sse \ + svml_d_cos8_core-avx2 svml_d_exp2_core-sse2 \ + svml_d_exp4_core-sse svml_d_exp8_core-avx2 \ + svml_d_log2_core-sse2 svml_d_log4_core-sse \ + svml_d_log8_core-avx2 svml_d_pow2_core-sse2 \ + svml_d_pow4_core-sse svml_d_pow8_core-avx2 \ + svml_d_sin2_core-sse2 svml_d_sin4_core-sse \ + svml_d_sin8_core-avx2 \ + svml_d_sincos2_core-sse2 \ + svml_d_sincos4_core-sse \ + svml_d_sincos8_core-avx2 \ + svml_s_cosf16_core-avx2 \ + svml_s_cosf4_core-sse2 \ + svml_s_cosf8_core-sse \ + svml_s_expf16_core-avx2 \ + svml_s_expf4_core-sse2 \ + svml_s_expf8_core-sse \ + svml_s_logf16_core-avx2 \ + svml_s_logf4_core-sse2 \ + svml_s_logf8_core-sse \ + svml_s_powf16_core-avx2 \ + svml_s_powf4_core-sse2 \ + svml_s_powf8_core-sse \ + svml_s_sincosf16_core-avx2 \ + svml_s_sincosf4_core-sse2 \ + svml_s_sincosf8_core-sse \ + svml_s_sinf16_core-avx2 \ + svml_s_sinf4_core-sse2 \ + svml_s_sinf8_core-sse endif diff --git a/sysdeps/x86_64/fpu/multiarch/doasin-fma.c b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c new file mode 100644 index 0000000000..7a09865fca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c @@ -0,0 +1,4 @@ +#define __doasin __doasin_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/doasin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c new file mode 100644 index 0000000000..5744586bdb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c @@ -0,0 +1,6 @@ +#define __docos __docos_fma +#define __dubcos __dubcos_fma +#define __dubsin __dubsin_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/dosincos.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c new file mode 100644 index 0000000000..50e9c64247 --- /dev/null +++ 
b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c @@ -0,0 +1,11 @@ +#define __ieee754_acos __ieee754_acos_fma +#define __ieee754_asin __ieee754_asin_fma +#define __cos32 __cos32_fma +#define __doasin __doasin_fma +#define __docos __docos_fma +#define __dubcos __dubcos_fma +#define __dubsin __dubsin_fma +#define __sin32 __sin32_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c index 111a5b99bd..8d47004e4f 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_asin.c +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c @@ -1,26 +1,40 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> - -extern double __ieee754_acos_sse2 (double); -extern double __ieee754_asin_sse2 (double); -extern double __ieee754_acos_fma4 (double); -extern double __ieee754_asin_fma4 (double); - -libm_ifunc (__ieee754_acos, - HAS_ARCH_FEATURE (FMA4_Usable) - ? __ieee754_acos_fma4 - : __ieee754_acos_sse2); -strong_alias (__ieee754_acos, __acos_finite) +/* Multiple versions of IEEE 754 asin and acos. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern double __redirect_ieee754_asin (double); +extern double __redirect_ieee754_acos (double); + +#define SYMBOL_NAME ieee754_asin +#include "ifunc-fma4.h" -libm_ifunc (__ieee754_asin, - HAS_ARCH_FEATURE (FMA4_Usable) - ? __ieee754_asin_fma4 - : __ieee754_asin_sse2); +libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin, + IFUNC_SELECTOR ()); strong_alias (__ieee754_asin, __asin_finite) -#define __ieee754_acos __ieee754_acos_sse2 -#define __ieee754_asin __ieee754_asin_sse2 +#undef SYMBOL_NAME +#define SYMBOL_NAME ieee754_acos +#include "ifunc-fma4.h" + +libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos, + IFUNC_SELECTOR ()); +strong_alias (__ieee754_acos, __acos_finite) +#define __ieee754_acos __ieee754_acos_sse2 +#define __ieee754_asin __ieee754_asin_sse2 #include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c new file mode 100644 index 0000000000..caba686496 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c @@ -0,0 +1,10 @@ +#define __ieee754_atan2 __ieee754_atan2_fma +#define __add __add_fma +#define __dbl_mp __dbl_mp_fma +#define __dvd __dvd_fma +#define __mpatan2 __mpatan2_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_atan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c index 9ca3c02a44..6c2dd5af37 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c @@ -1,18 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 atan. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
-extern double __ieee754_atan2_sse2 (double, double); -extern double __ieee754_atan2_avx (double, double); -extern double __ieee754_atan2_fma4 (double, double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_atan2, - HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_atan2_fma4 - : (HAS_ARCH_FEATURE (AVX_Usable) - ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); -strong_alias (__ieee754_atan2, __atan2_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_atan2 __ieee754_atan2_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern double __redirect_ieee754_atan2 (double, double); +#define SYMBOL_NAME ieee754_atan2 +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_atan2, + __ieee754_atan2, IFUNC_SELECTOR ()); +strong_alias (__ieee754_atan2, __atan2_finite) + +#define __ieee754_atan2 __ieee754_atan2_sse2 #include <sysdeps/ieee754/dbl-64/e_atan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c index ee5dd6d2dc..afd917442a 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c @@ -1,6 +1,5 @@ #define __ieee754_exp __ieee754_exp_avx #define __exp1 __exp1_avx -#define __slowexp __slowexp_avx #define SECTION __attribute__ ((section (".text.avx"))) #include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c new file mode 100644 index 0000000000..765b1b9dd3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c @@ -0,0 +1,5 @@ +#define __ieee754_exp __ieee754_exp_fma +#define __exp1 __exp1_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c index ae6eb67603..9ac7acad28 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c @@ -1,6 +1,5 @@ #define __ieee754_exp __ieee754_exp_fma4 #define __exp1 __exp1_fma4 -#define __slowexp __slowexp_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c index b7d7b5ff27..7cd7d1729c 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c @@ -1,18 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 exp. 
+ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -extern double __ieee754_exp_sse2 (double); -extern double __ieee754_exp_avx (double); -extern double __ieee754_exp_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_exp, - HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_exp_fma4 - : (HAS_ARCH_FEATURE (AVX_Usable) - ? __ieee754_exp_avx : __ieee754_exp_sse2)); -strong_alias (__ieee754_exp, __exp_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_exp __ieee754_exp_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern double __redirect_ieee754_exp (double); +#define SYMBOL_NAME ieee754_exp +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp, + IFUNC_SELECTOR ()); +strong_alias (__ieee754_exp, __exp_finite) + +#define __ieee754_exp __ieee754_exp_sse2 #include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c new file mode 100644 index 0000000000..c915a50794 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c @@ -0,0 +1,3 @@ +#define __exp2f __exp2f_fma + +#include <sysdeps/ieee754/flt-32/e_exp2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c new file mode 100644 index 0000000000..e3a0706839 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c @@ -0,0 +1,40 @@ +/* Multiple versions of exp2f. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_exp2f (float); + +#define SYMBOL_NAME exp2f +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ()); + +#ifdef SHARED +# include <shlib-compat.h> +versioned_symbol (libm, __exp2f, exp2f, GLIBC_2_27); +libm_alias_float_other (__exp2, exp2) +#else +libm_alias_float (__exp2, exp2) +#endif + +strong_alias (__exp2f, __ieee754_exp2f) +strong_alias (__exp2f, __exp2f_finite) + +#define __exp2f __exp2f_sse2 +#include <sysdeps/ieee754/flt-32/e_exp2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c new file mode 100644 index 0000000000..4e01cd6a82 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c @@ -0,0 +1,3 @@ +#define __expf __expf_fma + +#include <sysdeps/ieee754/flt-32/e_expf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c new file mode 100644 index 0000000000..2b7c7ccbd0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c @@ -0,0 +1,43 @@ +/* Multiple versions of expf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_expf (float); + +#define SYMBOL_NAME expf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__expf, __GI___expf, __redirect_expf) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __expf, expf, GLIBC_2_27); +libm_alias_float_other (__exp, exp) +#else +libm_alias_float (__exp, exp) +#endif + +strong_alias (__expf, __ieee754_expf) +strong_alias (__expf, __expf_finite) + +#define __expf __expf_sse2 +#include <sysdeps/ieee754/flt-32/e_expf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c index c669019bc2..b22a5767be 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c @@ -1,8 +1,4 @@ #define __ieee754_log __ieee754_log_avx -#define __mplog __mplog_avx -#define __add __add_avx -#define __dbl_mp __dbl_mp_avx -#define __sub __sub_avx #define SECTION __attribute__ ((section (".text.avx"))) #include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c new file mode 100644 index 0000000000..bce0ee03c2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c @@ -0,0 +1,4 @@ +#define __ieee754_log __ieee754_log_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c index a2346cc618..f458f9c23c 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c @@ -1,8 +1,4 @@ #define __ieee754_log __ieee754_log_fma4 -#define __mplog __mplog_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __sub __sub_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_log.c> diff 
--git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c index cf9533d6c0..e0a1b02fae 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_log.c +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c @@ -1,18 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 log. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -extern double __ieee754_log_sse2 (double); -extern double __ieee754_log_avx (double); -extern double __ieee754_log_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_log, - HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_log_fma4 - : (HAS_ARCH_FEATURE (AVX_Usable) - ? __ieee754_log_avx : __ieee754_log_sse2)); -strong_alias (__ieee754_log, __log_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_log __ieee754_log_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern double __redirect_ieee754_log (double); +#define SYMBOL_NAME ieee754_log +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log, + IFUNC_SELECTOR ()); +strong_alias (__ieee754_log, __log_finite) + +#define __ieee754_log __ieee754_log_sse2 #include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c new file mode 100644 index 0000000000..8a76b836fb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c @@ -0,0 +1,3 @@ +#define __log2f __log2f_fma + +#include <sysdeps/ieee754/flt-32/e_log2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c new file mode 100644 index 0000000000..12d0c30dd3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c @@ -0,0 +1,43 @@ +/* Multiple versions of log2f. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_log2f (float); + +#define SYMBOL_NAME log2f +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __log2f, log2f, GLIBC_2_27); +libm_alias_float_other (__log2, log2) +#else +libm_alias_float (__log2, log2) +#endif + +strong_alias (__log2f, __ieee754_log2f) +strong_alias (__log2f, __log2f_finite) + +#define __log2f __log2f_sse2 +#include <sysdeps/ieee754/flt-32/e_log2f.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c new file mode 100644 index 0000000000..a47fd8195f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c @@ -0,0 +1,3 @@ +#define __logf __logf_fma + +#include <sysdeps/ieee754/flt-32/e_logf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c new file mode 100644 index 0000000000..224d40a1e4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c @@ -0,0 +1,43 @@ +/* Multiple versions of logf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_logf (float); + +#define SYMBOL_NAME logf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__logf, __GI___logf, __redirect_logf) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __logf, logf, GLIBC_2_27); +libm_alias_float_other (__log, log) +#else +libm_alias_float (__log, log) +#endif + +strong_alias (__logf, __ieee754_logf) +strong_alias (__logf, __logf_finite) + +#define __logf __logf_sse2 +#include <sysdeps/ieee754/flt-32/e_logf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c new file mode 100644 index 0000000000..73c1e7fb89 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c @@ -0,0 +1,5 @@ +#define __ieee754_pow __ieee754_pow_fma +#define __exp1 __exp1_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c index 5b3ea8e103..8971b655ca 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c @@ -1,6 +1,5 @@ #define __ieee754_pow __ieee754_pow_fma4 #define __exp1 __exp1_fma4 -#define __slowpow __slowpow_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c index a5c5d89c3e..084073c936 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c @@ -1,17 +1,29 @@ -#include <init-arch.h> -#include <math.h> -#include <math_private.h> +/* Multiple versions of IEEE 754 pow. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
-extern double __ieee754_pow_sse2 (double, double); -extern double __ieee754_pow_fma4 (double, double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (__ieee754_pow, - HAS_ARCH_FEATURE (FMA4_Usable) - ? __ieee754_pow_fma4 - : __ieee754_pow_sse2); -strong_alias (__ieee754_pow, __pow_finite) + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define __ieee754_pow __ieee754_pow_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +extern double __redirect_ieee754_pow (double, double); +#define SYMBOL_NAME ieee754_pow +#include "ifunc-fma4.h" +libc_ifunc_redirected (__redirect_ieee754_pow, + __ieee754_pow, IFUNC_SELECTOR ()); +strong_alias (__ieee754_pow, __pow_finite) + +#define __ieee754_pow __ieee754_pow_sse2 #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c new file mode 100644 index 0000000000..fdf5dcc56a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c @@ -0,0 +1,3 @@ +#define __powf __powf_fma + +#include <sysdeps/ieee754/flt-32/e_powf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c new file mode 100644 index 0000000000..a185006f40 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c @@ -0,0 +1,46 @@ +/* Multiple versions of powf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define powf __redirect_powf +#define __DECL_SIMD___redirect_powf +#include <math.h> +#undef powf + +#define SYMBOL_NAME powf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (__powf, __GI___powf, __redirect_powf) + __attribute__ ((visibility ("hidden"))); + +# include <shlib-compat.h> +versioned_symbol (libm, __powf, powf, GLIBC_2_27); +libm_alias_float_other (__pow, pow) +#else +libm_alias_float (__pow, pow) +#endif + +strong_alias (__powf, __ieee754_powf) +strong_alias (__powf, __powf_finite) + +#define __powf __powf_sse2 +#include <sysdeps/ieee754/flt-32/e_powf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c deleted file mode 100644 index a00c17c016..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/halfulp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h new file mode 100644 index 0000000000..a5f9375afc --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h @@ -0,0 +1,43 @@ +/* Common 
definition for ifunc selections optimized with AVX, AVX2/FMA + and FMA4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (fma); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable)) + return OPTIMIZE (fma4); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable)) + return OPTIMIZE (avx); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h new file mode 100644 index 0000000000..63a8cd221f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h @@ -0,0 +1,34 @@ +/* Common definition for ifunc selections optimized with AVX2/FMA. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (fma); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h new file mode 100644 index 0000000000..a2526a2ee0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h @@ -0,0 +1,39 @@ +/* Common definition for ifunc selections optimized with AVX2/FMA and + FMA4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (fma); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable)) + return OPTIMIZE (fma4); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h new file mode 100644 index 0000000000..bd2d32e418 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h @@ -0,0 +1,39 @@ +/* Common definition for libmathvec ifunc selections optimized with + AVX2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +#undef PASTER2 +#define PASTER2(x,y) x##_##y + +extern void REDIRECT_NAME (void); +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse_wrapper) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + return OPTIMIZE (avx2); + + return OPTIMIZE (sse_wrapper); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h new file mode 100644 index 0000000000..174e462cfb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h @@ -0,0 +1,45 @@ +/* Common definition for libmathvec ifunc selections optimized with + AVX512. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +#undef PASTER2 +#define PASTER2(x,y) x##_##y + +extern void REDIRECT_NAME (void); +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_wrapper) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (knl) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (skx) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (!CPU_FEATURES_ARCH_P (cpu_features, MathVec_Prefer_No_AVX512)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512DQ_Usable)) + return OPTIMIZE (skx); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)) + return OPTIMIZE (knl); + } + + return OPTIMIZE (avx2_wrapper); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h new file mode 100644 index 0000000000..c1e70ebfc1 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h @@ -0,0 +1,38 @@ +/* Common definition for libmathvec ifunc selections optimized with + SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +#undef PASTER2 +#define PASTER2(x,y) x##_##y + +extern void REDIRECT_NAME (void); +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse4); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h new file mode 100644 index 0000000000..a8710ba802 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h @@ -0,0 +1,33 @@ +/* Common definition for ifunc selections optimized with SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <init-arch.h> + +extern __typeof (REDIRECT_NAME) OPTIMIZE (c) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse41); + + return OPTIMIZE (c); +} diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c new file mode 100644 index 0000000000..177cc2517f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c @@ -0,0 +1,14 @@ +#define __add __add_fma +#define __mul __mul_fma +#define __sqr __sqr_fma +#define __sub __sub_fma +#define __dbl_mp __dbl_mp_fma +#define __dvd __dvd_fma + +#define NO___CPY 1 +#define NO___MP_DBL 1 +#define NO___ACR 1 +#define NO__CONST 1 +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpa.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c new file mode 100644 index 0000000000..d216f9142d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c @@ -0,0 +1,10 @@ +#define __mpatan __mpatan_fma +#define __add __add_fma +#define __dvd __dvd_fma +#define __mpsqrt __mpsqrt_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define AVOID_MPATAN_H 1 +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpatan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c new file mode 100644 index 0000000000..98df336f79 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c @@ -0,0 +1,9 @@ +#define __mpatan2 __mpatan2_fma +#define __add __add_fma +#define __dvd __dvd_fma +#define __mpatan __mpatan_fma +#define __mpsqrt __mpsqrt_fma +#define __mul __mul_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpatan2.c> diff --git 
a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c b/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c deleted file mode 100644 index 87f29c96c9..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __mpexp __mpexp_avx -#define __add __add_avx -#define __dbl_mp __dbl_mp_avx -#define __dvd __dvd_avx -#define __mul __mul_avx -#define AVOID_MPEXP_H 1 -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mpexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c deleted file mode 100644 index 07ca6e9ad0..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __mpexp __mpexp_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __dvd __dvd_fma4 -#define __mul __mul_fma4 -#define AVOID_MPEXP_H 1 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/mpexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c b/sysdeps/x86_64/fpu/multiarch/mplog-avx.c deleted file mode 100644 index fd783d9a67..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c +++ /dev/null @@ -1,8 +0,0 @@ -#define __mplog __mplog_avx -#define __add __add_avx -#define __mpexp __mpexp_avx -#define __mul __mul_avx -#define __sub __sub_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/mplog.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c deleted file mode 100644 index b4733118d7..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c +++ /dev/null @@ -1,8 +0,0 @@ -#define __mplog __mplog_fma4 -#define __add __add_fma4 -#define __mpexp __mpexp_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/mplog.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c 
b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c new file mode 100644 index 0000000000..44d7a23ae3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c @@ -0,0 +1,8 @@ +#define __mpsqrt __mpsqrt_fma +#define __dbl_mp __dbl_mp_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define AVOID_MPSQRT_H 1 +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mpsqrt.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c new file mode 100644 index 0000000000..d1a691413c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c @@ -0,0 +1,7 @@ +#define __mptan __mptan_fma +#define __c32 __c32_fma +#define __dvd __dvd_fma +#define __mpranred __mpranred_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/mptan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c index b5cb9c3a75..41816bfe6c 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c @@ -1,4 +1,4 @@ -#define atan __atan_avx +#define __atan __atan_avx #define __add __add_avx #define __dbl_mp __dbl_mp_avx #define __mul __mul_avx diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c new file mode 100644 index 0000000000..363e32bcbd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c @@ -0,0 +1,9 @@ +#define __atan __atan_fma +#define __add __add_fma +#define __dbl_mp __dbl_mp_fma +#define __mpatan __mpatan_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/s_atan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c index 9e83e6cdab..ad8d3af579 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c @@ -1,4 +1,4 @@ -#define atan 
__atan_fma4 +#define __atan __atan_fma4 #define __add __add_fma4 #define __dbl_mp __dbl_mp_fma4 #define __mpatan __mpatan_fma4 diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c index 742e95cb96..f9ce8549ab 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c @@ -1,15 +1,30 @@ -#include <init-arch.h> -#include <math.h> +/* Multiple versions of atan. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -extern double __atan_sse2 (double); -extern double __atan_avx (double); -extern double __atan_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (atan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __atan_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __atan_avx : __atan_sse2)); + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define atan __atan_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ +#include <libm-alias-double.h> +extern double __redirect_atan (double); + +#define SYMBOL_NAME atan +#include "ifunc-avx-fma4.h" + +libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ()); +libm_alias_double (__atan, atan) + +#define __atan __atan_sse2 #include <sysdeps/ieee754/dbl-64/s_atan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S index 40fa729955..e90f05b42f 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceil.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__ceil) - .type __ceil, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __ceil_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __ceil_c(%rip), %rax -2: ret -END(__ceil) -weak_alias (__ceil, ceil) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__ceil_sse41) - roundsd $2, %xmm0, %xmm0 + roundsd $10, %xmm0, %xmm0 ret END(__ceil_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c new file mode 100644 index 0000000000..070fcdddea --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c @@ -0,0 +1,31 @@ +/* Multiple versions of __ceil. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-double.h> + +#define ceil __redirect_ceil +#define __ceil __redirect___ceil +#include <math.h> +#undef ceil +#undef __ceil + +#define SYMBOL_NAME ceil +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ()); +libm_alias_double (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S index 9a06a5c174..c3bd24c5ae 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__ceilf) - .type __ceilf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __ceilf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __ceilf_c(%rip), %rax -2: ret -END(__ceilf) -weak_alias (__ceilf, ceilf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__ceilf_sse41) - roundss $2, %xmm0, %xmm0 + roundss $10, %xmm0, %xmm0 ret END(__ceilf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c new file mode 100644 index 0000000000..db0c6c4bc3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __ceilf. 
+ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define ceilf __redirect_ceilf +#define __ceilf __redirect___ceilf +#include <math.h> +#undef ceilf +#undef __ceilf + +#define SYMBOL_NAME ceilf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ()); +libm_alias_float (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c new file mode 100644 index 0000000000..5f9191aef9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c @@ -0,0 +1,2 @@ +#define COSF __cosf_fma +#include <sysdeps/ieee754/flt-32/s_cosf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c new file mode 100644 index 0000000000..87cf42a82a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c @@ -0,0 +1,2 @@ +#define COSF __cosf_sse2 +#include <sysdeps/ieee754/flt-32/s_cosf.c> diff --git a/sysdeps/x86_64/fpu/test-float-vlen4.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c index f6a4cf5c1e..33959d3d01 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c @@ -1,5 +1,5 @@ -/* Tests for SSE ISA versions of vector math functions. 
- Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* Multiple versions of cosf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,13 +16,13 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include "test-float-vlen4.h" +#include <libm-alias-float.h> -#define TEST_VECTOR_cosf 1 -#define TEST_VECTOR_sinf 1 -#define TEST_VECTOR_sincosf 1 -#define TEST_VECTOR_logf 1 -#define TEST_VECTOR_expf 1 -#define TEST_VECTOR_powf 1 +extern float __redirect_cosf (float); -#include "libm-test.c" +#define SYMBOL_NAME cosf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ()); + +libm_alias_float (__cos, cos) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S index 57a0eee5ba..b3c7aa29ff 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floor.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__floor) - .type __floor, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __floor_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __floor_c(%rip), %rax -2: ret -END(__floor) -weak_alias (__floor, floor) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__floor_sse41) - roundsd $1, %xmm0, %xmm0 + roundsd $9, %xmm0, %xmm0 ret END(__floor_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c new file mode 100644 index 0000000000..58f8ed8eaf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c @@ -0,0 +1,31 @@ +/* Multiple versions of __floor. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-double.h> + +#define floor __redirect_floor +#define __floor __redirect___floor +#include <math.h> +#undef floor +#undef __floor + +#define SYMBOL_NAME floor +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ()); +libm_alias_double (__floor, floor) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S index 74a149a950..43461d3e6b 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_floorf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,23 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__floorf) - .type __floorf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __floorf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __floorf_c(%rip), %rax -2: ret -END(__floorf) -weak_alias (__floorf, floorf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__floorf_sse41) - roundss $1, %xmm0, %xmm0 + roundss $9, %xmm0, %xmm0 ret END(__floorf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c new file mode 100644 index 0000000000..5ef2fec2e3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __floorf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define floorf __redirect_floorf +#define __floorf __redirect___floorf +#include <math.h> +#undef floorf +#undef __floorf + +#define SYMBOL_NAME floorf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ()); +libm_alias_float (__floor, floor) diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c index 1de1a84cbe..875c76d372 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fma.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c @@ -1,5 +1,5 @@ /* FMA version of fma. - Copyright (C) 2009-2016 Free Software Foundation, Inc. + Copyright (C) 2009-2018 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -20,6 +20,7 @@ #include <config.h> #include <math.h> #include <init-arch.h> +#include <libm-alias-double.h> extern double __fma_sse2 (double x, double y, double z) attribute_hidden; @@ -43,7 +44,7 @@ __fma_fma4 (double x, double y, double z) libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable) ? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable) ? __fma_fma4 : __fma_sse2)); -weak_alias (__fma, fma) +libm_alias_double (__fma, fma) #define __fma __fma_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c index 8905e4b54f..5f4c2ec0be 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c +++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c @@ -1,5 +1,5 @@ /* FMA version of fmaf. - Copyright (C) 2009-2016 Free Software Foundation, Inc. + Copyright (C) 2009-2018 Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,6 +19,7 @@ #include <config.h> #include <math.h> #include <init-arch.h> +#include <libm-alias-float.h> extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden; @@ -42,7 +43,7 @@ __fmaf_fma4 (float x, float y, float z) libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable) ? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable) ? __fmaf_fma4 : __fmaf_sse2)); -weak_alias (__fmaf, fmaf) +libm_alias_float (__fma, fma) #define __fmaf __fmaf_sse2 diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S index 5091cf5813..f9ac36e4f0 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__nearbyint) - .type __nearbyint, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __nearbyint_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __nearbyint_c(%rip), %rax -2: ret -END(__nearbyint) -weak_alias (__nearbyint, nearbyint) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__nearbyint_sse41) roundsd $0xc, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c new file mode 100644 index 0000000000..d92945fd14 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c @@ -0,0 +1,32 @@ +/* Multiple versions of __nearbyint. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-double.h> + +#define nearbyint __redirect_nearbyint +#define __nearbyint __redirect___nearbyint +#include <math.h> +#undef nearbyint +#undef __nearbyint + +#define SYMBOL_NAME nearbyint +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_nearbyint, __nearbyint, + IFUNC_SELECTOR ()); +libm_alias_double (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S index 4a13700001..2f427da778 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__nearbyintf) - .type __nearbyintf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __nearbyintf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __nearbyintf_c(%rip), %rax -2: ret -END(__nearbyintf) -weak_alias (__nearbyintf, nearbyintf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__nearbyintf_sse41) roundss $0xc, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c new file mode 100644 index 0000000000..ba7be27956 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c @@ -0,0 +1,32 @@ +/* Multiple versions of __nearbyintf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +#define nearbyintf __redirect_nearbyintf +#define __nearbyintf __redirect___nearbyintf +#include <math.h> +#undef nearbyintf +#undef __nearbyintf + +#define SYMBOL_NAME nearbyintf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf, + IFUNC_SELECTOR ()); +libm_alias_float (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S index 1c0d1e14b7..7d7568a1a0 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rint.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__rint) - .type __rint, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __rint_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __rint_c(%rip), %rax -2: ret -END(__rint) -weak_alias (__rint, rint) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__rint_sse41) roundsd $4, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c new file mode 100644 index 0000000000..f1cb2fed0c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c @@ -0,0 +1,31 @@ +/* Multiple versions of __rint. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-double.h> + +#define rint __redirect_rint +#define __rint __redirect___rint +#include <math.h> +#undef rint +#undef __rint + +#define SYMBOL_NAME rint +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ()); +libm_alias_double (__rint, rint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S index 8e42fa561f..ef5d896f55 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_rintf.S +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2016 Free Software Foundation, Inc. +/* Copyright (C) 2011-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@gmail.come>, 2011. @@ -16,22 +16,9 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <machine/asm.h> -#include <init-arch.h> - - -ENTRY(__rintf) - .type __rintf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq __rintf_sse41(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jnz 2f - leaq __rintf_c(%rip), %rax -2: ret -END(__rintf) -weak_alias (__rintf, rintf) - +#include <sysdep.h> + .section .text.sse4.1,"ax",@progbits ENTRY(__rintf_sse41) roundss $4, %xmm0, %xmm0 ret diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c new file mode 100644 index 0000000000..41323b3b5b --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __rintf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <libm-alias-float.h> + +#define rintf __redirect_rintf +#define __rintf __redirect___rintf +#include <math.h> +#undef rintf +#undef __rintf + +#define SYMBOL_NAME rintf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ()); +libm_alias_float (__rint, rint) diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c new file mode 100644 index 0000000000..15f3c394d5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c @@ -0,0 +1,11 @@ +#define __cos __cos_fma +#define __sin __sin_fma +#define __docos __docos_fma +#define __dubsin __dubsin_fma +#define __mpcos __mpcos_fma +#define __mpcos1 __mpcos1_fma +#define __mpsin __mpsin_fma +#define __mpsin1 __mpsin1_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c index 8ffd3e7125..b289269240 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c @@ -1,26 +1,39 @@ -#include <init-arch.h> -#include <math.h> -#undef NAN - -extern double __cos_sse2 (double); -extern double __sin_sse2 (double); -extern double __cos_avx (double); -extern double 
__sin_avx (double); -extern double __cos_fma4 (double); -extern double __sin_fma4 (double); - -libm_ifunc (__cos, (HAS_ARCH_FEATURE (FMA4_Usable) ? __cos_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __cos_avx : __cos_sse2)); -weak_alias (__cos, cos) - -libm_ifunc (__sin, (HAS_ARCH_FEATURE (FMA4_Usable) ? __sin_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __sin_avx : __sin_sse2)); -weak_alias (__sin, sin) +/* Multiple versions of sin and cos. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. -#define __cos __cos_sse2 -#define __sin __sin_sse2 + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-double.h> + +extern double __redirect_sin (double); +extern double __redirect_cos (double); +#define SYMBOL_NAME sin +#include "ifunc-avx-fma4.h" +libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ()); +libm_alias_double (__sin, sin) + +#undef SYMBOL_NAME +#define SYMBOL_NAME cos +#include "ifunc-avx-fma4.h" + +libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ()); +libm_alias_double (__cos, cos) + +#define __cos __cos_sse2 +#define __sin __sin_sse2 #include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c new file mode 100644 index 0000000000..64abe7abca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c @@ -0,0 +1,240 @@ +/* Compute sine and cosine of argument optimized with vector. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <errno.h> +#include <math.h> +#include <math_private.h> +#include <x86intrin.h> +#include <libm-alias-float.h> +#include "s_sincosf.h" + +#define SINCOSF __sincosf_fma + +#ifndef SINCOSF +# define SINCOSF_FUNC __sincosf +#else +# define SINCOSF_FUNC SINCOSF +#endif + +/* Chebyshev constants for sin and cos, range -PI/4 - PI/4. 
*/ +static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2}; +static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 }; +static const __v2df V2 = { -0x1.a019f8b4bd1f9p-13, -0x1.6c16b348b6874p-10 }; +static const __v2df V3 = { 0x1.71d7264e6b5b4p-19, 0x1.a00eb9ac43ccp-16 }; +static const __v2df V4 = { -0x1.a947e1674b58ap-26, -0x1.23c97dd8844d7p-22 }; + +/* Chebyshev constants for sin and cos, range 2^-27 - 2^-5. */ +static const __v2df VC0 = { -0x1.555555543d49dp-3, -0x1.fffffff5cc6fdp-2 }; +static const __v2df VC1 = { 0x1.110f475cec8c5p-7, 0x1.55514b178dac5p-5 }; + +static const __v2df v2ones = { 1.0, 1.0 }; + +/* Compute the sine and cosine values using Chebyshev polynomials where + THETA is the range reduced absolute value of the input + and it is less than Pi/4, + N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide + whether a sine or cosine approximation is more accurate and + SIGNBIT is used to add the correct sign after the Chebyshev + polynomial is computed. */ +static void +reduced_sincos (const double theta, const unsigned int n, + const unsigned int signbit, float *sinx, float *cosx) +{ + __v2df v2x, v2sx, v2cx; + const __v2df v2theta = { theta, theta }; + const __v2df v2theta2 = v2theta * v2theta; + /* Here sinf() and cosf() are calculated using sin Chebyshev polynomial: + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */ + v2x = V3 + v2theta2 * V4; /* S3+x^2*S4. */ + v2x = V2 + v2theta2 * v2x; /* S2+x^2*(S3+x^2*S4). */ + v2x = V1 + v2theta2 * v2x; /* S1+x^2*(S2+x^2*(S3+x^2*S4)). */ + v2x = V0 + v2theta2 * v2x; /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))). */ + v2x = v2theta2 * v2x; + v2cx = v2ones + v2x; + v2sx = v2theta + v2theta * v2x; + /* We are operating on |x|, so we need to add back the original + signbit for sinf. */ + /* Determine positive or negative primary interval. */ + /* Are we in the primary interval of sin or cos? 
*/ + if ((n & 2) == 0) + { + const __v2df v2sign = + { + ones[((n >> 2) & 1) ^ signbit], + ones[((n + 2) >> 2) & 1] + }; + v2cx[0] = v2sx[0]; + v2cx *= v2sign; + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[0]; + *cosx = v4sx[1]; + } + else + { + const __v2df v2sign = + { + ones[((n + 2) >> 2) & 1], + ones[((n >> 2) & 1) ^ signbit] + }; + v2cx[0] = v2sx[0]; + v2cx *= v2sign; + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[1]; + *cosx = v4sx[0]; + } +} + +void +SINCOSF_FUNC (float x, float *sinx, float *cosx) +{ + double theta = x; + double abstheta = fabs (theta); + uint32_t ix, xi; + GET_FLOAT_WORD (xi, x); + /* |x| */ + ix = xi & 0x7fffffff; + /* If |x|< Pi/4. */ + if (ix < 0x3f490fdb) + { + if (ix >= 0x3d000000) /* |x| >= 2^-5. */ + { + __v2df v2x, v2sx, v2cx; + const __v2df v2theta = { theta, theta }; + const __v2df v2theta2 = v2theta * v2theta; + /* Chebyshev polynomial of the form for sin and cos. */ + v2x = V3 + v2theta2 * V4; + v2x = V2 + v2theta2 * v2x; + v2x = V1 + v2theta2 * v2x; + v2x = V0 + v2theta2 * v2x; + v2x = v2theta2 * v2x; + v2cx = v2ones + v2x; + v2sx = v2theta + v2theta * v2x; + v2cx[0] = v2sx[0]; + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[0]; + *cosx = v4sx[1]; + } + else if (ix >= 0x32000000) /* |x| >= 2^-27. */ + { + /* A simpler Chebyshev approximation is close enough for this range: + for sin: x+x^3*(SS0+x^2*SS1) + for cos: 1.0+x^2*(CC0+x^3*CC1). */ + __v2df v2x, v2sx, v2cx; + const __v2df v2theta = { theta, theta }; + const __v2df v2theta2 = v2theta * v2theta; + v2x = VC0 + v2theta * v2theta2 * VC1; + v2x = v2theta2 * v2x; + v2cx = v2ones + v2x; + v2sx = v2theta + v2theta * v2x; + v2cx[0] = v2sx[0]; + __v4sf v4sx = _mm_cvtpd_ps (v2cx); + *sinx = v4sx[0]; + *cosx = v4sx[1]; + } + else + { + /* Handle some special cases. */ + if (ix) + *sinx = theta - (theta * SMALL); + else + *sinx = theta; + *cosx = 1.0 - abstheta; + } + } + else /* |x| >= Pi/4. 
*/ + { + unsigned int signbit = xi >> 31; + if (ix < 0x40e231d6) /* |x| < 9*Pi/4. */ + { + /* There are cases where FE_UPWARD rounding mode can + produce a result of abstheta * inv_PI_4 == 9, + where abstheta < 9pi/4, so the domain for + pio2_table must go to 5 (9 / 2 + 1). */ + unsigned int n = (abstheta * inv_PI_4) + 1; + theta = abstheta - pio2_table[n / 2]; + reduced_sincos (theta, n, signbit, sinx, cosx); + } + else if (ix < 0x7f800000) + { + if (ix < 0x4b000000) /* |x| < 2^23. */ + { + unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1; + double x = n / 2; + theta = (abstheta - x * PI_2_hi) - x * PI_2_lo; + /* Argument reduction needed. */ + reduced_sincos (theta, n, signbit, sinx, cosx); + } + else /* |x| >= 2^23. */ + { + x = fabsf (x); + int exponent + = (ix >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS; + exponent += 3; + exponent /= 28; + double a = invpio4_table[exponent] * x; + double b = invpio4_table[exponent + 1] * x; + double c = invpio4_table[exponent + 2] * x; + double d = invpio4_table[exponent + 3] * x; + uint64_t l = a; + l &= ~0x7; + a -= l; + double e = a + b; + l = e; + e = a - l; + if (l & 1) + { + e -= 1.0; + e += b; + e += c; + e += d; + e *= M_PI_4; + reduced_sincos (e, l + 1, signbit, sinx, cosx); + } + else + { + e += b; + e += c; + e += d; + if (e <= 1.0) + { + e *= M_PI_4; + reduced_sincos (e, l + 1, signbit, sinx, cosx); + } + else + { + l++; + e -= 2.0; + e *= M_PI_4; + reduced_sincos (e, l + 1, signbit, sinx, cosx); + } + } + } + } + else + { + if (ix == 0x7f800000) + __set_errno (EDOM); + /* sin/cos(Inf or NaN) is NaN. 
*/ + *sinx = *cosx = x - x; + } + } +} + +#ifndef SINCOSF +libm_alias_float (__sincos, sincos) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S new file mode 100644 index 0000000000..51d012bb12 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S @@ -0,0 +1,2 @@ +#define __sincosf __sincosf_sse2 +#include <sysdeps/x86_64/fpu/s_sincosf.S> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c new file mode 100644 index 0000000000..6cb4295558 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c @@ -0,0 +1,28 @@ +/* Multiple versions of sincosf. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern void __redirect_sincosf (float, float *, float *); + +#define SYMBOL_NAME sincosf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ()); + +libm_alias_float (__sincos, sincos) diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c new file mode 100644 index 0000000000..34440ebf4a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c @@ -0,0 +1,2 @@ +#define SINF __sinf_fma +#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c new file mode 100644 index 0000000000..74e32c98db --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c @@ -0,0 +1,2 @@ +#define SINF __sinf_sse2 +#include <sysdeps/ieee754/flt-32/s_sinf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c new file mode 100644 index 0000000000..4fdfbd8d3e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c @@ -0,0 +1,28 @@ +/* Multiple versions of sinf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +extern float __redirect_sinf (float); + +#define SYMBOL_NAME sinf +#include "ifunc-fma.h" + +libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ()); + +libm_alias_float (__sin, sin) diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c index 53de5d3c98..5ee29a9a06 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c @@ -1,4 +1,4 @@ -#define tan __tan_avx +#define __tan __tan_avx #define __dbl_mp __dbl_mp_avx #define __sub __sub_avx #define SECTION __attribute__ ((section (".text.avx"))) diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c new file mode 100644 index 0000000000..1a1b9d2490 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c @@ -0,0 +1,8 @@ +#define __tan __tan_fma +#define __dbl_mp __dbl_mp_fma +#define __mpranred __mpranred_fma +#define __mptan __mptan_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/s_tan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c index a805440b46..e4e9f6cb85 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c @@ -1,4 +1,4 @@ -#define tan __tan_fma4 +#define __tan __tan_fma4 #define __dbl_mp __dbl_mp_fma4 #define __mpranred __mpranred_fma4 #define __mptan __mptan_fma4 diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c index 25f3bca07e..bb75d8d0bc 100644 --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c @@ -1,15 +1,30 @@ -#include <init-arch.h> -#include <math.h> +/* Multiple versions of tan. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
-extern double __tan_sse2 (double); -extern double __tan_avx (double); -extern double __tan_fma4 (double); + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -libm_ifunc (tan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __tan_fma4 : - HAS_ARCH_FEATURE (AVX_Usable) - ? __tan_avx : __tan_sse2)); + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#define tan __tan_sse2 + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ +#include <libm-alias-double.h> +extern double __redirect_tan (double); + +#define SYMBOL_NAME tan +#include "ifunc-avx-fma4.h" + +libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ()); +libm_alias_double (__tan, tan) + +#define __tan __tan_sse2 #include <sysdeps/ieee754/dbl-64/s_tan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c new file mode 100644 index 0000000000..6204ae3c77 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c @@ -0,0 +1,2 @@ +#define __trunc __trunc_c +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S new file mode 100644 index 0000000000..b8046bfa0c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S @@ -0,0 +1,25 @@ +/* trunc for SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .section .text.sse4.1,"ax",@progbits +ENTRY(__trunc_sse41) + roundsd $11, %xmm0, %xmm0 + ret +END(__trunc_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..a1b0c60630 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c @@ -0,0 +1,31 @@ +/* Multiple versions of __trunc. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-double.h> + +#define trunc __redirect_trunc +#define __trunc __redirect___trunc +#include <math.h> +#undef trunc +#undef __trunc + +#define SYMBOL_NAME trunc +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ()); +libm_alias_double (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c new file mode 100644 index 0000000000..7a5ac7da1f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c @@ -0,0 +1,2 @@ +#define __truncf __truncf_c +#include <sysdeps/ieee754/flt-32/s_truncf.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S new file mode 100644 index 0000000000..2dabc0be57 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S @@ -0,0 +1,25 @@ +/* truncf for SSE4.1. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .section .text.sse4.1,"ax",@progbits +ENTRY(__truncf_sse41) + roundss $11, %xmm0, %xmm0 + ret +END(__truncf_sse41) diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..a7e220bd0c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c @@ -0,0 +1,31 @@ +/* Multiple versions of __truncf. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <libm-alias-float.h> + +#define truncf __redirect_truncf +#define __truncf __redirect___truncf +#include <math.h> +#undef truncf +#undef __truncf + +#define SYMBOL_NAME truncf +#include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ()); +libm_alias_float (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c new file mode 100644 index 0000000000..dcd44bc5e8 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c @@ -0,0 +1,15 @@ +#define __cos32 __cos32_fma +#define __sin32 __sin32_fma +#define __c32 __c32_fma +#define __mpsin __mpsin_fma +#define __mpsin1 __mpsin1_fma +#define __mpcos __mpcos_fma +#define __mpcos1 __mpcos1_fma +#define __mpranred __mpranred_fma +#define __add __add_fma +#define __dbl_mp __dbl_mp_fma +#define __mul __mul_fma +#define __sub __sub_fma +#define SECTION __attribute__ ((section (".text.fma"))) + +#include <sysdeps/ieee754/dbl-64/sincos32.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c b/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c deleted file mode 100644 index d01c6d71a4..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __slowexp __slowexp_avx -#define __add __add_avx -#define __dbl_mp __dbl_mp_avx -#define __mpexp __mpexp_avx -#define __mul __mul_avx -#define __sub __sub_avx -#define SECTION __attribute__ ((section (".text.avx"))) - -#include <sysdeps/ieee754/dbl-64/slowexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c deleted file mode 100644 index 3bcde84233..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c +++ /dev/null @@ -1,9 +0,0 @@ -#define __slowexp __slowexp_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __mpexp __mpexp_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include 
<sysdeps/ieee754/dbl-64/slowexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c deleted file mode 100644 index 69d69823bb..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c +++ /dev/null @@ -1,11 +0,0 @@ -#define __slowpow __slowpow_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __mpexp __mpexp_fma4 -#define __mplog __mplog_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/slowpow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S new file mode 100644 index 0000000000..a85729807f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized cos, vector length is 2. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define _ZGVbN2v_cos _ZGVbN2v_cos_sse2 +#include "../svml_d_cos2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S deleted file mode 100644 index 7d720e2fcb..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized cos, vector length is 2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_cos) - .type _ZGVbN2v_cos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_cos_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_cos_sse2(%rip), %rax - ret -END (_ZGVbN2v_cos) -libmvec_hidden_def (_ZGVbN2v_cos) - -#define _ZGVbN2v_cos _ZGVbN2v_cos_sse2 -#include "../svml_d_cos2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c new file mode 100644 index 0000000000..3ff39eecd7 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized cos, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN2v_cos +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_cos, __GI__ZGVbN2v_cos, __redirect__ZGVbN2v_cos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S index 088fcae067..10be76e207 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S @@ -1,5 +1,5 @@ /* Function cos vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -205,7 +205,7 @@ ENTRY (_ZGVbN2v_cos_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -215,7 +215,7 @@ ENTRY (_ZGVbN2v_cos_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S new file mode 100644 index 0000000000..9f406ea7c9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized cos, vector length is 4. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN4v_cos _ZGVdN4v_cos_sse_wrapper +#include "../svml_d_cos4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S deleted file mode 100644 index 65a3570d2e..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized cos, vector length is 4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_cos) - .type _ZGVdN4v_cos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_cos_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_cos_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_cos) -libmvec_hidden_def (_ZGVdN4v_cos) - -#define _ZGVdN4v_cos _ZGVdN4v_cos_sse_wrapper -#include "../svml_d_cos4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c new file mode 100644 index 0000000000..cb8405201a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized cos, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVdN4v_cos +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_cos, __GI__ZGVdN4v_cos, __redirect__ZGVdN4v_cos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S index 4e653216d9..38cdc6bb03 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -188,7 +188,7 @@ ENTRY (_ZGVdN4v_cos_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -199,7 +199,7 @@ ENTRY (_ZGVdN4v_cos_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S new file mode 100644 index 0000000000..081baeeff5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized cos, vector length is 8. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper +#include "../svml_d_cos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S deleted file mode 100644 index 3e7f16d44e..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized cos, vector length is 8. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8v_cos) - .type _ZGVeN8v_cos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX -1: leaq _ZGVeN8v_cos_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_cos_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_cos_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_cos) - -#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper -#include "../svml_d_cos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c new file mode 100644 index 0000000000..4aa12595bc --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized cos, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_cos +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_cos, __GI__ZGVeN8v_cos, __redirect__ZGVeN8v_cos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S index 1cac1d827a..24e3b36357 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX-512, KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_cos_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_cos #else /* @@ -221,7 +221,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -229,14 +229,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_cos_knl) ENTRY (_ZGVeN8v_cos_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_cos #else /* @@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -450,7 +450,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S new file mode 100644 index 0000000000..3591eb1f19 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized exp. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_exp _ZGVbN2v_exp_sse2 +#include "../svml_d_exp2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S deleted file mode 100644 index 136c67a550..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_exp) - .type _ZGVbN2v_exp, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_exp_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_exp_sse2(%rip), %rax - ret -END (_ZGVbN2v_exp) -libmvec_hidden_def (_ZGVbN2v_exp) - -#define _ZGVbN2v_exp _ZGVbN2v_exp_sse2 -#include "../svml_d_exp2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c new file mode 100644 index 0000000000..2cfe8937c9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized exp, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN2v_exp +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_exp, __GI__ZGVbN2v_exp, __redirect__ZGVbN2v_exp) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S index 445b230152..e98d11b311 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S @@ -1,5 +1,5 @@ /* Function exp vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -207,7 +207,7 @@ ENTRY (_ZGVbN2v_exp_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -217,7 +217,7 @@ ENTRY (_ZGVbN2v_exp_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S new file mode 100644 index 0000000000..f8e0b5517a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized exp. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper +#include "../svml_d_exp4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S deleted file mode 100644 index 9d6a47be0a..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_exp) - .type _ZGVdN4v_exp, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_exp_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_exp_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_exp) -libmvec_hidden_def (_ZGVdN4v_exp) - -#define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper -#include "../svml_d_exp4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c new file mode 100644 index 0000000000..59bb36984a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized exp, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4v_exp +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_exp, __GI__ZGVdN4v_exp, __redirect__ZGVdN4v_exp) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S index 25f9e28941..87990f8ad7 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -193,7 +193,7 @@ ENTRY (_ZGVdN4v_exp_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -204,7 +204,7 @@ ENTRY (_ZGVdN4v_exp_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S new file mode 100644 index 0000000000..b1d3cad0e1 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized exp. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper +#include "../svml_d_exp8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S deleted file mode 100644 index 317ee36e61..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8v_exp) - .type _ZGVeN8v_exp, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8v_exp_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_exp_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_exp_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_exp) - -#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper -#include "../svml_d_exp8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c new file mode 100644 index 0000000000..cfdc96ec86 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized exp, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_exp +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_exp, __GI__ZGVeN8v_exp, __redirect__ZGVeN8v_exp) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S index 74f1d2ce7b..8dd8a03e4b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_exp_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_exp #else /* @@ -223,7 +223,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -231,14 +231,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_exp_knl) ENTRY (_ZGVeN8v_exp_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_exp #else /* @@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp vmovsd 1160(%rsp,%r15), %xmm0 vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp vmovsd 1152(%rsp,%r15), %xmm0 vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call exp@PLT + call JUMPTARGET(__exp_finite) 
vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S new file mode 100644 index 0000000000..761a1a537d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized log. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_log _ZGVbN2v_log_sse2 +#include "../svml_d_log2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S deleted file mode 100644 index 03d86a3e63..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_log) - .type _ZGVbN2v_log, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_log_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_log_sse2(%rip), %rax - ret -END (_ZGVbN2v_log) -libmvec_hidden_def (_ZGVbN2v_log) - -#define _ZGVbN2v_log _ZGVbN2v_log_sse2 -#include "../svml_d_log2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c new file mode 100644 index 0000000000..c24437a3be --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN2v_log +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_log, __GI__ZGVbN2v_log, __redirect__ZGVbN2v_log) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S index 5d254288f6..eb854c68d6 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S @@ -1,5 +1,5 @@ /* Function log vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_log_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_log_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S new file mode 100644 index 0000000000..2460512f78 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized log. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper +#include "../svml_d_log4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S deleted file mode 100644 index 9f6ddbef15..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_log) - .type _ZGVdN4v_log, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_log_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_log_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_log) -libmvec_hidden_def (_ZGVdN4v_log) - -#define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper -#include "../svml_d_log4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c new file mode 100644 index 0000000000..5751370d65 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4v_log +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_log, __GI__ZGVdN4v_log, __redirect__ZGVdN4v_log) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S index 5da298747d..81515850e1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_log_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_log_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S new file mode 100644 index 0000000000..ecfbeafb23 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized log. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper +#include "../svml_d_log8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S deleted file mode 100644 index 2e1a1da1a5..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8v_log) - .type _ZGVeN8v_log, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8v_log_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_log_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_log_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_log) - -#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper -#include "../svml_d_log8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c new file mode 100644 index 0000000000..1e796dcfdd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized log, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_log +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_log, __GI__ZGVeN8v_log, __redirect__ZGVeN8v_log) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S index dca8e61f34..ae8af8d861 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_log_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_log #else /* @@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_log_knl) ENTRY (_ZGVeN8v_log_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_log #else /* @@ -443,7 +443,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call log@PLT + call JUMPTARGET(__log_finite) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S new file mode 100644 index 0000000000..2d8ad50681 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized pow. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN2vv_pow _ZGVbN2vv_pow_sse2 +#include "../svml_d_pow2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S deleted file mode 100644 index 4a50246889..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2vv_pow) - .type _ZGVbN2vv_pow, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2vv_pow_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2vv_pow_sse2(%rip), %rax - ret -END (_ZGVbN2vv_pow) -libmvec_hidden_def (_ZGVbN2vv_pow) - -#define _ZGVbN2vv_pow _ZGVbN2vv_pow_sse2 -#include "../svml_d_pow2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c new file mode 100644 index 0000000000..3424c0e326 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized pow, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN2vv_pow +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2vv_pow, __GI__ZGVbN2vv_pow, + __redirect__ZGVbN2vv_pow) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S index 064d170878..77828b44d5 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S @@ -1,5 +1,5 @@ /* Function pow vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -413,7 +413,7 @@ ENTRY (_ZGVbN2vv_pow_sse4) movsd 72(%rsp,%r15), %xmm0 movsd 136(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) movsd %xmm0, 200(%rsp,%r15) jmp .LBL_1_8 @@ -424,7 +424,7 @@ ENTRY (_ZGVbN2vv_pow_sse4) movsd 64(%rsp,%r15), %xmm0 movsd 128(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) movsd %xmm0, 192(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S new file mode 100644 index 0000000000..4dcd14ff20 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized pow. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN4vv_pow _ZGVdN4vv_pow_sse_wrapper +#include "../svml_d_pow4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S deleted file mode 100644 index fb9f989adc..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4vv_pow) - .type _ZGVdN4vv_pow, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4vv_pow_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4vv_pow_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4vv_pow) -libmvec_hidden_def (_ZGVdN4vv_pow) - -#define _ZGVdN4vv_pow _ZGVdN4vv_pow_sse_wrapper -#include "../svml_d_pow4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c new file mode 100644 index 0000000000..447be39401 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized pow, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4vv_pow +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4vv_pow, __GI__ZGVdN4vv_pow, + __redirect__ZGVdN4vv_pow) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S index f2a73ffe1e..c43d62f202 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -367,7 +367,7 @@ ENTRY (_ZGVdN4vv_pow_avx2) vmovsd 264(%rsp,%r15), %xmm1 vzeroupper - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 328(%rsp,%r15) jmp .LBL_1_8 @@ -379,7 +379,7 @@ ENTRY (_ZGVdN4vv_pow_avx2) vmovsd 256(%rsp,%r15), %xmm1 vzeroupper - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 320(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S new file mode 100644 index 0000000000..8acf700e76 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized pow. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper +#include "../svml_d_pow8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S deleted file mode 100644 index 30bc53f2f7..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8vv_pow) - .type _ZGVeN8vv_pow, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8vv_pow_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8vv_pow_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8vv_pow) - -#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper -#include "../svml_d_pow8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c new file mode 100644 index 0000000000..62f96965bb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized pow, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8vv_pow +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8vv_pow, __GI__ZGVeN8vv_pow, + __redirect__ZGVeN8vv_pow) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S index 4a515233fc..a28c39b73d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -82,7 +82,7 @@ .text ENTRY (_ZGVeN8vv_pow_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow #else pushq %rbp @@ -392,7 +392,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 vmovsd 1224(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_1_8 @@ -401,7 +401,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 vmovsd 1216(%rsp,%r15), %xmm1 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_1_7 @@ -409,7 +409,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow END (_ZGVeN8vv_pow_knl) ENTRY (_ZGVeN8vv_pow_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow #else pushq %rbp @@ -720,7 +720,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_2_8 @@ -732,7 +732,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow vzeroupper vmovsd 
1152(%rsp,%r15), %xmm0 - call pow@PLT + call JUMPTARGET(__pow_finite) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S new file mode 100644 index 0000000000..cb7b31aa1c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized sin. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN2v_sin _ZGVbN2v_sin_sse2 +#include "../svml_d_sin2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S deleted file mode 100644 index 112bec2224..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sin. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2v_sin) - .type _ZGVbN2v_sin, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2v_sin_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2v_sin_sse2(%rip), %rax - ret -END (_ZGVbN2v_sin) -libmvec_hidden_def (_ZGVbN2v_sin) - -#define _ZGVbN2v_sin _ZGVbN2v_sin_sse2 -#include "../svml_d_sin2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c new file mode 100644 index 0000000000..1c5788f205 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized sin, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN2v_sin +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2v_sin, __GI__ZGVbN2v_sin, __redirect__ZGVbN2v_sin) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S index 5755ce6f74..15980e9eeb 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S @@ -1,5 +1,5 @@ /* Function sin vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_sin_sse4) shlq $4, %r15 movsd 200(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_sin_sse4) shlq $4, %r15 movsd 192(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S new file mode 100644 index 0000000000..07fae6f3b4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized sin, vector length is 4. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN4v_sin _ZGVdN4v_sin_sse_wrapper +#include "../svml_d_sin4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S deleted file mode 100644 index 700a1c629d..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sin, vector length is 4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4v_sin) - .type _ZGVdN4v_sin, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4v_sin_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4v_sin_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4v_sin) -libmvec_hidden_def (_ZGVdN4v_sin) - -#define _ZGVdN4v_sin _ZGVdN4v_sin_sse_wrapper -#include "../svml_d_sin4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c new file mode 100644 index 0000000000..b5933914aa --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized sin, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4v_sin +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4v_sin, __GI__ZGVdN4v_sin, __redirect__ZGVdN4v_sin) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S index 46b557158a..4f0917c56d 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_sin_avx2) vmovsd 328(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_sin_avx2) vmovsd 320(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S new file mode 100644 index 0000000000..b64c3390d6 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S @@ -0,0 +1,23 @@ +/* AVX2 version of vectorized sin. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <init-arch.h> + +#define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper +#include "../svml_d_sin8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S deleted file mode 100644 index 5afce0ed88..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized sin. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8v_sin) - .type _ZGVeN8v_sin, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8v_sin_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8v_sin_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8v_sin_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8v_sin) - -#define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper -#include "../svml_d_sin8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c new file mode 100644 index 0000000000..57023d8494 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c @@ -0,0 +1,27 @@ +/* Multiple versions of vectorized sin, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN8v_sin +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8v_sin, __GI__ZGVeN8v_sin, __redirect__ZGVeN8v_sin) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S index 6c565f3861..2d4b14fd1b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX-512, KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN8v_sin_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_sin #else /* @@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_1_8 @@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin movzbl %r12b, %r15d shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_1_7 #endif END (_ZGVeN8v_sin_knl) ENTRY (_ZGVeN8v_sin_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN4v_sin #else /* @@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) jmp .LBL_2_8 @@ -452,7 +452,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S new file mode 100644 index 0000000000..ab7f9c500d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized sincos. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN2vvv_sincos _ZGVbN2vvv_sincos_sse2 +#include "../svml_d_sincos2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S deleted file mode 100644 index 883d7d33a4..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sincos. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN2vvv_sincos) - .type _ZGVbN2vvv_sincos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN2vvv_sincos_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN2vvv_sincos_sse2(%rip), %rax - ret -END (_ZGVbN2vvv_sincos) -libmvec_hidden_def (_ZGVbN2vvv_sincos) - -#define _ZGVbN2vvv_sincos _ZGVbN2vvv_sincos_sse2 -#include "../svml_d_sincos2_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c new file mode 100644 index 0000000000..f373bb40a3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincos, vector length is 2. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN2vvv_sincos +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN2vvv_sincos, __GI__ZGVbN2vvv_sincos, + __redirect__ZGVbN2vvv_sincos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S index 65ad540122..b4dfa37898 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVbN2vvv_sincos_sse4) +ENTRY (_ZGVbN2vl8l8_sincos_sse4) /* ALGORITHM DESCRIPTION: @@ -287,12 +287,12 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) shlq $4, %r15 movsd 136(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 200(%rsp,%r15) movsd 136(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -302,13 +302,67 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) shlq $4, %r15 movsd 128(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 192(%rsp,%r15) movsd 128(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 +END (_ZGVbN2vl8l8_sincos_sse4) +libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4) + +/* vvv version implemented with wrapper to vl8l8 variant. 
*/ +ENTRY (_ZGVbN2vvv_sincos_sse4) +#ifndef __ILP32__ + subq $72, %rsp + .cfi_def_cfa_offset 80 + movdqu %xmm1, 32(%rsp) + lea (%rsp), %rdi + movdqu %xmm2, 48(%rdi) + lea 16(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movq 32(%rsp), %rdx + movq 48(%rsp), %rsi + movq 40(%rsp), %r8 + movq 56(%rsp), %r10 + movq (%rsp), %rax + movq 16(%rsp), %rcx + movq 8(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq %r9, (%r10) + addq $72, %rsp + .cfi_def_cfa_offset 8 + ret +#else + subl $72, %esp + .cfi_def_cfa_offset 80 + leal 48(%rsp), %esi + movaps %xmm1, 16(%esp) + leal 32(%rsp), %edi + movaps %xmm2, (%esp) + call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4) + movdqa 16(%esp), %xmm1 + movsd 32(%esp), %xmm0 + movq %xmm1, %rax + movdqa (%esp), %xmm2 + movsd %xmm0, (%eax) + movsd 40(%esp), %xmm0 + pextrd $1, %xmm1, %eax + movsd %xmm0, (%eax) + movsd 48(%esp), %xmm0 + movq %xmm2, %rax + movsd %xmm0, (%eax) + movsd 56(%esp), %xmm0 + pextrd $1, %xmm2, %eax + movsd %xmm0, (%eax) + addl $72, %esp + .cfi_def_cfa_offset 8 + ret +#endif END (_ZGVbN2vvv_sincos_sse4) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S new file mode 100644 index 0000000000..10b4a2cf16 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized sincos. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN4vvv_sincos _ZGVdN4vvv_sincos_sse_wrapper +#include "../svml_d_sincos4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S deleted file mode 100644 index 69a3f74650..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sincos. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN4vvv_sincos) - .type _ZGVdN4vvv_sincos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN4vvv_sincos_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN4vvv_sincos_sse_wrapper(%rip), %rax - ret -END (_ZGVdN4vvv_sincos) -libmvec_hidden_def (_ZGVdN4vvv_sincos) - -#define _ZGVdN4vvv_sincos _ZGVdN4vvv_sincos_sse_wrapper -#include "../svml_d_sincos4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c new file mode 100644 index 0000000000..1fabd7b471 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincos, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN4vvv_sincos +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN4vvv_sincos, __GI__ZGVdN4vvv_sincos, + __redirect__ZGVdN4vvv_sincos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S index 60d03e9f8b..d56aa96ac9 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVdN4vvv_sincos_avx2) +ENTRY (_ZGVdN4vl8l8_sincos_avx2) /* ALGORITHM DESCRIPTION: @@ -248,12 +248,12 @@ ENTRY (_ZGVdN4vvv_sincos_avx2) vmovsd 264(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 328(%rsp,%r15) vmovsd 264(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 392(%rsp,%r15) jmp .LBL_1_8 @@ -264,14 +264,110 @@ ENTRY (_ZGVdN4vvv_sincos_avx2) vmovsd 256(%rsp,%r15), %xmm0 vzeroupper - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 320(%rsp,%r15) vmovsd 256(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 384(%rsp,%r15) jmp .LBL_1_7 +END (_ZGVdN4vl8l8_sincos_avx2) +libmvec_hidden_def(_ZGVdN4vl8l8_sincos_avx2) + +/* vvv version implemented with wrapper to vl8l8 variant. 
*/ +ENTRY (_ZGVdN4vvv_sincos_avx2) +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-32, %rsp + subq $128, %rsp + vmovdqu %ymm1, 64(%rsp) + lea (%rsp), %rdi + vmovdqu %ymm2, 96(%rdi) + lea 32(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2) + movq 64(%rsp), %rdx + movq 96(%rsp), %rsi + movq 72(%rsp), %r8 + movq 104(%rsp), %r10 + movq (%rsp), %rax + movq 32(%rsp), %rcx + movq 8(%rsp), %rdi + movq 40(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq 80(%rsp), %rax + movq 112(%rsp), %rcx + movq %rdi, (%r8) + movq %r9, (%r10) + movq 88(%rsp), %rdi + movq 120(%rsp), %r9 + movq 16(%rsp), %r11 + movq 48(%rsp), %rdx + movq 24(%rsp), %rsi + movq 56(%rsp), %r8 + movq %r11, (%rax) + movq %rdx, (%rcx) + movq %rsi, (%rdi) + movq %r8, (%r9) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-32, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -48(%rbp), %esi + leal -80(%rbp), %edi + subl $104, %esp + vmovaps %xmm1, -96(%ebp) + vmovaps %xmm2, -112(%ebp) + call HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2) + movl -96(%ebp), %eax + vmovsd -80(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -92(%ebp), %eax + vmovsd -72(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -88(%ebp), %eax + vmovsd -64(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -84(%ebp), %eax + vmovsd -56(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -112(%ebp), %eax + vmovsd -48(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -108(%ebp), %eax + vmovsd -40(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -104(%ebp), %eax + vmovsd -32(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -100(%ebp), %eax + vmovsd -24(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + addl $104, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq %rbp + leal -8(%r10), 
%esp + .cfi_def_cfa 7, 8 + ret +#endif END (_ZGVdN4vvv_sincos_avx2) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S new file mode 100644 index 0000000000..8cf88f6461 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized sincos. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper +#include "../svml_d_sincos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S deleted file mode 100644 index 64cb08c5d1..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized sincos. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN8vvv_sincos) - .type _ZGVeN8vvv_sincos, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN8vvv_sincos_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN8vvv_sincos_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN8vvv_sincos) - -#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper -#include "../svml_d_sincos8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c new file mode 100644 index 0000000000..1409872ed2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincos, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVeN8vvv_sincos +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN8vvv_sincos, __GI__ZGVeN8vvv_sincos, + __redirect__ZGVeN8vvv_sincos) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S index 44700f90b8..2df626c0c1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -36,9 +36,9 @@ sin(R), sin(R') are approximated by corresponding polynomial. 
*/ .text -ENTRY (_ZGVeN8vvv_sincos_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT -WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos +ENTRY (_ZGVeN8vl8l8_sincos_knl) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT +WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos #else pushq %rbp cfi_adjust_cfa_offset (8) @@ -278,12 +278,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos shlq $4, %r15 vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_1_8 @@ -293,22 +293,23 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos shlq $4, %r15 vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_1_7 #endif -END (_ZGVeN8vvv_sincos_knl) +END (_ZGVeN8vl8l8_sincos_knl) +libmvec_hidden_def(_ZGVeN8vl8l8_sincos_knl) -ENTRY (_ZGVeN8vvv_sincos_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT -WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos +ENTRY (_ZGVeN8vl8l8_sincos_skx) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT +WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos #else pushq %rbp cfi_adjust_cfa_offset (8) @@ -557,12 +558,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos vzeroupper vmovsd 1160(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1224(%rsp,%r15) vmovsd 1160(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1288(%rsp,%r15) jmp .LBL_2_8 @@ -574,17 +575,171 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos vzeroupper vmovsd 1152(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) vmovsd %xmm0, 1216(%rsp,%r15) vmovsd 1152(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) vmovsd %xmm0, 1280(%rsp,%r15) jmp .LBL_2_7 #endif +END (_ZGVeN8vl8l8_sincos_skx) +libmvec_hidden_def(_ZGVeN8vl8l8_sincos_skx) + +/* Wrapper between vvv and vl8l8 vector variants. 
*/ +.macro WRAPPER_AVX512_vvv_vl8l8 callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-64, %rsp + subq $256, %rsp + vmovups %zmm1, 128(%rsp) + lea (%rsp), %rdi + vmovups %zmm2, 192(%rdi) + lea 64(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 128(%rsp), %rdx + movq 136(%rsp), %rsi + movq 144(%rsp), %r8 + movq 152(%rsp), %r10 + movq (%rsp), %rax + movq 8(%rsp), %rcx + movq 16(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq 160(%rsp), %rax + movq 168(%rsp), %rcx + movq %rdi, (%r8) + movq %r9, (%r10) + movq 176(%rsp), %rdi + movq 184(%rsp), %r9 + movq 32(%rsp), %r11 + movq 40(%rsp), %rdx + movq 48(%rsp), %rsi + movq 56(%rsp), %r8 + movq %r11, (%rax) + movq %rdx, (%rcx) + movq 192(%rsp), %r11 + movq 200(%rsp), %rdx + movq %rsi, (%rdi) + movq %r8, (%r9) + movq 208(%rsp), %rsi + movq 216(%rsp), %r8 + movq 64(%rsp), %r10 + movq 72(%rsp), %rax + movq 80(%rsp), %rcx + movq 88(%rsp), %rdi + movq %r10, (%r11) + movq %rax, (%rdx) + movq 224(%rsp), %r10 + movq 232(%rsp), %rax + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq 240(%rsp), %rcx + movq 248(%rsp), %rdi + movq 96(%rsp), %r9 + movq 104(%rsp), %r11 + movq 112(%rsp), %rdx + movq 120(%rsp), %rsi + movq %r9, (%r10) + movq %r11, (%rax) + movq %rdx, (%rcx) + movq %rsi, (%rdi) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-64, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -112(%rbp), %esi + leal -176(%rbp), %edi + subl $232, %esp + vmovdqa %ymm1, -208(%ebp) + vmovdqa %ymm2, -240(%ebp) + call HIDDEN_JUMPTARGET(\callee) + vmovdqa -208(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovsd -176(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -168(%ebp), %xmm0 + vmovsd %xmm0, 
(%eax) + movq -200(%ebp), %rax + vmovsd -160(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -152(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -192(%ebp), %rax + vmovsd -144(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -136(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -184(%ebp), %rax + vmovsd -128(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -120(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovdqa -240(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovsd -112(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -104(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -232(%ebp), %rax + vmovsd -96(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -88(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -224(%ebp), %rax + vmovsd -80(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -72(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movq -216(%ebp), %rax + vmovsd -64(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + shrq $32, %rax + vmovsd -56(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + addl $232, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + +ENTRY (_ZGVeN8vvv_sincos_knl) +WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_knl +END (_ZGVeN8vvv_sincos_knl) + +ENTRY (_ZGVeN8vvv_sincos_skx) +WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx END (_ZGVeN8vvv_sincos_skx) .section .rodata, "a" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S new file mode 100644 index 0000000000..f01f89f294 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized cosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper +#include "../svml_s_cosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S deleted file mode 100644 index 755254a280..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized cosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_cosf) - .type _ZGVeN16v_cosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_cosf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_cosf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_cosf) - -#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper -#include "../svml_s_cosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c new file mode 100644 index 0000000000..5bd0441b16 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized cosf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16v_cosf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_cosf, __GI__ZGVeN16v_cosf, + __redirect__ZGVeN16v_cosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S index 5004cd4758..6ea1137b42 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN16v_cosf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf #else /* @@ -225,21 +225,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 #endif END (_ZGVeN16v_cosf_knl) ENTRY (_ZGVeN16v_cosf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf #else /* @@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf vmovss 1156(%rsp,%r15,8), %xmm0 vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 .LBL_2_12: @@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf vmovss 1152(%rsp,%r15,8), %xmm0 vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 
1216(%rsp,%r15,8) jmp .LBL_2_7 #endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S new file mode 100644 index 0000000000..727189f8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized cosf, vector length is 4. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2 +#include "../svml_s_cosf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S deleted file mode 100644 index ad7de18851..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized cosf, vector length is 4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_cosf) - .type _ZGVbN4v_cosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_cosf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_cosf_sse2(%rip), %rax - ret -END (_ZGVbN4v_cosf) -libmvec_hidden_def (_ZGVbN4v_cosf) - -#define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2 -#include "../svml_s_cosf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c new file mode 100644 index 0000000000..dde470af5d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized cosf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN4v_cosf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_cosf, __GI__ZGVbN4v_cosf, + __redirect__ZGVbN4v_cosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S index d23ff72a30..f4e0553bb3 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -211,7 +211,7 @@ ENTRY (_ZGVbN4v_cosf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -220,7 +220,7 @@ ENTRY (_ZGVbN4v_cosf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S new file mode 100644 index 0000000000..1e1a5540c3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized cosf, vector length is 8. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper +#include "../svml_s_cosf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S deleted file mode 100644 index 602c70e324..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized cosf, vector length is 8. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_cosf) - .type _ZGVdN8v_cosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8v_cosf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_cosf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_cosf) -libmvec_hidden_def (_ZGVdN8v_cosf) - -#define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper -#include "../svml_s_cosf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c new file mode 100644 index 0000000000..56531b215a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized cosf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_cosf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_cosf, __GI__ZGVdN8v_cosf, + __redirect__ZGVdN8v_cosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S index 513f3c0a29..dbff4a7b7e 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -197,7 +197,7 @@ ENTRY (_ZGVdN8v_cosf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -207,7 +207,7 @@ ENTRY (_ZGVdN8v_cosf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S new file mode 100644 index 0000000000..e0b7fd787f --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S @@ -0,0 +1,23 @@ +/* AVX2 version of vectorized expf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <init-arch.h> + +#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper +#include "../svml_s_expf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S deleted file mode 100644 index f990d36483..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_expf) - .type _ZGVeN16v_expf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_expf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_expf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_expf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_expf) - -#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper -#include "../svml_s_expf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c new file mode 100644 index 0000000000..d358d93546 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized expf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16v_expf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_expf, __GI__ZGVeN16v_expf, + __redirect__ZGVeN16v_expf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S index 7eb7a1b775..89ba0df28f 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN16v_expf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_expf #else /* @@ -212,14 +212,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 @@ -227,7 +227,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf END (_ZGVeN16v_expf_knl) ENTRY (_ZGVeN16v_expf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_expf #else /* @@ -422,7 +422,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -433,7 +433,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 
1216(%rsp,%r15,8) jmp .LBL_2_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S new file mode 100644 index 0000000000..8f57e4bbd9 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized expf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN4v_expf _ZGVbN4v_expf_sse2 +#include "../svml_s_expf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S deleted file mode 100644 index 2fbe6d475e..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_expf) - .type _ZGVbN4v_expf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_expf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_expf_sse2(%rip), %rax - ret -END (_ZGVbN4v_expf) -libmvec_hidden_def (_ZGVbN4v_expf) - -#define _ZGVbN4v_expf _ZGVbN4v_expf_sse2 -#include "../svml_s_expf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c new file mode 100644 index 0000000000..82befe0b5d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized expf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN4v_expf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_expf, __GI__ZGVbN4v_expf, + __redirect__ZGVbN4v_expf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S index c6f91e8dc1..254ec94096 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function expf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -195,7 +195,7 @@ ENTRY (_ZGVbN4v_expf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -204,7 +204,7 @@ ENTRY (_ZGVbN4v_expf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call expf@PLT + call JUMPTARGET(__expf_finite) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S index 7d19bb423d..459699c80c 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized expf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_expf) - .type _ZGVdN8v_expf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8v_expf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_expf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_expf) -libmvec_hidden_def (_ZGVdN8v_expf) - #define _ZGVdN8v_expf _ZGVdN8v_expf_sse_wrapper #include "../svml_s_expf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c new file mode 100644 index 0000000000..0b8a47ede0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized expf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_expf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_expf, __GI__ZGVdN8v_expf, + __redirect__ZGVdN8v_expf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S index c6be6954f7..ae1d5317e4 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -184,7 +184,7 @@ ENTRY(_ZGVdN8v_expf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -194,7 +194,7 @@ ENTRY(_ZGVdN8v_expf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call expf@PLT + call JUMPTARGET(__expf_finite) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S new file mode 100644 index 0000000000..b23bd12fa0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized logf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper +#include "../svml_s_logf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S deleted file mode 100644 index 9efb2fb7df..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_logf) - .type _ZGVeN16v_logf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_logf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_logf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_logf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_logf) - -#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper -#include "../svml_s_logf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c new file mode 100644 index 0000000000..fec61883b4 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized logf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16v_logf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_logf, __GI__ZGVeN16v_logf, + __redirect__ZGVeN16v_logf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S index 6209058381..4cf0a96fe4 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVeN16v_logf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_logf #else /* @@ -197,21 +197,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 #endif END (_ZGVeN16v_logf_knl) ENTRY (_ZGVeN16v_logf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_logf #else /* @@ -391,7 +391,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -402,7 +402,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S new file mode 100644 index 0000000000..2c2331e1d8 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized logf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN4v_logf _ZGVbN4v_logf_sse2 +#include "../svml_s_logf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S deleted file mode 100644 index c85615ac25..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_logf) - .type _ZGVbN4v_logf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_logf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_logf_sse2(%rip), %rax - ret -END (_ZGVbN4v_logf) -libmvec_hidden_def (_ZGVbN4v_logf) - -#define _ZGVbN4v_logf _ZGVbN4v_logf_sse2 -#include "../svml_s_logf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c new file mode 100644 index 0000000000..f249c351bd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized logf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN4v_logf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_logf, __GI__ZGVbN4v_logf, + __redirect__ZGVbN4v_logf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S index 1ce9838513..651eb5eb1a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function logf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -177,7 +177,7 @@ ENTRY (_ZGVbN4v_logf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -186,7 +186,7 @@ ENTRY (_ZGVbN4v_logf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call logf@PLT + call JUMPTARGET(__logf_finite) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S index 8f6d83dd56..862379277b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S @@ -1,5 +1,5 @@ -/* Multiple versions of vectorized logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. +/* SSE version of vectorized logf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,21 +16,5 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_logf) - .type _ZGVdN8v_logf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8v_logf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_logf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_logf) -libmvec_hidden_def (_ZGVdN8v_logf) - #define _ZGVdN8v_logf _ZGVdN8v_logf_sse_wrapper #include "../svml_s_logf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c new file mode 100644 index 0000000000..dbd29657ca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized logf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_logf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_logf, __GI__ZGVdN8v_logf, + __redirect__ZGVdN8v_logf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S index 91fb549ce6..c7f5448fcb 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -166,7 +166,7 @@ ENTRY(_ZGVdN8v_logf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -176,7 +176,7 @@ ENTRY(_ZGVdN8v_logf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call logf@PLT + call JUMPTARGET(__logf_finite) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S new file mode 100644 index 0000000000..de705c8632 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized powf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper +#include "../svml_s_powf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S deleted file mode 100644 index 80048ce977..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized powf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16vv_powf) - .type _ZGVeN16vv_powf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16vv_powf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16vv_powf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16vv_powf) - -#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper -#include "../svml_s_powf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c new file mode 100644 index 0000000000..91ea810441 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized powf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16vv_powf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16vv_powf, __GI__ZGVeN16vv_powf, + __redirect__ZGVeN16vv_powf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S index 45d48723af..bdcd50afe1 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -82,7 +82,7 @@ .text ENTRY (_ZGVeN16vv_powf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf #else pushq %rbp @@ -344,7 +344,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 vmovss 1220(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1284(%rsp,%r15,8) jmp .LBL_1_8 @@ -352,14 +352,14 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 vmovss 1216(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1280(%rsp,%r15,8) jmp .LBL_1_7 #endif END (_ZGVeN16vv_powf_knl) ENTRY (_ZGVeN16vv_powf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf #else pushq %rbp @@ -629,7 +629,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf vmovss 1156(%rsp,%r15,8), %xmm1 vzeroupper vmovss 1092(%rsp,%r15,8), %xmm0 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -638,7 +638,7 @@ WRAPPER_IMPL_AVX512_ff 
_ZGVdN8vv_powf vmovss 1152(%rsp,%r15,8), %xmm1 vzeroupper vmovss 1088(%rsp,%r15,8), %xmm0 - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 #endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S new file mode 100644 index 0000000000..b6789a621d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized powf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN4vv_powf _ZGVbN4vv_powf_sse2 +#include "../svml_s_powf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S deleted file mode 100644 index b46821189b..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized powf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4vv_powf) - .type _ZGVbN4vv_powf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4vv_powf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4vv_powf_sse2(%rip), %rax - ret -END (_ZGVbN4vv_powf) -libmvec_hidden_def (_ZGVbN4vv_powf) - -#define _ZGVbN4vv_powf _ZGVbN4vv_powf_sse2 -#include "../svml_s_powf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c new file mode 100644 index 0000000000..8149d7c991 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized powf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVbN4vv_powf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4vv_powf, __GI__ZGVbN4vv_powf, + __redirect__ZGVbN4vv_powf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S index 420f98c6a6..bc59545c98 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function powf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -356,7 +356,7 @@ ENTRY (_ZGVbN4vv_powf_sse4) movss 68(%rsp,%r15,8), %xmm0 movss 132(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) movss %xmm0, 196(%rsp,%r15,8) jmp .LBL_1_8 @@ -366,7 +366,7 @@ ENTRY (_ZGVbN4vv_powf_sse4) movss 64(%rsp,%r15,8), %xmm0 movss 128(%rsp,%r15,8), %xmm1 - call powf@PLT + call JUMPTARGET(__powf_finite) movss %xmm0, 192(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S new file mode 100644 index 0000000000..48da6d25c7 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized powf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN8vv_powf _ZGVdN8vv_powf_sse_wrapper +#include "../svml_s_powf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S deleted file mode 100644 index 945908a2ff..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized powf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8vv_powf) - .type _ZGVdN8vv_powf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8vv_powf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8vv_powf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8vv_powf) -libmvec_hidden_def (_ZGVdN8vv_powf) - -#define _ZGVdN8vv_powf _ZGVdN8vv_powf_sse_wrapper -#include "../svml_s_powf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c new file mode 100644 index 0000000000..0da188180e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized powf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8vv_powf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8vv_powf, __GI__ZGVdN8vv_powf, + __redirect__ZGVdN8vv_powf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S index 4446859130..53a4b4bc2b 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -317,7 +317,7 @@ ENTRY(_ZGVdN8vv_powf_avx2) vmovss 132(%rsp,%r15,8), %xmm1 vzeroupper - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 196(%rsp,%r15,8) jmp .LBL_1_8 @@ -328,7 +328,7 @@ ENTRY(_ZGVdN8vv_powf_avx2) vmovss 128(%rsp,%r15,8), %xmm1 vzeroupper - call powf@PLT + call JUMPTARGET(__powf_finite) vmovss %xmm0, 192(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S new file mode 100644 index 0000000000..c677e3f1cf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized sincosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper +#include "../svml_s_sincosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S deleted file mode 100644 index 16cee0c676..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized sincosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16vvv_sincosf) - .type _ZGVeN16vvv_sincosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16vvv_sincosf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16vvv_sincosf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16vvv_sincosf) - -#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper -#include "../svml_s_sincosf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c new file mode 100644 index 0000000000..b753be6bbd --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincosf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16vvv_sincosf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16vvv_sincosf, __GI__ZGVeN16vvv_sincosf, + __redirect__ZGVeN16vvv_sincosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S index 758aeeaeed..5fa4bc412a 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -41,7 +41,7 @@ b) Calculate 2 polynomials for sin and cos: RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3)))); RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4)))); - c) Swap RS & RC if if first bit of obtained value after + c) Swap RS & RC if first bit of obtained value after Right Shifting is set to 1. Using And, Andnot & Or operations. 3) Destination sign setting a) Set shifted destination sign using XOR operation: @@ -49,9 +49,9 @@ R2 = XOR( RC, SC ). 
*/ .text -ENTRY (_ZGVeN16vvv_sincosf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT -WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf +ENTRY (_ZGVeN16vl4l4_sincosf_knl) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT +WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf #else pushq %rbp cfi_adjust_cfa_offset (8) @@ -243,12 +243,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1284(%rsp,%r15,8) jmp .LBL_1_8 @@ -257,20 +257,21 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1280(%rsp,%r15,8) jmp .LBL_1_7 #endif -END (_ZGVeN16vvv_sincosf_knl) +END (_ZGVeN16vl4l4_sincosf_knl) +libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_knl) -ENTRY (_ZGVeN16vvv_sincosf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +ENTRY (_ZGVeN16vl4l4_sincosf_skx) +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf #else pushq %rbp @@ -470,12 +471,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) vmovss 1156(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1284(%rsp,%r15,8) jmp .LBL_2_8 @@ -486,16 +487,266 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) vmovss 1152(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 1280(%rsp,%r15,8) jmp .LBL_2_7 #endif +END (_ZGVeN16vl4l4_sincosf_skx) +libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx) + +/* Wrapper between vvv and vl4l4 vector variants. 
*/ +.macro WRAPPER_AVX512_vvv_vl4l4 callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-64, %rsp + subq $384, %rsp + vmovups %zmm1, 128(%rsp) + lea (%rsp), %rdi + vmovups %zmm2, 192(%rdi) + vmovups %zmm3, 256(%rdi) + vmovups %zmm4, 320(%rdi) + lea 64(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 128(%rsp), %rdx + movq 136(%rsp), %rsi + movq 144(%rsp), %r8 + movq 152(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 160(%rsp), %rax + movq 168(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 176(%rsp), %rdi + movq 184(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movq 192(%rsp), %r11 + movq 200(%rsp), %rdx + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 208(%rsp), %rsi + movq 216(%rsp), %r8 + movl 32(%rsp), %r10d + movl 36(%rsp), %eax + movl 40(%rsp), %ecx + movl 44(%rsp), %edi + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 224(%rsp), %r10 + movq 232(%rsp), %rax + movl %ecx, (%rsi) + movl %edi, (%r8) + movq 240(%rsp), %rcx + movq 248(%rsp), %rdi + movl 48(%rsp), %r9d + movl 52(%rsp), %r11d + movl 56(%rsp), %edx + movl 60(%rsp), %esi + movl %r9d, (%r10) + movl %r11d, (%rax) + movq 256(%rsp), %r9 + movq 264(%rsp), %r11 + movl %edx, (%rcx) + movl %esi, (%rdi) + movq 272(%rsp), %rdx + movq 280(%rsp), %rsi + movl 64(%rsp), %r8d + movl 68(%rsp), %r10d + movl 72(%rsp), %eax + movl 76(%rsp), %ecx + movl %r8d, (%r9) + movl %r10d, (%r11) + movq 288(%rsp), %r8 + movq 296(%rsp), %r10 + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 304(%rsp), %rax + movq 312(%rsp), %rcx + movl 80(%rsp), %edi + movl 84(%rsp), %r9d + movl 88(%rsp), %r11d + movl 92(%rsp), %edx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 320(%rsp), %rdi + movq 328(%rsp), %r9 + movl %r11d, (%rax) + movl %edx, (%rcx) + 
movq 336(%rsp), %r11 + movq 344(%rsp), %rdx + movl 96(%rsp), %esi + movl 100(%rsp), %r8d + movl 104(%rsp), %r10d + movl 108(%rsp), %eax + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 352(%rsp), %rsi + movq 360(%rsp), %r8 + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 368(%rsp), %r10 + movq 376(%rsp), %rax + movl 112(%rsp), %ecx + movl 116(%rsp), %edi + movl 120(%rsp), %r9d + movl 124(%rsp), %r11d + movl %ecx, (%rsi) + movl %edi, (%r8) + movl %r9d, (%r10) + movl %r11d, (%rax) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-64, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -112(%rbp), %esi + leal -176(%rbp), %edi + subl $296, %esp + vmovdqa64 %zmm1, -240(%ebp) + vmovdqa64 %zmm2, -304(%ebp) + call HIDDEN_JUMPTARGET(\callee) + movl -240(%ebp), %eax + vmovss -176(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -236(%ebp), %eax + vmovss -172(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -232(%ebp), %eax + vmovss -168(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -228(%ebp), %eax + vmovss -164(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -224(%ebp), %eax + vmovss -160(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -220(%ebp), %eax + vmovss -156(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -216(%ebp), %eax + vmovss -152(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -212(%ebp), %eax + vmovss -148(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -208(%ebp), %eax + vmovss -144(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -204(%ebp), %eax + vmovss -140(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -200(%ebp), %eax + vmovss -136(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -196(%ebp), %eax + vmovss -132(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -192(%ebp), %eax + vmovss -128(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -188(%ebp), %eax + vmovss -124(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl 
-184(%ebp), %eax + vmovss -120(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -180(%ebp), %eax + vmovss -116(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -304(%ebp), %eax + vmovss -112(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -300(%ebp), %eax + vmovss -108(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -296(%ebp), %eax + vmovss -104(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -292(%ebp), %eax + vmovss -100(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -288(%ebp), %eax + vmovss -96(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -284(%ebp), %eax + vmovss -92(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -280(%ebp), %eax + vmovss -88(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -276(%ebp), %eax + vmovss -84(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -272(%ebp), %eax + vmovss -80(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -268(%ebp), %eax + vmovss -76(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -264(%ebp), %eax + vmovss -72(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -260(%ebp), %eax + vmovss -68(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -256(%ebp), %eax + vmovss -64(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -252(%ebp), %eax + vmovss -60(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -248(%ebp), %eax + vmovss -56(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -244(%ebp), %eax + vmovss -52(%ebp), %xmm0 + vmovss %xmm0, (%eax) + addl $296, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + +ENTRY (_ZGVeN16vvv_sincosf_knl) +WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_knl +END (_ZGVeN16vvv_sincosf_knl) + +ENTRY (_ZGVeN16vvv_sincosf_skx) +WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx END (_ZGVeN16vvv_sincosf_skx) .section .rodata, "a" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S new file mode 100644 index 0000000000..cc718b3a2e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized 
sincosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN4vvv_sincosf _ZGVbN4vvv_sincosf_sse2 +#include "../svml_s_sincosf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S deleted file mode 100644 index d72b4049e2..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sincosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4vvv_sincosf) - .type _ZGVbN4vvv_sincosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4vvv_sincosf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4vvv_sincosf_sse2(%rip), %rax - ret -END (_ZGVbN4vvv_sincosf) -libmvec_hidden_def (_ZGVbN4vvv_sincosf) - -#define _ZGVbN4vvv_sincosf _ZGVbN4vvv_sincosf_sse2 -#include "../svml_s_sincosf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c new file mode 100644 index 0000000000..705d96a8fb --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincosf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN4vvv_sincosf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4vvv_sincosf, __GI__ZGVbN4vvv_sincosf, + __redirect__ZGVbN4vvv_sincosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S index 643fc0ca3b..d758ceeb30 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_s_trig_data.h" .text -ENTRY (_ZGVbN4vvv_sincosf_sse4) +ENTRY (_ZGVbN4vl4l4_sincosf_sse4) /* ALGORITHM DESCRIPTION: @@ -42,7 +42,7 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4) b) Calculate 2 polynomials for sin and cos: RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3)))); RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4)))); - c) Swap RS & RC if if first bit of obtained value after + c) Swap RS & RC if first bit of obtained value after Right Shifting is set to 1. Using And, Andnot & Or operations. 
3) Destination sign setting a) Set shifted destination sign using XOR operation: @@ -241,12 +241,12 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4) movzbl %r12b, %r15d movss 132(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 196(%rsp,%r15,8) movss 132(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -255,14 +255,92 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4) movzbl %r12b, %r15d movss 128(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 192(%rsp,%r15,8) movss 128(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 +END (_ZGVbN4vl4l4_sincosf_sse4) +libmvec_hidden_def(_ZGVbN4vl4l4_sincosf_sse4) + +/* vvv version implemented with wrapper to vl4l4 variant. */ +ENTRY (_ZGVbN4vvv_sincosf_sse4) +#ifndef __ILP32__ + subq $104, %rsp + .cfi_def_cfa_offset 112 + movdqu %xmm1, 32(%rsp) + lea (%rsp), %rdi + movdqu %xmm2, 48(%rdi) + lea 16(%rsp), %rsi + movdqu %xmm3, 48(%rsi) + movdqu %xmm4, 64(%rsi) + call HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4) + movq 32(%rsp), %rdx + movq 40(%rsp), %rsi + movq 48(%rsp), %r8 + movq 56(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 64(%rsp), %rax + movq 72(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 80(%rsp), %rdi + movq 88(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movl %esi, (%rdi) + movl %r8d, (%r9) + addq $104, %rsp + .cfi_def_cfa_offset 8 + ret +#else + subl $72, %esp + .cfi_def_cfa_offset 80 + leal 48(%rsp), %esi + movaps %xmm1, 16(%esp) + leal 32(%rsp), %edi + movaps %xmm2, (%esp) + call HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4) + movl 16(%esp), %eax + movss 32(%esp), %xmm0 + movss %xmm0, (%eax) + movl 20(%esp), %eax + movss 36(%esp), %xmm0 + movss %xmm0, (%eax) + movl 24(%esp), %eax + movss 
40(%esp), %xmm0 + movss %xmm0, (%eax) + movl 28(%esp), %eax + movss 44(%esp), %xmm0 + movss %xmm0, (%eax) + movl (%esp), %eax + movss 48(%esp), %xmm0 + movss %xmm0, (%eax) + movl 4(%esp), %eax + movss 52(%esp), %xmm0 + movss %xmm0, (%eax) + movl 8(%esp), %eax + movss 56(%esp), %xmm0 + movss %xmm0, (%eax) + movl 12(%esp), %eax + movss 60(%esp), %xmm0 + movss %xmm0, (%eax) + addl $72, %esp + .cfi_def_cfa_offset 8 + ret +#endif END (_ZGVbN4vvv_sincosf_sse4) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S new file mode 100644 index 0000000000..348d1e6619 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized sincosf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN8vvv_sincosf _ZGVdN8vvv_sincosf_sse_wrapper +#include "../svml_s_sincosf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S deleted file mode 100644 index 0123b8024e..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sincosf. 
- Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8vvv_sincosf) - .type _ZGVdN8vvv_sincosf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVdN8vvv_sincosf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8vvv_sincosf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8vvv_sincosf) -libmvec_hidden_def (_ZGVdN8vvv_sincosf) - -#define _ZGVdN8vvv_sincosf _ZGVdN8vvv_sincosf_sse_wrapper -#include "../svml_s_sincosf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c new file mode 100644 index 0000000000..74f3d3f041 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sincosf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define SYMBOL_NAME _ZGVdN8vvv_sincosf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8vvv_sincosf, __GI__ZGVdN8vvv_sincosf, + __redirect__ZGVdN8vvv_sincosf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S index f2a0ba7116..8b4b92dd94 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_s_trig_data.h" .text -ENTRY(_ZGVdN8vvv_sincosf_avx2) +ENTRY (_ZGVdN8vl4l4_sincosf_avx2) /* ALGORITHM DESCRIPTION: @@ -42,7 +42,7 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2) b) Calculate 2 polynomials for sin and cos: RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3)))); RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4)))); - c) Swap RS & RC if if first bit of obtained value after + c) Swap RS & RC if first bit of obtained value after Right Shifting is set to 1. Using And, Andnot & Or operations. 
3) Destination sign setting a) Set shifted destination sign using XOR operation: @@ -213,12 +213,12 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2) vmovss 260(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 324(%rsp,%r15,8) vmovss 260(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -228,14 +228,162 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2) vmovss 256(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 320(%rsp,%r15,8) vmovss 256(%rsp,%r15,8), %xmm0 - call cosf@PLT + call JUMPTARGET(cosf) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 -END(_ZGVdN8vvv_sincosf_avx2) +END (_ZGVdN8vl4l4_sincosf_avx2) +libmvec_hidden_def(_ZGVdN8vl4l4_sincosf_avx2) + +/* vvv version implemented with wrapper to vl4l4 variant. */ +ENTRY (_ZGVdN8vvv_sincosf_avx2) +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-32, %rsp + subq $192, %rsp + vmovdqu %ymm1, 64(%rsp) + lea (%rsp), %rdi + vmovdqu %ymm2, 96(%rdi) + vmovdqu %ymm3, 128(%rdi) + vmovdqu %ymm4, 160(%rdi) + lea 32(%rsp), %rsi + call HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2) + movq 64(%rsp), %rdx + movq 72(%rsp), %rsi + movq 80(%rsp), %r8 + movq 88(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 96(%rsp), %rax + movq 104(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 112(%rsp), %rdi + movq 120(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movq 128(%rsp), %r11 + movq 136(%rsp), %rdx + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 144(%rsp), %rsi + movq 152(%rsp), %r8 + movl 32(%rsp), %r10d + movl 36(%rsp), %eax + movl 40(%rsp), %ecx + movl 44(%rsp), %edi + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 160(%rsp), %r10 + movq 168(%rsp), %rax + 
movl %ecx, (%rsi) + movl %edi, (%r8) + movq 176(%rsp), %rcx + movq 184(%rsp), %rdi + movl 48(%rsp), %r9d + movl 52(%rsp), %r11d + movl 56(%rsp), %edx + movl 60(%rsp), %esi + movl %r9d, (%r10) + movl %r11d, (%rax) + movl %edx, (%rcx) + movl %esi, (%rdi) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-32, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x78,0x6 + leal -48(%rbp), %esi + leal -80(%rbp), %edi + subl $136, %esp + vmovdqa %ymm1, -112(%ebp) + vmovdqa %ymm2, -144(%ebp) + call HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2) + vmovdqa -112(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovss -80(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -76(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -104(%ebp), %rax + vmovss -72(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -68(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -96(%ebp), %rax + vmovss -64(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -60(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -88(%ebp), %rax + vmovss -56(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -52(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + vmovdqa -144(%ebp), %xmm0 + vmovq %xmm0, %rax + vmovss -48(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -44(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -136(%ebp), %rax + vmovss -40(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -36(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -128(%ebp), %rax + vmovss -32(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -28(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + movq -120(%ebp), %rax + vmovss -24(%ebp), %xmm0 + vmovss %xmm0, (%eax) + vmovss -20(%ebp), %xmm0 + shrq $32, %rax + vmovss %xmm0, (%eax) + addl $136, %esp + popq %r10 + .cfi_def_cfa 10, 0 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 
7, 8 + ret +#endif +END (_ZGVdN8vvv_sincosf_avx2) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S new file mode 100644 index 0000000000..fa521b9dac --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S @@ -0,0 +1,20 @@ +/* AVX2 version of vectorized sinf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper +#include "../svml_s_sinf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S deleted file mode 100644 index 2212cdd94d..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S +++ /dev/null @@ -1,37 +0,0 @@ -/* Multiple versions of vectorized sinf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVeN16v_sinf) - .type _ZGVeN16v_sinf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVeN16v_sinf_skx(%rip), %rax - HAS_ARCH_FEATURE (AVX512DQ_Usable) - jnz 2f - leaq _ZGVeN16v_sinf_knl(%rip), %rax - HAS_ARCH_FEATURE (AVX512F_Usable) - jnz 2f - leaq _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax -2: ret -END (_ZGVeN16v_sinf) - -#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper -#include "../svml_s_sinf16_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c new file mode 100644 index 0000000000..97e5b58284 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sinf, vector length is 16. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVeN16v_sinf +#include "ifunc-mathvec-avx512.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVeN16v_sinf, __GI__ZGVeN16v_sinf, + __redirect__ZGVeN16v_sinf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S index 61d8d3793a..141f747eb5 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX-512. KNL and SKX versions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY(_ZGVeN16v_sinf_knl) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf #else /* @@ -229,21 +229,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf cfi_restore_state movzbl %r12b, %r15d vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_1_8 .LBL_1_12: movzbl %r12b, %r15d vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_1_7 #endif END(_ZGVeN16v_sinf_knl) ENTRY (_ZGVeN16v_sinf_skx) -#ifndef HAVE_AVX512_ASM_SUPPORT +#ifndef HAVE_AVX512DQ_ASM_SUPPORT WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf #else /* @@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf vzeroupper vmovss 1156(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1220(%rsp,%r15,8) jmp .LBL_2_8 @@ -466,7 +466,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf vzeroupper vmovss 1152(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 1216(%rsp,%r15,8) jmp .LBL_2_7 diff --git 
a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S new file mode 100644 index 0000000000..1d2e65c39d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S @@ -0,0 +1,20 @@ +/* SSE2 version of vectorized sinf. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVbN4v_sinf _ZGVbN4v_sinf_sse2 +#include "../svml_s_sinf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S deleted file mode 100644 index b31554730d..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sinf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVbN4v_sinf) - .type _ZGVbN4v_sinf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - leaq _ZGVbN4v_sinf_sse4(%rip), %rax - HAS_CPU_FEATURE (SSE4_1) - jz 2f - ret -2: leaq _ZGVbN4v_sinf_sse2(%rip), %rax - ret -END (_ZGVbN4v_sinf) -libmvec_hidden_def (_ZGVbN4v_sinf) - -#define _ZGVbN4v_sinf _ZGVbN4v_sinf_sse2 -#include "../svml_s_sinf4_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c new file mode 100644 index 0000000000..93b8bfebbf --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sinf, vector length is 4. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVbN4v_sinf +#include "ifunc-mathvec-sse4_1.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVbN4v_sinf, __GI__ZGVbN4v_sinf, + __redirect__ZGVbN4v_sinf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S index 5268ab1f09..39a4c92235 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -207,7 +207,7 @@ ENTRY(_ZGVbN4v_sinf_sse4) movzbl %r12b, %r15d movss 196(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 260(%rsp,%r15,8) jmp .LBL_1_8 @@ -216,7 +216,7 @@ ENTRY(_ZGVbN4v_sinf_sse4) movzbl %r12b, %r15d movss 192(%rsp,%r15,8), %xmm0 - call sinf@PLT + call JUMPTARGET(sinf) movss %xmm0, 256(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S new file mode 100644 index 0000000000..f2af3a0b4b --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S @@ -0,0 +1,20 @@ +/* SSE version of vectorized sinf, vector length is 8. + Copyright (C) 2014-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define _ZGVdN8v_sinf _ZGVdN8v_sinf_sse_wrapper +#include "../svml_s_sinf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S deleted file mode 100644 index 47fe0a4adc..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S +++ /dev/null @@ -1,36 +0,0 @@ -/* Multiple versions of vectorized sinf, vector length is 8. - Copyright (C) 2014-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY (_ZGVdN8v_sinf) - .type _ZGVdN8v_sinf, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX -1: leaq _ZGVdN8v_sinf_avx2(%rip), %rax - HAS_ARCH_FEATURE (AVX2_Usable) - jz 2f - ret -2: leaq _ZGVdN8v_sinf_sse_wrapper(%rip), %rax - ret -END (_ZGVdN8v_sinf) -libmvec_hidden_def (_ZGVdN8v_sinf) - -#define _ZGVdN8v_sinf _ZGVdN8v_sinf_sse_wrapper -#include "../svml_s_sinf8_core.S" diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c new file mode 100644 index 0000000000..cf13b6647c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c @@ -0,0 +1,28 @@ +/* Multiple versions of vectorized sinf, vector length is 8. + Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define SYMBOL_NAME _ZGVdN8v_sinf +#include "ifunc-mathvec-avx2.h" + +libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ()); + +#ifdef SHARED +__hidden_ver1 (_ZGVdN8v_sinf, __GI__ZGVdN8v_sinf, + __redirect__ZGVdN8v_sinf) + __attribute__ ((visibility ("hidden"))); +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S index 9fdaadb2e8..5f7a95e9ad 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -201,7 +201,7 @@ ENTRY(_ZGVdN8v_sinf_avx2) vmovss 324(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 388(%rsp,%r15,8) jmp .LBL_1_8 @@ -211,7 +211,7 @@ ENTRY(_ZGVdN8v_sinf_avx2) vmovss 320(%rsp,%r15,8), %xmm0 vzeroupper - call sinf@PLT + call JUMPTARGET(sinf) vmovss %xmm0, 384(%rsp,%r15,8) jmp .LBL_1_7 diff --git a/sysdeps/x86_64/fpu/printf_fphex.c b/sysdeps/x86_64/fpu/printf_fphex.c index 0fbaa3748e..62efed10da 100644 --- a/sysdeps/x86_64/fpu/printf_fphex.c +++ b/sysdeps/x86_64/fpu/printf_fphex.c @@ -1,5 +1,5 @@ /* Print floating point number in hexadecimal notation according to ISO C99. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/s_ceill.S b/sysdeps/x86_64/fpu/s_ceill.S index 910c371d58..8f2bd351f6 100644 --- a/sysdeps/x86_64/fpu/s_ceill.S +++ b/sysdeps/x86_64/fpu/s_ceill.S @@ -5,27 +5,33 @@ * Public domain. 
*/ +#include <libm-alias-ldouble.h> #include <machine/asm.h> ENTRY(__ceill) fldt 8(%rsp) - fstcw -4(%rsp) /* store fpu control word */ + fnstenv -28(%rsp) /* store fpu environment */ /* We use here %edx although only the low 1 bits are defined. But none of the operations should care and they are faster than the 16 bit operations. */ movl $0x0800,%edx /* round towards +oo */ - orl -4(%rsp),%edx + orl -28(%rsp),%edx andl $0xfbff,%edx - movl %edx,-8(%rsp) - fldcw -8(%rsp) /* load modified control word */ + movl %edx,-32(%rsp) + fldcw -32(%rsp) /* load modified control word */ frndint /* round */ - fldcw -4(%rsp) /* restore original control word */ + /* Preserve "invalid" exceptions from sNaN input. */ + fnstsw + andl $0x1, %eax + orl %eax, -24(%rsp) + + fldenv -28(%rsp) /* restore original environment */ ret END (__ceill) -weak_alias (__ceill, ceill) +libm_alias_ldouble (__ceil, ceil) diff --git a/sysdeps/x86_64/fpu/s_copysign.S b/sysdeps/x86_64/fpu/s_copysign.S index 18f568f46f..e2921ce770 100644 --- a/sysdeps/x86_64/fpu/s_copysign.S +++ b/sysdeps/x86_64/fpu/s_copysign.S @@ -1,5 +1,5 @@ /* copy sign, double version. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. */ #include <machine/asm.h> +#include <libm-alias-double.h> .section .rodata.cst16,"aM",@progbits,16 @@ -47,4 +48,4 @@ ENTRY(__copysign) ret END (__copysign) -weak_alias (__copysign, copysign) +libm_alias_double (__copysign, copysign) diff --git a/sysdeps/x86_64/fpu/s_copysignf.S b/sysdeps/x86_64/fpu/s_copysignf.S index 00a1fabaee..4093e781fe 100644 --- a/sysdeps/x86_64/fpu/s_copysignf.S +++ b/sysdeps/x86_64/fpu/s_copysignf.S @@ -1,5 +1,5 @@ /* copy sign, double version. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. 
This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. */ #include <machine/asm.h> +#include <libm-alias-float.h> .section .rodata @@ -42,4 +43,4 @@ ENTRY(__copysignf) retq END (__copysignf) -weak_alias (__copysignf, copysignf) +libm_alias_float (__copysign, copysign) diff --git a/sysdeps/x86_64/fpu/s_copysignl.S b/sysdeps/x86_64/fpu/s_copysignl.S index 2ffd612d65..8616205d38 100644 --- a/sysdeps/x86_64/fpu/s_copysignl.S +++ b/sysdeps/x86_64/fpu/s_copysignl.S @@ -5,6 +5,7 @@ * Public domain. */ +#include <libm-alias-ldouble.h> #include <machine/asm.h> RCSID("$NetBSD: $") @@ -19,4 +20,4 @@ ENTRY(__copysignl) fldt 8(%rsp) ret END (__copysignl) -weak_alias (__copysignl, copysignl) +libm_alias_ldouble (__copysign, copysign) diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S deleted file mode 100644 index 31968e498f..0000000000 --- a/sysdeps/x86_64/fpu/s_cosf.S +++ /dev/null @@ -1,533 +0,0 @@ -/* Optimized cosf function. - Copyright (C) 2012-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x| == 0: return 1.0-|x|. - * 2) if |x| < 2^-27: return 1.0-|x|. 
- * 3) if |x| < 2^-5 : return 1.0+x^2*DP_COS2_0+x^5*DP_COS2_1. - * 4) if |x| < Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3, - * t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * s = (-1.0)^((n>>2)&1) - * if(n&2 != 0) { - * using cos(t) polynomial for |t|<Pi/4, result is - * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))). - * } else { - * using sin(t) polynomial for |t|<Pi/4, result is - * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))). - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3, - * t=|x|-j*Pi/4. - * 6.2) Reconstruction same as (5.2). - * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3, - * t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. - * - * Special cases: - * cos(+-0) = 1 not raising inexact, - * cos(subnormal) raises inexact, - * cos(min_normalized) raises inexact, - * cos(normalized) raises inexact, - * cos(Inf) = NaN, raises invalid, sets errno to EDOM, - * cos(NaN) = NaN. - */ - - .text -ENTRY(__cosf) - /* Input: single precision x in %xmm0 */ - - movd %xmm0, %eax /* Bits of x */ - movaps %xmm0, %xmm7 /* Copy of x */ - cvtss2sd %xmm0, %xmm0 /* DP x */ - movss L(SP_ABS_MASK)(%rip), %xmm3 - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - andps %xmm7, %xmm3 /* SP |x| */ - andpd L(DP_ABS_MASK)(%rip), %xmm0 /* DP |x| */ - movss L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? 
*/ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - lea L(PIO4J)(%rip), %rsi - addl $1, %eax /* k+1 */ - movl $0x0e, %edx - andl %eax, %edx /* j = (k+1)&0x0e */ - addl $2, %eax /* n */ - subsd (%rsi,%rdx,8), %xmm0 /* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t */ - testl $2, %eax /* n&2 != 0? */ - jz L(sin_poly) - -/*L(cos_poly):*/ - /* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))) - */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd L(DP_C4)(%rip), %xmm4 /* C4 */ - mulsd %xmm0, %xmm4 /* z*C4 */ - movsd L(DP_C3)(%rip), %xmm3 /* C3 */ - mulsd %xmm0, %xmm3 /* z*C3 */ - lea L(DP_ONES)(%rip), %rsi - addsd L(DP_C2)(%rip), %xmm4 /* C2+z*C4 */ - mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */ - addsd L(DP_C1)(%rip), %xmm3 /* C1+z*C3 */ - mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */ - addsd L(DP_C0)(%rip), %xmm4 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */ - - addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd L(DP_ONES)(%rip), %xmm3 - - mulsd (%rsi,%rax,8), %xmm3 /* DP result */ - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(sin_poly): - /* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))) - */ - - movaps %xmm0, %xmm4 /* t */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd L(DP_S4)(%rip), %xmm2 /* S4 */ - mulsd %xmm0, %xmm2 /* z*S4 */ - movsd L(DP_S3)(%rip), 
%xmm3 /* S3 */ - mulsd %xmm0, %xmm3 /* z*S3 */ - lea L(DP_ONES)(%rip), %rsi - addsd L(DP_S2)(%rip), %xmm2 /* S2+z*S4 */ - mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */ - addsd L(DP_S1)(%rip), %xmm3 /* S1+z*S3 */ - mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */ - addsd L(DP_S0)(%rip), %xmm2 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ - /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */ - mulsd (%rsi,%rax,8), %xmm4 - /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm2, %xmm3 - /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulsd %xmm4, %xmm3 - /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23? */ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movsd L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - addl $2, %eax /* n */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */ - subl $68, %eax - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - /* clear unneeded remainder from 
%ah */ - andl $0xff, %eax - - imull $28, %eax, %ecx /* j*28 */ - lea L(_FPI)(%rip), %rsi - movsd L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -16(%rsi,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd -8(%rsi,%rax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */ - mulsd (%rsi,%rax,8), %xmm0 /* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 8(%rsi,%rax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19? */ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd L(DP_2POW52)(%rip), %xmm6 - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+L(DP_2POW52)(%rip), %xmm4 - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - lea L(DP_ZERONE)(%rip), %rsi - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd (%rsi,%rdx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $3, %eax /* n=k+3 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd L(DP_PIO4)(%rip), %xmm0 /* t *= PI04 */ - - jmp L(reconstruction) /* end of very_large_args peth */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5? 
*/ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - mulsd %xmm0, %xmm0 /* y=x^2 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=x^4 */ - movsd L(DP_C4)(%rip), %xmm3 /* C4 */ - mulsd %xmm0, %xmm3 /* z*C4 */ - movsd L(DP_C3)(%rip), %xmm5 /* C3 */ - mulsd %xmm0, %xmm5 /* z*C3 */ - addsd L(DP_C2)(%rip), %xmm3 /* C2+z*C4 */ - mulsd %xmm0, %xmm3 /* z*(C2+z*C4) */ - addsd L(DP_C1)(%rip), %xmm5 /* C1+z*C3 */ - mulsd %xmm0, %xmm5 /* z*(C1+z*C3) */ - addsd L(DP_C0)(%rip), %xmm3 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm3 /* y*(C0+z*(C2+z*C4)) */ - /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd %xmm5, %xmm3 - /* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd L(DP_ONES)(%rip), %xmm3 - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27? */ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - movsd L(DP_COS2_1)(%rip), %xmm3 /* DP DP_COS2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_1 */ - addsd L(DP_COS2_0)(%rip), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */ - /* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */ - addsd L(DP_ONES)(%rip), %xmm3 - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(arg_less_2pn27): - /* Here if |x|<2^-27 */ - andps L(SP_ABS_MASK)(%rip),%xmm7 /* |x| */ - movss L(SP_ONE)(%rip), %xmm0 /* 1.0 */ - subss %xmm7, %xmm0 /* result is 1.0-|x| */ - ret - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Align stack to 16 bytes. */ - subq $8, %rsp - cfi_adjust_cfa_offset (8) - /* Here if x is Inf. Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - addq $8, %rsp - cfi_adjust_cfa_offset (-8) - - movl $EDOM, (%rax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. 
*/ - movaps %xmm7, %xmm0 /* load x */ - subss %xmm0, %xmm0 /* Result is NaN */ - ret -END(__cosf) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - .long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomial - for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5. 
*/ - .p2align 3 -L(DP_COS2_0): - .long 0xff5cc6fd,0xbfdfffff - .type L(DP_COS2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_COS2_0)) - - .p2align 3 -L(DP_COS2_1): - .long 0xb178dac5,0x3fa55514 - .type L(DP_COS2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_COS2_1)) - - .p2align 3 -L(DP_ZERONE): - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* 1.0 */ - .type L(DP_ZERONE), @object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomial - for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */ - .p2align 3 -L(DP_S3): - .long 0x64e6b5b4,0x3ec71d72 - .type L(DP_S3), @object - ASM_SIZE_DIRECTIVE(L(DP_S3)) - - .p2align 3 -L(DP_S1): - .long 0x10c2688b,0x3f811111 - .type L(DP_S1), @object - ASM_SIZE_DIRECTIVE(L(DP_S1)) - - .p2align 3 -L(DP_S4): - .long 0x1674b58a,0xbe5a947e - .type L(DP_S4), @object - ASM_SIZE_DIRECTIVE(L(DP_S4)) - - .p2align 3 -L(DP_S2): - .long 0x8b4bd1f9,0xbf2a019f - .type L(DP_S2),@object - ASM_SIZE_DIRECTIVE(L(DP_S2)) - - .p2align 3 -L(DP_S0): - .long 0x55551cd9,0xbfc55555 - .type L(DP_S0), @object - ASM_SIZE_DIRECTIVE(L(DP_S0)) - -/* Coefficients of polynomial - for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. 
*/ - .p2align 3 -L(DP_C3): - .long 0x9ac43cc0,0x3efa00eb - .type L(DP_C3), @object - ASM_SIZE_DIRECTIVE(L(DP_C3)) - - .p2align 3 -L(DP_C1): - .long 0x545c50c7,0x3fa55555 - .type L(DP_C1), @object - ASM_SIZE_DIRECTIVE(L(DP_C1)) - - .p2align 3 -L(DP_C4): - .long 0xdd8844d7,0xbe923c97 - .type L(DP_C4), @object - ASM_SIZE_DIRECTIVE(L(DP_C4)) - - .p2align 3 -L(DP_C2): - .long 0x348b6874,0xbf56c16b - .type L(DP_C2), @object - ASM_SIZE_DIRECTIVE(L(DP_C2)) - - .p2align 3 -L(DP_C0): - .long 0xfffe98ae,0xbfdfffff - .type L(DP_C0), @object - ASM_SIZE_DIRECTIVE(L(DP_C0)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 */ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of Pi/4 */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): - .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - - .p2align 4 -L(SP_ABS_MASK): /* Mask for getting SP absolute value */ - .long 0x7fffffff,0x7fffffff - .long 0x7fffffff,0x7fffffff - .type L(SP_ABS_MASK), @object - 
ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK)) - - .p2align 2 -L(SP_ONE): - .long 0x3f800000 /* 1.0 */ - .type L(SP_ONE), @object - ASM_SIZE_DIRECTIVE(L(SP_ONE)) - -weak_alias(__cosf, cosf) diff --git a/sysdeps/x86_64/fpu/s_fabs.c b/sysdeps/x86_64/fpu/s_fabs.c index d3a313fdf5..d1e17878d4 100644 --- a/sysdeps/x86_64/fpu/s_fabs.c +++ b/sysdeps/x86_64/fpu/s_fabs.c @@ -1,5 +1,5 @@ /* Absolute value of floating point number. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,10 +17,11 @@ <http://www.gnu.org/licenses/>. */ #include <math.h> +#include <libm-alias-double.h> double __fabs (double x) { return __builtin_fabs (x); } -weak_alias (__fabs, fabs) +libm_alias_double (__fabs, fabs) diff --git a/sysdeps/x86_64/fpu/s_fabsf.c b/sysdeps/x86_64/fpu/s_fabsf.c index e6dcda9433..2f39228560 100644 --- a/sysdeps/x86_64/fpu/s_fabsf.c +++ b/sysdeps/x86_64/fpu/s_fabsf.c @@ -1,5 +1,5 @@ /* Absolute value of floating point number. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,10 +17,11 @@ <http://www.gnu.org/licenses/>. */ #include <math.h> +#include <libm-alias-float.h> float __fabsf (float x) { return __builtin_fabsf (x); } -weak_alias (__fabsf, fabsf) +libm_alias_float (__fabs, fabs) diff --git a/sysdeps/x86_64/fpu/s_fabsl.S b/sysdeps/x86_64/fpu/s_fabsl.S index 6881ff11c7..7f03ecdccb 100644 --- a/sysdeps/x86_64/fpu/s_fabsl.S +++ b/sysdeps/x86_64/fpu/s_fabsl.S @@ -1,5 +1,5 @@ /* Absolute value of floating point number. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -17,6 +17,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <libm-alias-ldouble.h> .text ENTRY(__fabsl) @@ -24,4 +25,4 @@ ENTRY(__fabsl) fabs ret END(__fabsl) -weak_alias (__fabsl, fabsl) +libm_alias_ldouble (__fabs, fabs) diff --git a/sysdeps/x86_64/fpu/s_floorl.S b/sysdeps/x86_64/fpu/s_floorl.S index f9ecc388df..75f8255648 100644 --- a/sysdeps/x86_64/fpu/s_floorl.S +++ b/sysdeps/x86_64/fpu/s_floorl.S @@ -5,26 +5,32 @@ * Public domain. */ +#include <libm-alias-ldouble.h> #include <machine/asm.h> ENTRY(__floorl) fldt 8(%rsp) - fstcw -4(%rsp) /* store fpu control word */ + fnstenv -28(%rsp) /* store fpu environment */ /* We use here %edx although only the low 1 bits are defined. But none of the operations should care and they are faster than the 16 bit operations. */ movl $0x400,%edx /* round towards -oo */ - orl -4(%rsp),%edx + orl -28(%rsp),%edx andl $0xf7ff,%edx - movl %edx,-8(%rsp) - fldcw -8(%rsp) /* load modified control word */ + movl %edx,-32(%rsp) + fldcw -32(%rsp) /* load modified control word */ frndint /* round */ - fldcw -4(%rsp) /* restore original control word */ + /* Preserve "invalid" exceptions from sNaN input. */ + fnstsw + andl $0x1, %eax + orl %eax, -24(%rsp) + + fldenv -28(%rsp) /* restore original environment */ ret END (__floorl) -weak_alias (__floorl, floorl) +libm_alias_ldouble (__floor, floor) diff --git a/sysdeps/x86_64/fpu/s_fmax.S b/sysdeps/x86_64/fpu/s_fmax.S index 02096c0aea..7cd8f1ed10 100644 --- a/sysdeps/x86_64/fpu/s_fmax.S +++ b/sysdeps/x86_64/fpu/s_fmax.S @@ -1,5 +1,5 @@ /* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> +#include <libm-alias-double.h> .text ENTRY(__fmax) @@ -27,9 +28,26 @@ ENTRY(__fmax) jmp 2f 1: ucomisd %xmm1, %xmm1 // Is xmm1 a NaN? - jp 2f // then return xmm0 + jp 3f + // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling. + movsd %xmm0, -8(%rsp) + testb $0x8, -2(%rsp) + jz 4f movsd %xmm1, %xmm0 // otherwise return xmm1 + ret + +3: // xmm1 is a NaN; xmm0 may or may not be. + ucomisd %xmm0, %xmm0 + jp 4f + // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling. + movsd %xmm1, -8(%rsp) + testb $0x8, -2(%rsp) + jz 4f + ret + +4: // Both arguments are NaNs, or one is a signaling NaN. + addsd %xmm1, %xmm0 2: ret END(__fmax) -weak_alias (__fmax, fmax) +libm_alias_double (__fmax, fmax) diff --git a/sysdeps/x86_64/fpu/s_fmaxf.S b/sysdeps/x86_64/fpu/s_fmaxf.S index 28e129701e..9b932fddc2 100644 --- a/sysdeps/x86_64/fpu/s_fmaxf.S +++ b/sysdeps/x86_64/fpu/s_fmaxf.S @@ -1,5 +1,5 @@ /* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <libm-alias-float.h> .text ENTRY(__fmaxf) @@ -27,9 +28,26 @@ ENTRY(__fmaxf) jmp 2f 1: ucomiss %xmm1, %xmm1 // Is xmm1 a NaN? - jp 2f // then return xmm0 + jp 3f + // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling. + movss %xmm0, -4(%rsp) + testb $0x40, -2(%rsp) + jz 4f movss %xmm1, %xmm0 // otherwise return xmm1 + ret + +3: // xmm1 is a NaN; xmm0 may or may not be. + ucomiss %xmm0, %xmm0 + jp 4f + // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling. + movss %xmm1, -4(%rsp) + testb $0x40, -2(%rsp) + jz 4f + ret + +4: // Both arguments are NaNs, or one is a signaling NaN. 
+ addss %xmm1, %xmm0 2: ret END(__fmaxf) -weak_alias (__fmaxf, fmaxf) +libm_alias_float (__fmax, fmax) diff --git a/sysdeps/x86_64/fpu/s_fmaxl.S b/sysdeps/x86_64/fpu/s_fmaxl.S index f0c2bc0d56..3463a07083 100644 --- a/sysdeps/x86_64/fpu/s_fmaxl.S +++ b/sysdeps/x86_64/fpu/s_fmaxl.S @@ -1,5 +1,5 @@ /* Compute maximum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,22 +18,42 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <libm-alias-ldouble.h> .text ENTRY(__fmaxl) fldt 8(%rsp) // x fldt 24(%rsp) // x : y - fucomi %st(0), %st - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - - fxch - fucomi %st(1), %st + jp 2f fcmovb %st(1), %st fstp %st(1) ret + +2: // Unordered. + fucomi %st(0), %st + jp 3f + // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. + testb $0x40, 15(%rsp) + jz 4f + fstp %st(1) + ret + +3: // st(0) is a NaN; st(1) may or may not be. + fxch + fucomi %st(0), %st + jp 4f + // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. + testb $0x40, 31(%rsp) + jz 4f + fstp %st(1) + ret + +4: // Both arguments are NaNs, or one is a signaling NaN. + faddp + ret END(__fmaxl) -weak_alias (__fmaxl, fmaxl) +libm_alias_ldouble (__fmax, fmax) diff --git a/sysdeps/x86_64/fpu/s_fmin.S b/sysdeps/x86_64/fpu/s_fmin.S index fb14e2f3ed..15b6eaed90 100644 --- a/sysdeps/x86_64/fpu/s_fmin.S +++ b/sysdeps/x86_64/fpu/s_fmin.S @@ -1,5 +1,5 @@ /* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> +#include <libm-alias-double.h> .text ENTRY(__fmin) @@ -27,9 +28,26 @@ ENTRY(__fmin) jmp 2f 1: ucomisd %xmm1, %xmm1 // Is xmm1 a NaN? - jp 2f // then return xmm0 + jp 3f + // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling. + movsd %xmm0, -8(%rsp) + testb $0x8, -2(%rsp) + jz 4f movsd %xmm1, %xmm0 // otherwise return xmm1 + ret + +3: // xmm1 is a NaN; xmm0 may or may not be. + ucomisd %xmm0, %xmm0 + jp 4f + // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling. + movsd %xmm1, -8(%rsp) + testb $0x8, -2(%rsp) + jz 4f + ret + +4: // Both arguments are NaNs, or one is a signaling NaN. + addsd %xmm1, %xmm0 2: ret END(__fmin) -weak_alias (__fmin, fmin) +libm_alias_double (__fmin, fmin) diff --git a/sysdeps/x86_64/fpu/s_fminf.S b/sysdeps/x86_64/fpu/s_fminf.S index c8d6d0fd33..28e26aead5 100644 --- a/sysdeps/x86_64/fpu/s_fminf.S +++ b/sysdeps/x86_64/fpu/s_fminf.S @@ -1,5 +1,5 @@ /* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>, 2002. @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <libm-alias-float.h> .text ENTRY(__fminf) @@ -27,9 +28,26 @@ ENTRY(__fminf) jmp 2f 1: ucomiss %xmm1, %xmm1 // Is xmm1 a NaN? - jp 2f // then return xmm0 + jp 3f + // xmm0 is a NaN; xmm1 is not. Test if xmm0 is signaling. + movss %xmm0, -4(%rsp) + testb $0x40, -2(%rsp) + jz 4f movss %xmm1, %xmm0 // otherwise return xmm1 + ret + +3: // xmm1 is a NaN; xmm0 may or may not be. + ucomiss %xmm0, %xmm0 + jp 4f + // xmm1 is a NaN; xmm0 is not. Test if xmm1 is signaling. + movss %xmm1, -4(%rsp) + testb $0x40, -2(%rsp) + jz 4f + ret + +4: // Both arguments are NaNs, or one is a signaling NaN. 
+ addss %xmm1, %xmm0 2: ret END(__fminf) -weak_alias (__fminf, fminf) +libm_alias_float (__fmin, fmin) diff --git a/sysdeps/x86_64/fpu/s_fminl.S b/sysdeps/x86_64/fpu/s_fminl.S index f1a06d29d7..df81762449 100644 --- a/sysdeps/x86_64/fpu/s_fminl.S +++ b/sysdeps/x86_64/fpu/s_fminl.S @@ -1,5 +1,5 @@ /* Compute minimum of two numbers, regarding NaN as missing argument. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -18,20 +18,42 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <libm-alias-ldouble.h> .text ENTRY(__fminl) fldt 8(%rsp) // x fldt 24(%rsp) // x : y - fucomi %st(0), %st - fcmovu %st(1), %st // now %st contains y if not NaN, x otherwise - fucomi %st(1), %st + jp 2f fcmovnb %st(1), %st fstp %st(1) ret + +2: // Unordered. + fucomi %st(0), %st + jp 3f + // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. + testb $0x40, 15(%rsp) + jz 4f + fstp %st(1) + ret + +3: // st(0) is a NaN; st(1) may or may not be. + fxch + fucomi %st(0), %st + jp 4f + // st(1) is a NaN; st(0) is not. Test if st(1) is signaling. + testb $0x40, 31(%rsp) + jz 4f + fstp %st(1) + ret + +4: // Both arguments are NaNs, or one is a signaling NaN. + faddp + ret END(__fminl) -weak_alias (__fminl, fminl) +libm_alias_ldouble (__fmin, fmin) diff --git a/sysdeps/x86_64/fpu/s_llrint.S b/sysdeps/x86_64/fpu/s_llrint.S index 6634c653ea..7b93724e46 100644 --- a/sysdeps/x86_64/fpu/s_llrint.S +++ b/sysdeps/x86_64/fpu/s_llrint.S @@ -1,6 +1,6 @@ /* Round argument to nearest integral value according to current rounding direction. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.d>, 2002. @@ -19,14 +19,15 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> +#include <libm-alias-double.h> .text ENTRY(__llrint) cvtsd2si %xmm0,%rax ret END(__llrint) -weak_alias (__llrint, llrint) +libm_alias_double (__llrint, llrint) #ifndef __ILP32__ strong_alias (__llrint, __lrint) -weak_alias (__llrint, lrint) +libm_alias_double (__llrint, lrint) #endif diff --git a/sysdeps/x86_64/fpu/s_llrintf.S b/sysdeps/x86_64/fpu/s_llrintf.S index 5ac03dffd9..b6088de1ff 100644 --- a/sysdeps/x86_64/fpu/s_llrintf.S +++ b/sysdeps/x86_64/fpu/s_llrintf.S @@ -1,6 +1,6 @@ /* Round argument to nearest integral value according to current rounding direction. - Copyright (C) 2002-2016 Free Software Foundation, Inc. + Copyright (C) 2002-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.d>, 2002. @@ -19,14 +19,15 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> +#include <libm-alias-float.h> .text ENTRY(__llrintf) cvtss2si %xmm0,%rax ret END(__llrintf) -weak_alias (__llrintf, llrintf) +libm_alias_float (__llrint, llrint) #ifndef __ILP32__ strong_alias (__llrintf, __lrintf) -weak_alias (__llrintf, lrintf) +libm_alias_float (__llrint, lrint) #endif diff --git a/sysdeps/x86_64/fpu/s_llrintl.S b/sysdeps/x86_64/fpu/s_llrintl.S index 5f4d827dff..49f6ff1961 100644 --- a/sysdeps/x86_64/fpu/s_llrintl.S +++ b/sysdeps/x86_64/fpu/s_llrintl.S @@ -1,6 +1,6 @@ /* Round argument to nearest integral value according to current rounding direction. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -18,6 +18,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> +#include <libm-alias-ldouble.h> .text ENTRY(__llrintl) @@ -27,8 +28,8 @@ ENTRY(__llrintl) movq -8(%rsp),%rax ret END(__llrintl) -weak_alias (__llrintl, llrintl) +libm_alias_ldouble (__llrint, llrint) #ifndef __ILP32__ strong_alias (__llrintl, __lrintl) -weak_alias (__llrintl, lrintl) +libm_alias_ldouble (__llrint, lrint) #endif diff --git a/sysdeps/x86_64/fpu/s_log1pl.S b/sysdeps/x86_64/fpu/s_log1pl.S index e83f64d3c0..947e5e4552 100644 --- a/sysdeps/x86_64/fpu/s_log1pl.S +++ b/sysdeps/x86_64/fpu/s_log1pl.S @@ -68,6 +68,7 @@ ENTRY(__log1pl) jnz 4b // in case x is ±Inf fstp %st(1) fstp %st(1) + fadd %st(0) ret END (__log1pl) diff --git a/sysdeps/x86_64/fpu/s_nearbyintl.S b/sysdeps/x86_64/fpu/s_nearbyintl.S index 76d41bdd52..80508bdbee 100644 --- a/sysdeps/x86_64/fpu/s_nearbyintl.S +++ b/sysdeps/x86_64/fpu/s_nearbyintl.S @@ -4,15 +4,12 @@ */ /* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>. */ +#include <libm-alias-ldouble.h> #include <machine/asm.h> ENTRY(__nearbyintl) fldt 8(%rsp) fnstenv -28(%rsp) - movl -28(%rsp), %eax - orl $0x20, %eax - movl %eax, -32(%rsp) - fldcw -32(%rsp) frndint fnstsw andl $0x1, %eax @@ -20,4 +17,4 @@ ENTRY(__nearbyintl) fldenv -28(%rsp) ret END (__nearbyintl) -weak_alias (__nearbyintl, nearbyintl) +libm_alias_ldouble (__nearbyint, nearbyint) diff --git a/sysdeps/x86_64/fpu/s_signbit.S b/sysdeps/x86_64/fpu/s_signbit.S index 92a79d3123..becfc646cb 100644 --- a/sysdeps/x86_64/fpu/s_signbit.S +++ b/sysdeps/x86_64/fpu/s_signbit.S @@ -1,5 +1,5 @@ /* Return nonzero value if number is negative. - Copyright (C) 2009-2016 Free Software Foundation, Inc. + Copyright (C) 2009-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redha.com>, 2009. 
diff --git a/sysdeps/x86_64/fpu/s_signbitf.S b/sysdeps/x86_64/fpu/s_signbitf.S index 885645372e..c7be6a6329 100644 --- a/sysdeps/x86_64/fpu/s_signbitf.S +++ b/sysdeps/x86_64/fpu/s_signbitf.S @@ -1,5 +1,5 @@ /* Return nonzero value if number is negative. - Copyright (C) 2009-2016 Free Software Foundation, Inc. + Copyright (C) 2009-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redha.com>, 2009. diff --git a/sysdeps/x86_64/fpu/s_sincosf.S b/sysdeps/x86_64/fpu/s_sincosf.S index 5e7cbe57e3..2086e8ca5c 100644 --- a/sysdeps/x86_64/fpu/s_sincosf.S +++ b/sysdeps/x86_64/fpu/s_sincosf.S @@ -1,5 +1,5 @@ /* Optimized sincosf function. - Copyright (C) 2012-2016 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,8 +17,8 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> +#include <errno.h> +#include <libm-alias-float.h> /* Short algorithm description: * @@ -561,4 +561,6 @@ L(SP_ONE): .type L(SP_ONE), @object ASM_SIZE_DIRECTIVE(L(SP_ONE)) -weak_alias(__sincosf, sincosf) +#ifndef __sincosf +libm_alias_float (__sincos, sincos) +#endif diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S deleted file mode 100644 index c980c6e207..0000000000 --- a/sysdeps/x86_64/fpu/s_sinf.S +++ /dev/null @@ -1,559 +0,0 @@ -/* Optimized sinf function. - Copyright (C) 2012-2016 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#define __need_Emath -#include <bits/errno.h> - -/* Short algorithm description: - * - * 1) if |x| == 0: return x. - * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed. - * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1. - * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). - * 5) if |x| < 9*Pi/4: - * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, - * t=|x|-j*Pi/4. - * 5.2) Reconstruction: - * s = sign(x) * (-1.0)^((n>>2)&1) - * if(n&2 != 0) { - * using cos(t) polynomial for |t|<Pi/4, result is - * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))). - * } else { - * using sin(t) polynomial for |t|<Pi/4, result is - * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))). - * } - * 6) if |x| < 2^23, large args: - * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, - * t=|x|-j*Pi/4. - * 6.2) Reconstruction same as (5.2). - * 7) if |x| >= 2^23, very large args: - * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, - * t=|x|-j*Pi/4. - * 7.2) Reconstruction same as (5.2). - * 8) if x is Inf, return x-x, and set errno=EDOM. - * 9) if x is NaN, return x-x. - * - * Special cases: - * sin(+-0) = +-0 not raising inexact/underflow, - * sin(subnormal) raises inexact/underflow, - * sin(min_normalized) raises inexact/underflow, - * sin(normalized) raises inexact, - * sin(Inf) = NaN, raises invalid, sets errno to EDOM, - * sin(NaN) = NaN. 
- */ - - .text -ENTRY(__sinf) - /* Input: single precision x in %xmm0 */ - - movd %xmm0, %eax /* Bits of x */ - movaps %xmm0, %xmm7 /* Copy of x */ - cvtss2sd %xmm0, %xmm0 /* DP x */ - movss L(SP_ABS_MASK)(%rip), %xmm3 - movl %eax, %edi /* Copy of x bits */ - andl $0x7fffffff, %eax /* |x| */ - - cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */ - jb L(arg_less_pio4) - - /* Here if |x|>=Pi/4 */ - andps %xmm7, %xmm3 /* SP |x| */ - andpd L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */ - movss L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */ - - cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */ - jae L(large_args) - - /* Here if Pi/4<=|x|<9*Pi/4 */ - mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ - movl %edi, %ecx /* Load x */ - cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ - lea L(PIO4J)(%rip), %rsi - shrl $31, %ecx /* sign of x */ - addl $1, %eax /* k+1 */ - movl $0x0e, %edx - andl %eax, %edx /* j = (k+1)&0x0e */ - subsd (%rsi,%rdx,8), %xmm0 /* t = |x| - j * Pi/4 */ - -L(reconstruction): - /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */ - testl $2, %eax /* n&2 != 0? 
*/ - jz L(sin_poly) - -/*L(cos_poly):*/ - /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))) - */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd L(DP_C4)(%rip), %xmm4 /* C4 */ - mulsd %xmm0, %xmm4 /* z*C4 */ - xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ - movsd L(DP_C3)(%rip), %xmm3 /* C3 */ - mulsd %xmm0, %xmm3 /* z*C3 */ - lea L(DP_ONES)(%rip), %rsi - addsd L(DP_C2)(%rip), %xmm4 /* C2+z*C4 */ - mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */ - addsd L(DP_C1)(%rip), %xmm3 /* C1+z*C3 */ - mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */ - addsd L(DP_C0)(%rip), %xmm4 /* C0+z*(C2+z*C4) */ - mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */ - - /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd %xmm4, %xmm3 - /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ - addsd L(DP_ONES)(%rip), %xmm3 - - mulsd (%rsi,%rcx,8), %xmm3 /* DP result */ - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(sin_poly): - /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4: - * y = t*t; z = y*y; - * s = sign(x) * (-1.0)^((n>>2)&1) - * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))) - */ - - movaps %xmm0, %xmm4 /* t */ - shrl $2, %eax /* n>>2 */ - mulsd %xmm0, %xmm0 /* y=t^2 */ - andl $1, %eax /* (n>>2)&1 */ - movaps %xmm0, %xmm1 /* y */ - xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ - mulsd %xmm0, %xmm0 /* z=t^4 */ - - movsd L(DP_S4)(%rip), %xmm2 /* S4 */ - mulsd %xmm0, %xmm2 /* z*S4 */ - movsd L(DP_S3)(%rip), %xmm3 /* S3 */ - mulsd %xmm0, %xmm3 /* z*S3 */ - lea L(DP_ONES)(%rip), %rsi - addsd L(DP_S2)(%rip), %xmm2 /* S2+z*S4 */ - mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */ - addsd L(DP_S1)(%rip), %xmm3 /* S1+z*S3 */ - mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */ - addsd L(DP_S0)(%rip), %xmm2 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm2 
/* y*(S0+z*(S2+z*S4)) */ - /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */ - mulsd (%rsi,%rcx,8), %xmm4 - /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm2, %xmm3 - /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - mulsd %xmm4, %xmm3 - /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(large_args): - /* Here if |x|>=9*Pi/4 */ - cmpl $0x7f800000, %eax /* x is Inf or NaN? */ - jae L(arg_inf_or_nan) - - /* Here if finite |x|>=9*Pi/4 */ - cmpl $0x4b000000, %eax /* |x|<2^23? */ - jae L(very_large_args) - - /* Here if 9*Pi/4<=|x|<2^23 */ - movsd L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */ - mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ - cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ - addl $1, %eax /* k+1 */ - movl %eax, %edx - andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ - cvtsi2sdl %edx, %xmm4 /* DP j */ - movl %edi, %ecx /* Load x */ - movsd L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */ - shrl $31, %ecx /* sign bit of x */ - mulsd %xmm4, %xmm2 /* -j*PIO4HI */ - movsd L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */ - addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ - mulsd %xmm3, %xmm4 /* j*PIO4LO */ - addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ - jmp L(reconstruction) - - .p2align 4 -L(very_large_args): - /* Here if finite |x|>=2^23 */ - - /* bitpos = (ix>>23) - BIAS_32 + 59; */ - shrl $23, %eax /* eb = biased exponent of x */ - /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */ - subl $68, %eax - movl $28, %ecx /* %cl=28 */ - movl %eax, %edx /* bitpos copy */ - - /* j = bitpos/28; */ - div %cl /* j in register %al=%ax/%cl */ - movapd %xmm0, %xmm3 /* |x| */ - /* clear unneeded remainder from %ah */ - andl $0xff, %eax - - imull $28, %eax, %ecx /* j*28 */ - lea L(_FPI)(%rip), %rsi - movsd L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */ - movapd %xmm0, %xmm5 /* |x| */ - mulsd -16(%rsi,%rax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */ - movapd %xmm0, %xmm1 /* |x| */ - mulsd 
-8(%rsi,%rax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */ - mulsd (%rsi,%rax,8), %xmm0 /* tmp0 = FPI[j]*|x| */ - addl $19, %ecx /* j*28+19 */ - mulsd 8(%rsi,%rax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */ - cmpl %ecx, %edx /* bitpos>=j*28+19? */ - jl L(very_large_skip1) - - /* Here if bitpos>=j*28+19 */ - andpd %xmm3, %xmm4 /* HI(tmp3) */ - subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ -L(very_large_skip1): - - movsd L(DP_2POW52)(%rip), %xmm6 - movapd %xmm5, %xmm2 /* tmp2 copy */ - addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ - movl $1, %edx - addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ - movsd 8+L(DP_2POW52)(%rip), %xmm4 - movd %xmm6, %eax /* k = I64_LO(tmp6); */ - addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ - movl %edi, %ecx /* Load x */ - comisd %xmm5, %xmm4 /* tmp4 > tmp5? */ - jbe L(very_large_skip2) - - /* Here if tmp4 > tmp5 */ - subl $1, %eax /* k-- */ - addsd 8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */ -L(very_large_skip2): - - andl %eax, %edx /* k&1 */ - lea L(DP_ZERONE)(%rip), %rsi - subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ - addsd (%rsi,%rdx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */ - addsd %xmm2, %xmm3 /* t += tmp2 */ - shrl $31, %ecx /* sign of x */ - addsd %xmm3, %xmm0 /* t += tmp0 */ - addl $1, %eax /* n=k+1 */ - addsd %xmm1, %xmm0 /* t += tmp1 */ - mulsd L(DP_PIO4)(%rip), %xmm0 /* t *= PI04 */ - - jmp L(reconstruction) /* end of very_large_args peth */ - - .p2align 4 -L(arg_less_pio4): - /* Here if |x|<Pi/4 */ - cmpl $0x3d000000, %eax /* |x|<2^-5? 
*/ - jl L(arg_less_2pn5) - - /* Here if 2^-5<=|x|<Pi/4 */ - movaps %xmm0, %xmm3 /* x */ - mulsd %xmm0, %xmm0 /* y=x^2 */ - movaps %xmm0, %xmm1 /* y */ - mulsd %xmm0, %xmm0 /* z=x^4 */ - movsd L(DP_S4)(%rip), %xmm4 /* S4 */ - mulsd %xmm0, %xmm4 /* z*S4 */ - movsd L(DP_S3)(%rip), %xmm5 /* S3 */ - mulsd %xmm0, %xmm5 /* z*S3 */ - addsd L(DP_S2)(%rip), %xmm4 /* S2+z*S4 */ - mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */ - addsd L(DP_S1)(%rip), %xmm5 /* S1+z*S3 */ - mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */ - addsd L(DP_S0)(%rip), %xmm4 /* S0+z*(S2+z*S4) */ - mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */ - mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */ - mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */ - /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm5, %xmm4 - /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ - addsd %xmm4, %xmm3 - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(arg_less_2pn5): - /* Here if |x|<2^-5 */ - cmpl $0x32000000, %eax /* |x|<2^-27? */ - jl L(arg_less_2pn27) - - /* Here if 2^-27<=|x|<2^-5 */ - movaps %xmm0, %xmm1 /* DP x */ - mulsd %xmm0, %xmm0 /* DP x^2 */ - movsd L(DP_SIN2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */ - addsd L(DP_SIN2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */ - mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */ - mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ - cvtsd2ss %xmm3, %xmm0 /* SP result */ - ret - - .p2align 4 -L(arg_less_2pn27): - cmpl $0, %eax /* x=0? 
*/ - je L(arg_zero) /* in case x=0 return sin(+-0)==+-0 */ - /* Here if |x|<2^-27 */ - /* - * Special cases here: - * sin(subnormal) raises inexact/underflow - * sin(min_normalized) raises inexact/underflow - * sin(normalized) raises inexact - */ - movaps %xmm0, %xmm3 /* Copy of DP x */ - mulsd L(DP_SMALL)(%rip), %xmm0 /* x*DP_SMALL */ - subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */ - cvtsd2ss %xmm3, %xmm0 /* Result converted to SP */ - ret - - .p2align 4 -L(arg_zero): - movaps %xmm7, %xmm0 /* SP x */ - ret - - .p2align 4 -L(arg_inf_or_nan): - /* Here if |x| is Inf or NAN */ - jne L(skip_errno_setting) /* in case of x is NaN */ - - /* Align stack to 16 bytes. */ - subq $8, %rsp - cfi_adjust_cfa_offset (8) - /* Here if x is Inf. Set errno to EDOM. */ - call JUMPTARGET(__errno_location) - addq $8, %rsp - cfi_adjust_cfa_offset (-8) - - movl $EDOM, (%rax) - - .p2align 4 -L(skip_errno_setting): - /* Here if |x| is Inf or NAN. Continued. */ - movaps %xmm7, %xmm0 /* load x */ - subss %xmm0, %xmm0 /* Result is NaN */ - ret -END(__sinf) - - .section .rodata, "a" - .p2align 3 -L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ - .long 0x00000000,0x00000000 - .long 0x54442d18,0x3fe921fb - .long 0x54442d18,0x3ff921fb - .long 0x7f3321d2,0x4002d97c - .long 0x54442d18,0x400921fb - .long 0x2955385e,0x400f6a7a - .long 0x7f3321d2,0x4012d97c - .long 0xe9bba775,0x4015fdbb - .long 0x54442d18,0x401921fb - .long 0xbeccb2bb,0x401c463a - .long 0x2955385e,0x401f6a7a - .type L(PIO4J), @object - ASM_SIZE_DIRECTIVE(L(PIO4J)) - - .p2align 3 -L(_FPI): /* 4/Pi broken into sum of positive DP values */ - .long 0x00000000,0x00000000 - .long 0x6c000000,0x3ff45f30 - .long 0x2a000000,0x3e3c9c88 - .long 0xa8000000,0x3c54fe13 - .long 0xd0000000,0x3aaf47d4 - .long 0x6c000000,0x38fbb81b - .long 0xe0000000,0x3714acc9 - .long 0x7c000000,0x3560e410 - .long 0x56000000,0x33bca2c7 - .long 0xac000000,0x31fbd778 - .long 0xe0000000,0x300b7246 - .long 0xe8000000,0x2e5d2126 - .long 0x48000000,0x2c970032 - .long 
0xe8000000,0x2ad77504 - .long 0xe0000000,0x290921cf - .long 0xb0000000,0x274deb1c - .long 0xe0000000,0x25829a73 - .long 0xbe000000,0x23fd1046 - .long 0x10000000,0x2224baed - .long 0x8e000000,0x20709d33 - .long 0x80000000,0x1e535a2f - .long 0x64000000,0x1cef904e - .long 0x30000000,0x1b0d6398 - .long 0x24000000,0x1964ce7d - .long 0x16000000,0x17b908bf - .type L(_FPI), @object - ASM_SIZE_DIRECTIVE(L(_FPI)) - -/* Coefficients of polynomial - for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */ - .p2align 3 -L(DP_SIN2_0): - .long 0x5543d49d,0xbfc55555 - .type L(DP_SIN2_0), @object - ASM_SIZE_DIRECTIVE(L(DP_SIN2_0)) - - .p2align 3 -L(DP_SIN2_1): - .long 0x75cec8c5,0x3f8110f4 - .type L(DP_SIN2_1), @object - ASM_SIZE_DIRECTIVE(L(DP_SIN2_1)) - - .p2align 3 -L(DP_ZERONE): - .long 0x00000000,0x00000000 /* 0.0 */ - .long 0x00000000,0xbff00000 /* 1.0 */ - .type L(DP_ZERONE), @object - ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) - - .p2align 3 -L(DP_ONES): - .long 0x00000000,0x3ff00000 /* +1.0 */ - .long 0x00000000,0xbff00000 /* -1.0 */ - .type L(DP_ONES), @object - ASM_SIZE_DIRECTIVE(L(DP_ONES)) - -/* Coefficients of polynomial - for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. 
*/ - .p2align 3 -L(DP_S3): - .long 0x64e6b5b4,0x3ec71d72 - .type L(DP_S3), @object - ASM_SIZE_DIRECTIVE(L(DP_S3)) - - .p2align 3 -L(DP_S1): - .long 0x10c2688b,0x3f811111 - .type L(DP_S1), @object - ASM_SIZE_DIRECTIVE(L(DP_S1)) - - .p2align 3 -L(DP_S4): - .long 0x1674b58a,0xbe5a947e - .type L(DP_S4), @object - ASM_SIZE_DIRECTIVE(L(DP_S4)) - - .p2align 3 -L(DP_S2): - .long 0x8b4bd1f9,0xbf2a019f - .type L(DP_S2), @object - ASM_SIZE_DIRECTIVE(L(DP_S2)) - - .p2align 3 -L(DP_S0): - .long 0x55551cd9,0xbfc55555 - .type L(DP_S0), @object - ASM_SIZE_DIRECTIVE(L(DP_S0)) - - .p2align 3 -L(DP_SMALL): - .long 0x00000000,0x3cd00000 /* 2^(-50) */ - .type L(DP_SMALL), @object - ASM_SIZE_DIRECTIVE(L(DP_SMALL)) - -/* Coefficients of polynomial - for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */ - .p2align 3 -L(DP_C3): - .long 0x9ac43cc0,0x3efa00eb - .type L(DP_C3), @object - ASM_SIZE_DIRECTIVE(L(DP_C3)) - - .p2align 3 -L(DP_C1): - .long 0x545c50c7,0x3fa55555 - .type L(DP_C1), @object - ASM_SIZE_DIRECTIVE(L(DP_C1)) - - .p2align 3 -L(DP_C4): - .long 0xdd8844d7,0xbe923c97 - .type L(DP_C4), @object - ASM_SIZE_DIRECTIVE(L(DP_C4)) - - .p2align 3 -L(DP_C2): - .long 0x348b6874,0xbf56c16b - .type L(DP_C2), @object - ASM_SIZE_DIRECTIVE(L(DP_C2)) - - .p2align 3 -L(DP_C0): - .long 0xfffe98ae,0xbfdfffff - .type L(DP_C0), @object - ASM_SIZE_DIRECTIVE(L(DP_C0)) - - .p2align 3 -L(DP_PIO4): - .long 0x54442d18,0x3fe921fb /* Pi/4 */ - .type L(DP_PIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4)) - - .p2align 3 -L(DP_2POW52): - .long 0x00000000,0x43300000 /* +2^52 */ - .long 0x00000000,0xc3300000 /* -2^52 */ - .type L(DP_2POW52), @object - ASM_SIZE_DIRECTIVE(L(DP_2POW52)) - - .p2align 3 -L(DP_INVPIO4): - .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ - .type L(DP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) - - .p2align 3 -L(DP_PIO4HI): - .long 0x54000000,0xbfe921fb /* High part of Pi/4 */ - .type L(DP_PIO4HI), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) - - .p2align 3 -L(DP_PIO4LO): 
- .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */ - .type L(DP_PIO4LO), @object - ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) - - .p2align 2 -L(SP_INVPIO4): - .long 0x3fa2f983 /* 4/Pi */ - .type L(SP_INVPIO4), @object - ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) - - .p2align 4 -L(DP_ABS_MASK): /* Mask for getting DP absolute value */ - .long 0xffffffff,0x7fffffff - .long 0xffffffff,0x7fffffff - .type L(DP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) - - .p2align 3 -L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ - .long 0x00000000,0xffffffff - .type L(DP_HI_MASK),@object - ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) - - .p2align 4 -L(SP_ABS_MASK): /* Mask for getting SP absolute value */ - .long 0x7fffffff,0x7fffffff - .long 0x7fffffff,0x7fffffff - .type L(SP_ABS_MASK), @object - ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK)) - -weak_alias(__sinf, sinf) diff --git a/sysdeps/x86_64/fpu/s_truncl.S b/sysdeps/x86_64/fpu/s_truncl.S index c37cf00241..22427ece00 100644 --- a/sysdeps/x86_64/fpu/s_truncl.S +++ b/sysdeps/x86_64/fpu/s_truncl.S @@ -1,5 +1,5 @@ /* Truncate long double value. - Copyright (C) 1997-2016 Free Software Foundation, Inc. + Copyright (C) 1997-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -17,17 +17,21 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ +#include <libm-alias-ldouble.h> #include <machine/asm.h> ENTRY(__truncl) fldt 8(%rsp) - fstcw -4(%rsp) + fnstenv -28(%rsp) movl $0xc00, %edx - orl -4(%rsp), %edx - movl %edx, -8(%rsp) - fldcw -8(%rsp) + orl -28(%rsp), %edx + movl %edx, -32(%rsp) + fldcw -32(%rsp) frndint - fldcw -4(%rsp) + fnstsw + andl $0x1, %eax + orl %eax, -24(%rsp) + fldenv -28(%rsp) ret END(__truncl) -weak_alias (__truncl, truncl) +libm_alias_ldouble (__trunc, trunc) diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S index 7f62d29917..111548367b 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S @@ -1,5 +1,5 @@ /* Function cos vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S index b92ff13b86..28b31d510c 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S index a3da721e35..988d0650ca 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S @@ -1,5 +1,5 @@ /* Function cos vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/svml_d_cos8_core.S index e5d986d11a..830776b5d2 100644 --- a/sysdeps/x86_64/fpu/svml_d_cos8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_cos8_core.S @@ -1,5 +1,5 @@ /* Function cos vectorized with AVX-512, wrapper to AVX2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/svml_d_exp2_core.S index 9e511037a1..e19ddb7f3b 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_exp2_core.S @@ -1,5 +1,5 @@ /* Function exp vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -21,7 +21,7 @@ .text ENTRY (_ZGVbN2v_exp) -WRAPPER_IMPL_SSE2 exp +WRAPPER_IMPL_SSE2 __exp_finite END (_ZGVbN2v_exp) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/svml_d_exp4_core.S index 8cac8adbc7..341fea8f30 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_exp4_core.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S index 1a0fbf574a..39e6fcf228 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S @@ -1,5 +1,5 @@ /* Function exp vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/svml_d_exp8_core.S index 2486e888a4..94edc01fcb 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_exp8_core.S @@ -1,5 +1,5 @@ /* Function exp vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.S b/sysdeps/x86_64/fpu/svml_d_exp_data.S index 6d1acbdd21..5e229c9bcc 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp_data.S +++ b/sysdeps/x86_64/fpu/svml_d_exp_data.S @@ -1,5 +1,5 @@ /* Data for vector function exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.h b/sysdeps/x86_64/fpu/svml_d_exp_data.h index f993403d47..a3721ce137 100644 --- a/sysdeps/x86_64/fpu/svml_d_exp_data.h +++ b/sysdeps/x86_64/fpu/svml_d_exp_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function exp. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log2_core.S b/sysdeps/x86_64/fpu/svml_d_log2_core.S index 8ea40fee56..41522f2069 100644 --- a/sysdeps/x86_64/fpu/svml_d_log2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_log2_core.S @@ -1,5 +1,5 @@ /* Function log vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -21,7 +21,7 @@ .text ENTRY (_ZGVbN2v_log) -WRAPPER_IMPL_SSE2 log +WRAPPER_IMPL_SSE2 __log_finite END (_ZGVbN2v_log) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core.S b/sysdeps/x86_64/fpu/svml_d_log4_core.S index 72813d8921..5857b45aa0 100644 --- a/sysdeps/x86_64/fpu/svml_d_log4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_log4_core.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S index 6ca1139931..bab3ba9877 100644 --- a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S @@ -1,5 +1,5 @@ /* Function log vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log8_core.S b/sysdeps/x86_64/fpu/svml_d_log8_core.S index 6850fd9a44..bb3523ee0d 100644 --- a/sysdeps/x86_64/fpu/svml_d_log8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_log8_core.S @@ -1,5 +1,5 @@ /* Function log vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.S b/sysdeps/x86_64/fpu/svml_d_log_data.S index 9ab541b23f..0514551ccf 100644 --- a/sysdeps/x86_64/fpu/svml_d_log_data.S +++ b/sysdeps/x86_64/fpu/svml_d_log_data.S @@ -1,5 +1,5 @@ /* Data for function log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.h b/sysdeps/x86_64/fpu/svml_d_log_data.h index 30c2b54a4b..a317c7b845 100644 --- a/sysdeps/x86_64/fpu/svml_d_log_data.h +++ b/sysdeps/x86_64/fpu/svml_d_log_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function log. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/svml_d_pow2_core.S index b25515c825..b2451b2ed5 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_pow2_core.S @@ -1,5 +1,5 @@ /* Function pow vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -21,7 +21,7 @@ .text ENTRY (_ZGVbN2vv_pow) -WRAPPER_IMPL_SSE2_ff pow +WRAPPER_IMPL_SSE2_ff __pow_finite END (_ZGVbN2vv_pow) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/svml_d_pow4_core.S index 547993799e..1520ba1d45 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_pow4_core.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S index 4e4e9867b4..d4b265c91a 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S @@ -1,5 +1,5 @@ /* Function pow vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/svml_d_pow8_core.S index 372e5a9c83..15292ccebd 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_pow8_core.S @@ -1,5 +1,5 @@ /* Function pow vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.S b/sysdeps/x86_64/fpu/svml_d_pow_data.S index 8481f95455..9e5f99c25e 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow_data.S +++ b/sysdeps/x86_64/fpu/svml_d_pow_data.S @@ -1,5 +1,5 @@ /* Data for function pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.h b/sysdeps/x86_64/fpu/svml_d_pow_data.h index 239ba96984..55b573b2a7 100644 --- a/sysdeps/x86_64/fpu/svml_d_pow_data.h +++ b/sysdeps/x86_64/fpu/svml_d_pow_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function pow. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/svml_d_sin2_core.S index f6ec13104b..6485e0819f 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sin2_core.S @@ -1,5 +1,5 @@ /* Function sin vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/svml_d_sin4_core.S index 95a1dec6f6..7c7c426451 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sin4_core.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S index 29d1526a12..a8200dfc58 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S @@ -1,5 +1,5 @@ /* Function sin vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/svml_d_sin8_core.S index abd86b3d98..7f07a41ba1 100644 --- a/sysdeps/x86_64/fpu/svml_d_sin8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sin8_core.S @@ -1,5 +1,5 @@ /* Function sin vectorized with AVX-512, wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S index 74afa0a677..ebf9e25aca 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,8 +20,89 @@ #include "svml_d_wrapper_impl.h" .text -ENTRY (_ZGVbN2vvv_sincos) +ENTRY (_ZGVbN2vl8l8_sincos) WRAPPER_IMPL_SSE2_fFF sincos +END (_ZGVbN2vl8l8_sincos) +libmvec_hidden_def (_ZGVbN2vl8l8_sincos) + +/* SSE2 ISA version as wrapper to scalar (for vector + function declared with #pragma omp declare simd notinbranch). 
*/ +.macro WRAPPER_IMPL_SSE2_fFF_vvv callee +#ifndef __ILP32__ + subq $88, %rsp + cfi_adjust_cfa_offset(88) + movaps %xmm0, 64(%rsp) + lea (%rsp), %rdi + movdqa %xmm1, 32(%rdi) + lea 16(%rsp), %rsi + movdqa %xmm2, 32(%rsi) + call JUMPTARGET(\callee) + movsd 72(%rsp), %xmm0 + lea 8(%rsp), %rdi + lea 24(%rsp), %rsi + call JUMPTARGET(\callee) + movq 32(%rsp), %rdx + movq 48(%rsp), %rsi + movq 40(%rsp), %r8 + movq 56(%rsp), %r10 + movq (%rsp), %rax + movq 16(%rsp), %rcx + movq 8(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq %r9, (%r10) + addq $88, %rsp + cfi_adjust_cfa_offset(-88) + ret +#else + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + pushq %rbx + .cfi_def_cfa_offset 24 + .cfi_offset 3, -24 + subl $88, %esp + .cfi_def_cfa_offset 112 + leal 64(%rsp), %esi + movaps %xmm1, 32(%esp) + leal 48(%rsp), %edi + movaps %xmm2, 16(%esp) + movq %rsi, %rbp + movq %rdi, %rbx + movaps %xmm0, (%esp) + call JUMPTARGET(\callee) + movupd 8(%esp), %xmm0 + leal 8(%rbp), %esi + leal 8(%rbx), %edi + call JUMPTARGET(\callee) + movdqa 32(%esp), %xmm1 + movsd 48(%esp), %xmm0 + movq %xmm1, %rax + movdqa 16(%esp), %xmm2 + movsd %xmm0, (%eax) + movsd 56(%esp), %xmm0 + pextrd $1, %xmm1, %eax + movsd %xmm0, (%eax) + movsd 64(%esp), %xmm0 + movq %xmm2, %rax + movsd %xmm0, (%eax) + movsd 72(%esp), %xmm0 + pextrd $1, %xmm2, %eax + movsd %xmm0, (%eax) + addl $88, %esp + .cfi_def_cfa_offset 24 + popq %rbx + .cfi_def_cfa_offset 16 + popq %rbp + .cfi_def_cfa_offset 8 + ret +#endif +.endm + +ENTRY (_ZGVbN2vvv_sincos) +WRAPPER_IMPL_SSE2_fFF_vvv sincos END (_ZGVbN2vvv_sincos) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S index 2c0b011fb3..626a2b3a7b 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX2, wrapper version. 
- Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,8 +20,131 @@ #include "svml_d_wrapper_impl.h" .text +ENTRY (_ZGVdN4vl8l8_sincos) +WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos +END (_ZGVdN4vl8l8_sincos) +libmvec_hidden_def (_ZGVdN4vl8l8_sincos) + +/* AVX2 ISA version as wrapper to SSE ISA version (for vector + function declared with #pragma omp declare simd notinbranch). */ +.macro WRAPPER_IMPL_AVX2_fFF_vvv callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-32, %rsp + subq $160, %rsp + vmovupd %ymm0, 128(%rsp) + lea (%rsp), %rdi + vmovdqu %ymm1, 64(%rdi) + vmovdqu %ymm2, 96(%rdi) + lea 32(%rsp), %rsi + vzeroupper + call HIDDEN_JUMPTARGET(\callee) + vmovupd 144(%rsp), %xmm0 + lea 16(%rsp), %rdi + lea 48(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 64(%rsp), %rdx + movq 96(%rsp), %rsi + movq 72(%rsp), %r8 + movq 104(%rsp), %r10 + movq (%rsp), %rax + movq 32(%rsp), %rcx + movq 8(%rsp), %rdi + movq 40(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq 80(%rsp), %rax + movq 112(%rsp), %rcx + movq %rdi, (%r8) + movq %r9, (%r10) + movq 88(%rsp), %rdi + movq 120(%rsp), %r9 + movq 16(%rsp), %r11 + movq 48(%rsp), %rdx + movq 24(%rsp), %rsi + movq 56(%rsp), %r8 + movq %r11, (%rax) + movq %rdx, (%rcx) + movq %rsi, (%rdi) + movq %r8, (%r9) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-32, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r12 + leal -80(%rbp), %esi + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x70,0x6 + .cfi_escape 0x10,0xc,0x2,0x76,0x78 + leal -112(%rbp), %edi + movq %rsi, %r12 + pushq %rbx + .cfi_escape 
0x10,0x3,0x2,0x76,0x68 + movq %rdi, %rbx + subl $152, %esp + vmovaps %xmm1, -128(%ebp) + vmovaps %xmm2, -144(%ebp) + vmovapd %ymm0, -176(%ebp) + vzeroupper + call HIDDEN_JUMPTARGET(\callee) + leal 16(%r12), %esi + vmovapd -160(%ebp), %xmm0 + leal 16(%rbx), %edi + call HIDDEN_JUMPTARGET(\callee) + movq -128(%ebp), %rax + vmovsd -112(%ebp), %xmm0 + vmovdqa -128(%ebp), %xmm5 + vmovdqa -144(%ebp), %xmm1 + vmovsd %xmm0, (%eax) + vmovsd -104(%ebp), %xmm0 + vpextrd $1, %xmm5, %eax + vmovsd %xmm0, (%eax) + movq -120(%ebp), %rax + vmovsd -96(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovsd -88(%ebp), %xmm0 + vpextrd $3, %xmm5, %eax + vmovsd %xmm0, (%eax) + movq -144(%ebp), %rax + vmovsd -80(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovsd -72(%ebp), %xmm0 + vpextrd $1, %xmm1, %eax + vmovsd %xmm0, (%eax) + movq -136(%ebp), %rax + vmovsd -64(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovsd -56(%ebp), %xmm0 + vpextrd $3, %xmm1, %eax + vmovsd %xmm0, (%eax) + addl $152, %esp + popq %rbx + popq %r10 + .cfi_def_cfa 10, 0 + popq %r12 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + ENTRY (_ZGVdN4vvv_sincos) -WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos +WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos END (_ZGVdN4vvv_sincos) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S index e4320a97c7..4a5d4f637a 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S @@ -1,5 +1,5 @@ /* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -20,6 +20,124 @@ #include "svml_d_wrapper_impl.h" .text +ENTRY (_ZGVcN4vl8l8_sincos) +WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos +END (_ZGVcN4vl8l8_sincos) + +/* AVX ISA version as wrapper to SSE ISA version (for vector + function declared with #pragma omp declare simd notinbranch). */ +.macro WRAPPER_IMPL_AVX_fFF_vvv callee +#ifndef __ILP32__ + pushq %rbp + movq %rsp, %rbp + andq $-32, %rsp + subq $160, %rsp + vmovupd %ymm0, 64(%rsp) + lea (%rsp), %rdi + vmovdqu %xmm1, 96(%rdi) + vmovdqu %xmm2, 112(%rdi) + vmovdqu %xmm3, 128(%rdi) + vmovdqu %xmm4, 144(%rdi) + lea 32(%rsp), %rsi + vzeroupper + call HIDDEN_JUMPTARGET(\callee) + vmovdqu 80(%rsp), %xmm0 + lea 16(%rsp), %rdi + lea 48(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 96(%rsp), %rdx + movq 104(%rsp), %rsi + movq 112(%rsp), %r8 + movq 120(%rsp), %r10 + movq (%rsp), %rax + movq 8(%rsp), %rcx + movq 16(%rsp), %rdi + movq 24(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq 128(%rsp), %rax + movq 136(%rsp), %rcx + movq %rdi, (%r8) + movq %r9, (%r10) + movq 144(%rsp), %rdi + movq 152(%rsp), %r9 + movq 32(%rsp), %r11 + movq 40(%rsp), %rdx + movq 48(%rsp), %rsi + movq 56(%rsp), %r8 + movq %r11, (%rax) + movq %rdx, (%rcx) + movq %rsi, (%rdi) + movq %r8, (%r9) + movq %rbp, %rsp + popq %rbp + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-32, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r12 + leal -80(%rbp), %esi + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x70,0x6 + .cfi_escape 0x10,0xc,0x2,0x76,0x78 + leal -112(%rbp), %edi + movq %rsi, %r12 + pushq %rbx + .cfi_escape 0x10,0x3,0x2,0x76,0x68 + movq %rdi, %rbx + subl $152, %esp + vmovaps %xmm1, -128(%ebp) + vmovaps %xmm2, -144(%ebp) + vmovapd %ymm0, -176(%ebp) + vzeroupper + call HIDDEN_JUMPTARGET(\callee) + leal 16(%r12), %esi + vmovupd -160(%ebp), %xmm0 + leal 16(%rbx), %edi + call HIDDEN_JUMPTARGET(\callee) + movq 
-128(%ebp), %rax + vmovsd -112(%ebp), %xmm0 + vmovdqa -128(%ebp), %xmm5 + vmovdqa -144(%ebp), %xmm1 + vmovsd %xmm0, (%eax) + vmovsd -104(%ebp), %xmm0 + vpextrd $1, %xmm5, %eax + vmovsd %xmm0, (%eax) + movq -120(%ebp), %rax + vmovsd -96(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovsd -88(%ebp), %xmm0 + vpextrd $3, %xmm5, %eax + vmovsd %xmm0, (%eax) + movq -144(%ebp), %rax + vmovsd -80(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovsd -72(%ebp), %xmm0 + vpextrd $1, %xmm1, %eax + vmovsd %xmm0, (%eax) + movq -136(%ebp), %rax + vmovsd -64(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + vmovsd -56(%ebp), %xmm0 + vpextrd $3, %xmm1, %eax + vmovsd %xmm0, (%eax) + addl $152, %esp + popq %rbx + popq %r10 + .cfi_def_cfa 10, 0 + popq %r12 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + ENTRY (_ZGVcN4vvv_sincos) -WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos +WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos END (_ZGVcN4vvv_sincos) diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S index 68d490e5bc..7cf453872b 100644 --- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S +++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,6 +20,172 @@ #include "svml_d_wrapper_impl.h" .text +ENTRY (_ZGVeN8vl8l8_sincos) +WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos +END (_ZGVeN8vl8l8_sincos) + +/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector + function declared with #pragma omp declare simd notinbranch). 
*/ +.macro WRAPPER_IMPL_AVX512_fFF_vvv callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-64, %rsp + subq $320, %rsp + vmovups %zmm0, 256(%rsp) + lea (%rsp), %rdi + vmovups %zmm1, 128(%rdi) + vmovups %zmm2, 192(%rdi) + lea 64(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + vmovdqu 288(%rsp), %ymm0 + lea 32(%rsp), %rdi + lea 96(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 128(%rsp), %rdx + movq 192(%rsp), %rsi + movq 136(%rsp), %r8 + movq 200(%rsp), %r10 + movq (%rsp), %rax + movq 64(%rsp), %rcx + movq 8(%rsp), %rdi + movq 72(%rsp), %r9 + movq %rax, (%rdx) + movq %rcx, (%rsi) + movq 144(%rsp), %rax + movq 208(%rsp), %rcx + movq %rdi, (%r8) + movq %r9, (%r10) + movq 152(%rsp), %rdi + movq 216(%rsp), %r9 + movq 16(%rsp), %r11 + movq 80(%rsp), %rdx + movq 24(%rsp), %rsi + movq 88(%rsp), %r8 + movq %r11, (%rax) + movq %rdx, (%rcx) + movq 160(%rsp), %r11 + movq 224(%rsp), %rdx + movq %rsi, (%rdi) + movq %r8, (%r9) + movq 168(%rsp), %rsi + movq 232(%rsp), %r8 + movq 32(%rsp), %r10 + movq 96(%rsp), %rax + movq 40(%rsp), %rcx + movq 104(%rsp), %rdi + movq %r10, (%r11) + movq %rax, (%rdx) + movq 176(%rsp), %r10 + movq 240(%rsp), %rax + movq %rcx, (%rsi) + movq %rdi, (%r8) + movq 184(%rsp), %rcx + movq 248(%rsp), %rdi + movq 48(%rsp), %r9 + movq 112(%rsp), %r11 + movq 56(%rsp), %rdx + movq 120(%rsp), %rsi + movq %r9, (%r10) + movq %r11, (%rax) + movq %rdx, (%rcx) + movq %rsi, (%rdi) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-64, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r12 + leal -112(%rbp), %esi + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x70,0x6 + .cfi_escape 0x10,0xc,0x2,0x76,0x78 + leal -176(%rbp), %edi + movq %rsi, %r12 + pushq %rbx + .cfi_escape 0x10,0x3,0x2,0x76,0x68 + movq %rdi, 
%rbx + subl $280, %esp + vmovdqa %ymm1, -208(%ebp) + vmovdqa %ymm2, -240(%ebp) + vmovapd %zmm0, -304(%ebp) + call HIDDEN_JUMPTARGET(\callee) + leal 32(%r12), %esi + vmovupd -272(%ebp), %ymm0 + leal 32(%rbx), %edi + call HIDDEN_JUMPTARGET(\callee) + movl -208(%ebp), %eax + vmovsd -176(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -204(%ebp), %eax + vmovsd -168(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -200(%ebp), %eax + vmovsd -160(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -196(%ebp), %eax + vmovsd -152(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -192(%ebp), %eax + vmovsd -144(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -188(%ebp), %eax + vmovsd -136(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -184(%ebp), %eax + vmovsd -128(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -180(%ebp), %eax + vmovsd -120(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -240(%ebp), %eax + vmovsd -112(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -236(%ebp), %eax + vmovsd -104(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -232(%ebp), %eax + vmovsd -96(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -228(%ebp), %eax + vmovsd -88(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -224(%ebp), %eax + vmovsd -80(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -220(%ebp), %eax + vmovsd -72(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -216(%ebp), %eax + vmovsd -64(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + movl -212(%ebp), %eax + vmovsd -56(%ebp), %xmm0 + vmovsd %xmm0, (%eax) + addl $280, %esp + popq %rbx + popq %r10 + .cfi_def_cfa 10, 0 + popq %r12 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + ENTRY (_ZGVeN8vvv_sincos) -WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos +WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos END (_ZGVeN8vvv_sincos) diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.S b/sysdeps/x86_64/fpu/svml_d_trig_data.S index 887dacee91..2b148325fc 100644 --- a/sysdeps/x86_64/fpu/svml_d_trig_data.S +++ b/sysdeps/x86_64/fpu/svml_d_trig_data.S @@ -1,5 +1,5 @@ /* Data for vectorized sin, cos, sincos. 
- Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.h b/sysdeps/x86_64/fpu/svml_d_trig_data.h index 4617b5e0c3..b9bb5dc6af 100644 --- a/sysdeps/x86_64/fpu/svml_d_trig_data.h +++ b/sysdeps/x86_64/fpu/svml_d_trig_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vectorized sin, cos, sincos. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h index 54f4f58371..d8452e0c2b 100644 --- a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h +++ b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h @@ -1,5 +1,5 @@ /* Wrapper implementations of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -21,10 +21,10 @@ subq $40, %rsp cfi_adjust_cfa_offset(40) movaps %xmm0, (%rsp) - call \callee@PLT + call JUMPTARGET(\callee) movsd %xmm0, 16(%rsp) movsd 8(%rsp), %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movsd 16(%rsp), %xmm1 movsd %xmm0, 24(%rsp) unpcklpd %xmm0, %xmm1 @@ -40,11 +40,11 @@ cfi_adjust_cfa_offset(56) movaps %xmm0, (%rsp) movaps %xmm1, 16(%rsp) - call \callee@PLT + call JUMPTARGET(\callee) movsd %xmm0, 32(%rsp) movsd 8(%rsp), %xmm0 movsd 24(%rsp), %xmm1 - call \callee@PLT + call JUMPTARGET(\callee) movsd 32(%rsp), %xmm1 movsd %xmm0, 40(%rsp) unpcklpd %xmm0, %xmm1 @@ -69,7 +69,7 @@ leaq 16(%rsp), %rsi leaq 24(%rsp), %rdi movaps %xmm0, (%rsp) - call \callee@PLT + call JUMPTARGET(\callee) leaq 16(%rsp), %rsi leaq 24(%rsp), %rdi movsd 24(%rsp), %xmm0 @@ -79,7 +79,7 @@ movsd 16(%rsp), %xmm0 movsd %xmm0, (%rbx) movapd %xmm1, %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movsd 24(%rsp), %xmm0 movsd %xmm0, 8(%rbp) movsd 16(%rsp), %xmm0 @@ -201,29 +201,14 @@ cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $128, %rsp -/* Below is encoding for vmovups %zmm0, (%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x04 - .byte 0x24 + vmovups %zmm0, (%rsp) vmovupd (%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) vmovupd %ymm0, 64(%rsp) vmovupd 32(%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) vmovupd %ymm0, 96(%rsp) -/* Below is encoding for vmovups 64(%rsp), %zmm0. */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x10 - .byte 0x44 - .byte 0x24 - .byte 0x01 + vmovups 64(%rsp), %zmm0 movq %rbp, %rsp cfi_def_cfa_register (%rsp) popq %rbp @@ -241,23 +226,8 @@ cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $192, %rsp -/* Below is encoding for vmovups %zmm0, (%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x04 - .byte 0x24 -/* Below is encoding for vmovups %zmm1, 64(%rsp). 
*/ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x4c - .byte 0x24 - .byte 0x01 + vmovups %zmm0, (%rsp) + vmovups %zmm1, 64(%rsp) vmovupd (%rsp), %ymm0 vmovupd 64(%rsp), %ymm1 call HIDDEN_JUMPTARGET(\callee) @@ -266,15 +236,7 @@ vmovupd 96(%rsp), %ymm1 call HIDDEN_JUMPTARGET(\callee) vmovupd %ymm0, 160(%rsp) -/* Below is encoding for vmovups 128(%rsp), %zmm0. */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x10 - .byte 0x44 - .byte 0x24 - .byte 0x02 + vmovups 128(%rsp), %zmm0 movq %rbp, %rsp cfi_def_cfa_register (%rsp) popq %rbp @@ -299,14 +261,7 @@ cfi_rel_offset (%r13, 0) subq $176, %rsp movq %rsi, %r13 -/* Below is encoding for vmovups %zmm0, (%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x04 - .byte 0x24 + vmovups %zmm0, (%rsp) movq %rdi, %r12 vmovupd (%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) diff --git a/sysdeps/x86_64/fpu/svml_finite_alias.S b/sysdeps/x86_64/fpu/svml_finite_alias.S index 2dcfc37590..21a9d6d2ee 100644 --- a/sysdeps/x86_64/fpu/svml_finite_alias.S +++ b/sysdeps/x86_64/fpu/svml_finite_alias.S @@ -2,7 +2,7 @@ aliases in libmvec.so while compiler creates the vector names based on scalar asm name. Corresponding discussion is at <https://gcc.gnu.org/ml/gcc/2015-06/msg00173.html>. - Copyright (C) 2015-2016 Free Software Foundation, Inc. + Copyright (C) 2015-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S index 9ca4fbfaa8..d1a4647082 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S index 363090c54a..d58ccecc09 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with SSE2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S index 26a6a4e4d6..f9dc74fc49 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S @@ -1,5 +1,5 @@ /* Function cosf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S index 6c210d98ce..45f14e23df 100644 --- a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S @@ -1,5 +1,5 @@ /* Function cosf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/svml_s_expf16_core.S index d8eecac674..4e18b6f544 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_expf16_core.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. 
+ Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/svml_s_expf4_core.S index 65b5d1a3ce..a2a6209621 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_expf4_core.S @@ -1,5 +1,5 @@ /* Function expf vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVbN4v_expf) -WRAPPER_IMPL_SSE2 expf +WRAPPER_IMPL_SSE2 __expf_finite END (_ZGVbN4v_expf) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/svml_s_expf8_core.S index e3cf975bf6..46297208cd 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_expf8_core.S @@ -1,5 +1,5 @@ /* Function expf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S index 90469d7dcf..1210dcf885 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S @@ -1,5 +1,5 @@ /* Function expf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.S b/sysdeps/x86_64/fpu/svml_s_expf_data.S index 4b644082b6..a1cb6e7591 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf_data.S +++ b/sysdeps/x86_64/fpu/svml_s_expf_data.S @@ -1,5 +1,5 @@ /* Data for function expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.h b/sysdeps/x86_64/fpu/svml_s_expf_data.h index 3610633c96..56a1d8bdf6 100644 --- a/sysdeps/x86_64/fpu/svml_s_expf_data.h +++ b/sysdeps/x86_64/fpu/svml_s_expf_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vector function expf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/svml_s_logf16_core.S index cc2e97df78..e1f4b0cf0c 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_logf16_core.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/svml_s_logf4_core.S index 195f328d92..496b93ffa6 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_logf4_core.S @@ -1,5 +1,5 @@ /* Function logf vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. 
This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -22,7 +22,7 @@ .text ENTRY (_ZGVbN4v_logf) -WRAPPER_IMPL_SSE2 logf +WRAPPER_IMPL_SSE2 __logf_finite END (_ZGVbN4v_logf) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/svml_s_logf8_core.S index 8bb6926667..f0ccee7205 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_logf8_core.S @@ -1,5 +1,5 @@ /* Function logf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S index c2efba23f2..1ddd0381cd 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S @@ -1,5 +1,5 @@ /* Function logf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.S b/sysdeps/x86_64/fpu/svml_s_logf_data.S index a5675f5c7a..154f98c2e0 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf_data.S +++ b/sysdeps/x86_64/fpu/svml_s_logf_data.S @@ -1,5 +1,5 @@ /* Data for vector function logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.h b/sysdeps/x86_64/fpu/svml_s_logf_data.h index 619d5c4bd1..82a9903b10 100644 --- a/sysdeps/x86_64/fpu/svml_s_logf_data.h +++ b/sysdeps/x86_64/fpu/svml_s_logf_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vectorized function logf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/svml_s_powf16_core.S index cb52af0c6b..0859996d0a 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_powf16_core.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/svml_s_powf4_core.S index 88fae60892..4276e6ea28 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_powf4_core.S @@ -1,5 +1,5 @@ /* Function powf vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -21,7 +21,7 @@ .text ENTRY (_ZGVbN4vv_powf) -WRAPPER_IMPL_SSE2_ff powf +WRAPPER_IMPL_SSE2_ff __powf_finite END (_ZGVbN4vv_powf) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/svml_s_powf8_core.S index 8ea44897c1..764dc99ee7 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_powf8_core.S @@ -1,5 +1,5 @@ /* Function powf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S index b5e4e5e6ef..8bb1ef22fd 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S @@ -1,5 +1,5 @@ /* Function powf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.S b/sysdeps/x86_64/fpu/svml_s_powf_data.S index fc1a3d9390..74a31abd1e 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf_data.S +++ b/sysdeps/x86_64/fpu/svml_s_powf_data.S @@ -1,5 +1,5 @@ /* Data for function powf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.h b/sysdeps/x86_64/fpu/svml_s_powf_data.h index 514004238a..5d3270cf27 100644 --- a/sysdeps/x86_64/fpu/svml_s_powf_data.h +++ b/sysdeps/x86_64/fpu/svml_s_powf_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for function powf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S index 5cbf10b8da..40eb974a74 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,6 +20,270 @@ #include "svml_s_wrapper_impl.h" .text +ENTRY (_ZGVeN16vl4l4_sincosf) +WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf +END (_ZGVeN16vl4l4_sincosf) + +/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector + function declared with #pragma omp declare simd notinbranch). 
*/ +.macro WRAPPER_IMPL_AVX512_fFF_vvv callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-64, %rsp + subq $448, %rsp + vmovups %zmm0, 384(%rsp) + lea (%rsp), %rdi + vmovups %zmm1, 128(%rdi) + vmovups %zmm2, 192(%rdi) + vmovups %zmm3, 256(%rdi) + vmovups %zmm4, 320(%rdi) + lea 64(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + vmovdqu 416(%rsp), %ymm0 + lea 32(%rsp), %rdi + lea 96(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 128(%rsp), %rdx + movq 136(%rsp), %rsi + movq 144(%rsp), %r8 + movq 152(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 160(%rsp), %rax + movq 168(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 176(%rsp), %rdi + movq 184(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movq 192(%rsp), %r11 + movq 200(%rsp), %rdx + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 208(%rsp), %rsi + movq 216(%rsp), %r8 + movl 32(%rsp), %r10d + movl 36(%rsp), %eax + movl 40(%rsp), %ecx + movl 44(%rsp), %edi + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 224(%rsp), %r10 + movq 232(%rsp), %rax + movl %ecx, (%rsi) + movl %edi, (%r8) + movq 240(%rsp), %rcx + movq 248(%rsp), %rdi + movl 48(%rsp), %r9d + movl 52(%rsp), %r11d + movl 56(%rsp), %edx + movl 60(%rsp), %esi + movl %r9d, (%r10) + movl %r11d, (%rax) + movq 256(%rsp), %r9 + movq 264(%rsp), %r11 + movl %edx, (%rcx) + movl %esi, (%rdi) + movq 272(%rsp), %rdx + movq 280(%rsp), %rsi + movl 64(%rsp), %r8d + movl 68(%rsp), %r10d + movl 72(%rsp), %eax + movl 76(%rsp), %ecx + movl %r8d, (%r9) + movl %r10d, (%r11) + movq 288(%rsp), %r8 + movq 296(%rsp), %r10 + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 304(%rsp), %rax + movq 312(%rsp), %rcx + movl 80(%rsp), %edi + movl 84(%rsp), %r9d + movl 88(%rsp), %r11d + movl 92(%rsp), 
%edx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 320(%rsp), %rdi + movq 328(%rsp), %r9 + movl %r11d, (%rax) + movl %edx, (%rcx) + movq 336(%rsp), %r11 + movq 344(%rsp), %rdx + movl 96(%rsp), %esi + movl 100(%rsp), %r8d + movl 104(%rsp), %r10d + movl 108(%rsp), %eax + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 352(%rsp), %rsi + movq 360(%rsp), %r8 + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 368(%rsp), %r10 + movq 376(%rsp), %rax + movl 112(%rsp), %ecx + movl 116(%rsp), %edi + movl 120(%rsp), %r9d + movl 124(%rsp), %r11d + movl %ecx, (%rsi) + movl %edi, (%r8) + movl %r9d, (%r10) + movl %r11d, (%rax) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-64, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r12 + leal -112(%rbp), %esi + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x70,0x6 + .cfi_escape 0x10,0xc,0x2,0x76,0x78 + leal -176(%rbp), %edi + movq %rsi, %r12 + pushq %rbx + .cfi_escape 0x10,0x3,0x2,0x76,0x68 + movq %rdi, %rbx + subl $344, %esp + vmovdqa64 %zmm1, -240(%ebp) + vmovdqa64 %zmm2, -304(%ebp) + vmovaps %zmm0, -368(%ebp) + call HIDDEN_JUMPTARGET(\callee) + leal 32(%r12), %esi + vmovups -336(%ebp), %ymm0 + leal 32(%rbx), %edi + call HIDDEN_JUMPTARGET(\callee) + movl -240(%ebp), %eax + vmovss -176(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -236(%ebp), %eax + vmovss -172(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -232(%ebp), %eax + vmovss -168(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -228(%ebp), %eax + vmovss -164(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -224(%ebp), %eax + vmovss -160(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -220(%ebp), %eax + vmovss -156(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -216(%ebp), %eax + vmovss -152(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -212(%ebp), %eax + vmovss -148(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -208(%ebp), %eax + vmovss -144(%ebp), 
%xmm0 + vmovss %xmm0, (%eax) + movl -204(%ebp), %eax + vmovss -140(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -200(%ebp), %eax + vmovss -136(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -196(%ebp), %eax + vmovss -132(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -192(%ebp), %eax + vmovss -128(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -188(%ebp), %eax + vmovss -124(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -184(%ebp), %eax + vmovss -120(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -180(%ebp), %eax + vmovss -116(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -304(%ebp), %eax + vmovss -112(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -300(%ebp), %eax + vmovss -108(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -296(%ebp), %eax + vmovss -104(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -292(%ebp), %eax + vmovss -100(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -288(%ebp), %eax + vmovss -96(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -284(%ebp), %eax + vmovss -92(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -280(%ebp), %eax + vmovss -88(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -276(%ebp), %eax + vmovss -84(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -272(%ebp), %eax + vmovss -80(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -268(%ebp), %eax + vmovss -76(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -264(%ebp), %eax + vmovss -72(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -260(%ebp), %eax + vmovss -68(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -256(%ebp), %eax + vmovss -64(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -252(%ebp), %eax + vmovss -60(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -248(%ebp), %eax + vmovss -56(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -244(%ebp), %eax + vmovss -52(%ebp), %xmm0 + vmovss %xmm0, (%eax) + addl $344, %esp + popq %rbx + popq %r10 + .cfi_def_cfa 10, 0 + popq %r12 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + ENTRY (_ZGVeN16vvv_sincosf) -WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf +WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN8vl4l4_sincosf END 
(_ZGVeN16vvv_sincosf) diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S index 1a7d2733af..5daa5118d6 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,13 +16,135 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ - #include <sysdep.h> #include "svml_s_wrapper_impl.h" .text -ENTRY (_ZGVbN4vvv_sincosf) +ENTRY (_ZGVbN4vl4l4_sincosf) WRAPPER_IMPL_SSE2_fFF sincosf +END (_ZGVbN4vl4l4_sincosf) +libmvec_hidden_def (_ZGVbN4vl4l4_sincosf) + +/* SSE2 ISA version as wrapper to scalar (for vector + function declared with #pragma omp declare simd notinbranch). */ +.macro WRAPPER_IMPL_SSE2_fFF_vvv callee +#ifndef __ILP32__ + subq $120, %rsp + cfi_adjust_cfa_offset(120) + movaps %xmm0, 96(%rsp) + lea (%rsp), %rdi + movdqa %xmm1, 32(%rdi) + lea 16(%rsp), %rsi + movdqa %xmm2, 32(%rsi) + movdqa %xmm3, 48(%rsi) + movdqa %xmm4, 64(%rsi) + call JUMPTARGET(\callee) + movss 100(%rsp), %xmm0 + lea 4(%rsp), %rdi + lea 20(%rsp), %rsi + call JUMPTARGET(\callee) + movss 104(%rsp), %xmm0 + lea 8(%rsp), %rdi + lea 24(%rsp), %rsi + call JUMPTARGET(\callee) + movss 108(%rsp), %xmm0 + lea 12(%rsp), %rdi + lea 28(%rsp), %rsi + call JUMPTARGET(\callee) + movq 32(%rsp), %rdx + movq 40(%rsp), %rsi + movq 48(%rsp), %r8 + movq 56(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 64(%rsp), %rax + movq 72(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 80(%rsp), %rdi + movq 88(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, 
(%rax) + movl %edx, (%rcx) + movl %esi, (%rdi) + movl %r8d, (%r9) + addq $120, %rsp + cfi_adjust_cfa_offset(-120) + ret +#else + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + pushq %rbx + .cfi_def_cfa_offset 24 + .cfi_offset 3, -24 + subl $88, %esp + .cfi_def_cfa_offset 112 + leal 64(%rsp), %esi + movaps %xmm1, (%esp) + leal 48(%rsp), %edi + movaps %xmm2, 16(%esp) + movq %rsi, %rbp + movq %rdi, %rbx + movaps %xmm0, 32(%esp) + call JUMPTARGET(\callee) + movups 36(%esp), %xmm0 + leal 4(%rbp), %esi + leal 4(%rbx), %edi + call JUMPTARGET(\callee) + movups 40(%esp), %xmm0 + leal 8(%rbp), %esi + leal 8(%rbx), %edi + call JUMPTARGET(\callee) + movups 44(%esp), %xmm0 + leal 12(%rbp), %esi + leal 12(%rbx), %edi + call JUMPTARGET(\callee) + movq (%esp), %rax + movss 48(%esp), %xmm0 + movdqa (%esp), %xmm4 + movdqa 16(%esp), %xmm7 + movss %xmm0, (%eax) + movss 52(%esp), %xmm0 + pextrd $1, %xmm4, %eax + movss %xmm0, (%eax) + movq 8(%esp), %rax + movss 56(%esp), %xmm0 + movss %xmm0, (%eax) + movss 60(%esp), %xmm0 + pextrd $3, %xmm4, %eax + movss %xmm0, (%eax) + movq 16(%esp), %rax + movss 64(%esp), %xmm0 + movss %xmm0, (%eax) + movss 68(%esp), %xmm0 + pextrd $1, %xmm7, %eax + movss %xmm0, (%eax) + movq 24(%esp), %rax + movss 72(%esp), %xmm0 + movss %xmm0, (%eax) + movss 76(%esp), %xmm0 + pextrd $3, %xmm7, %eax + movss %xmm0, (%eax) + addl $88, %esp + .cfi_def_cfa_offset 24 + popq %rbx + .cfi_def_cfa_offset 16 + popq %rbp + .cfi_def_cfa_offset 8 + ret +#endif +.endm + +ENTRY (_ZGVbN4vvv_sincosf) +WRAPPER_IMPL_SSE2_fFF_vvv sincosf END (_ZGVbN4vvv_sincosf) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S index 74d1dfd1a8..d6d4600d10 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. 
+ Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,8 +20,179 @@ #include "svml_s_wrapper_impl.h" .text +ENTRY (_ZGVdN8vl4l4_sincosf) +WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf +END (_ZGVdN8vl4l4_sincosf) +libmvec_hidden_def (_ZGVdN8vl4l4_sincosf) + +/* AVX2 ISA version as wrapper to SSE ISA version (for vector + function declared with #pragma omp declare simd notinbranch). */ +.macro WRAPPER_IMPL_AVX2_fFF_vvv callee +#ifndef __ILP32__ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-32, %rsp + subq $224, %rsp + vmovups %ymm0, 192(%rsp) + lea (%rsp), %rdi + vmovdqu %ymm1, 64(%rdi) + vmovdqu %ymm2, 96(%rdi) + vmovdqu %ymm3, 128(%rdi) + vmovdqu %ymm4, 160(%rdi) + lea 32(%rsp), %rsi + vzeroupper + call HIDDEN_JUMPTARGET(\callee) + vmovups 208(%rsp), %xmm0 + lea 16(%rsp), %rdi + lea 48(%rsp), %rsi + call HIDDEN_JUMPTARGET(\callee) + movq 64(%rsp), %rdx + movq 72(%rsp), %rsi + movq 80(%rsp), %r8 + movq 88(%rsp), %r10 + movl (%rsp), %eax + movl 4(%rsp), %ecx + movl 8(%rsp), %edi + movl 12(%rsp), %r9d + movl %eax, (%rdx) + movl %ecx, (%rsi) + movq 96(%rsp), %rax + movq 104(%rsp), %rcx + movl %edi, (%r8) + movl %r9d, (%r10) + movq 112(%rsp), %rdi + movq 120(%rsp), %r9 + movl 16(%rsp), %r11d + movl 20(%rsp), %edx + movl 24(%rsp), %esi + movl 28(%rsp), %r8d + movl %r11d, (%rax) + movl %edx, (%rcx) + movq 128(%rsp), %r11 + movq 136(%rsp), %rdx + movl %esi, (%rdi) + movl %r8d, (%r9) + movq 144(%rsp), %rsi + movq 152(%rsp), %r8 + movl 32(%rsp), %r10d + movl 36(%rsp), %eax + movl 40(%rsp), %ecx + movl 44(%rsp), %edi + movl %r10d, (%r11) + movl %eax, (%rdx) + movq 160(%rsp), %r10 + movq 168(%rsp), %rax + movl %ecx, (%rsi) + movl %edi, (%r8) + movq 176(%rsp), %rcx + movq 184(%rsp), %rdi + movl 48(%rsp), %r9d + movl 52(%rsp), %r11d + movl 56(%rsp), %edx + movl 60(%rsp), %esi + movl %r9d, 
(%r10) + movl %r11d, (%rax) + movl %edx, (%rcx) + movl %esi, (%rdi) + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret +#else + leal 8(%rsp), %r10d + .cfi_def_cfa 10, 0 + andl $-32, %esp + pushq -8(%r10d) + pushq %rbp + .cfi_escape 0x10,0x6,0x2,0x76,0 + movl %esp, %ebp + pushq %r12 + leal -80(%rbp), %esi + pushq %r10 + .cfi_escape 0xf,0x3,0x76,0x70,0x6 + .cfi_escape 0x10,0xc,0x2,0x76,0x78 + leal -112(%rbp), %edi + movq %rsi, %r12 + pushq %rbx + .cfi_escape 0x10,0x3,0x2,0x76,0x68 + movq %rdi, %rbx + subl $184, %esp + vmovdqa %ymm1, -144(%ebp) + vmovdqa %ymm2, -176(%ebp) + vmovaps %ymm0, -208(%ebp) + vzeroupper + call HIDDEN_JUMPTARGET(\callee) + leal 16(%r12), %esi + vmovups -192(%ebp), %xmm0 + leal 16(%rbx), %edi + call HIDDEN_JUMPTARGET(\callee) + movl -144(%ebp), %eax + vmovss -112(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -140(%ebp), %eax + vmovss -108(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -136(%ebp), %eax + vmovss -104(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -132(%ebp), %eax + vmovss -100(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -128(%ebp), %eax + vmovss -96(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -124(%ebp), %eax + vmovss -92(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -120(%ebp), %eax + vmovss -88(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -116(%ebp), %eax + vmovss -84(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -176(%ebp), %eax + vmovss -80(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -172(%ebp), %eax + vmovss -76(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -168(%ebp), %eax + vmovss -72(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -164(%ebp), %eax + vmovss -68(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -160(%ebp), %eax + vmovss -64(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -156(%ebp), %eax + vmovss -60(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -152(%ebp), %eax + vmovss -56(%ebp), %xmm0 + vmovss %xmm0, (%eax) + movl -148(%ebp), %eax + vmovss -52(%ebp), %xmm0 + vmovss %xmm0, (%eax) + 
addl $184, %esp + popq %rbx + popq %r10 + .cfi_def_cfa 10, 0 + popq %r12 + popq %rbp + leal -8(%r10), %esp + .cfi_def_cfa 7, 8 + ret +#endif +.endm + ENTRY (_ZGVdN8vvv_sincosf) -WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf +WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN4vl4l4_sincosf END (_ZGVdN8vvv_sincosf) #ifndef USE_MULTIARCH diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S index 55b8b2d768..585e6d87c4 100644 --- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S @@ -1,5 +1,5 @@ /* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,6 +20,179 @@ #include "svml_s_wrapper_impl.h" .text -ENTRY(_ZGVcN8vvv_sincosf) -WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf -END(_ZGVcN8vvv_sincosf) +ENTRY (_ZGVcN8vl4l4_sincosf) +WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf +END (_ZGVcN8vl4l4_sincosf) + +/* AVX ISA version as wrapper to SSE ISA version (for vector + function declared with #pragma omp declare simd notinbranch). 
*/
+.macro WRAPPER_IMPL_AVX_fFF_vvv callee
+/* Run the callee on each 16-byte half of the 32-byte input in %ymm0,
+   letting it write both result sets into stack buffers, then store the
+   sixteen 32-bit results one by one through the destination pointers
+   the caller supplied as vectors.  */
+#ifndef __ILP32__
+/* LP64 frame, relative to the 32-byte aligned %rsp:
+     0..31     results written through the callee's %rdi (two halves);
+               presumably the sine outputs, assuming the callee follows
+               sincosf's (x, sinp, cosp) argument order
+     32..63    results written through the callee's %rsi (two halves)
+     64..95    copy of the input vector %ymm0
+     96..159   the eight pointers received in %xmm1-%xmm4
+     160..207  the six pointers received in %xmm5-%xmm7
+   The last two destination pointers are read from the caller's frame
+   at 16(%rbp) and 24(%rbp).  */
+        pushq     %rbp
+        /* Describe the frame to the unwinder: without these notes the
+           CFA recorded at function entry is wrong from here to the ret.  */
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp
+        subq      $224, %rsp
+        /* Spill the input and the pointer vectors; %rdi and %rsi are set
+           up as the callee's two output buffers along the way.  */
+        vmovups   %ymm0, 64(%rsp)
+        lea       (%rsp), %rdi
+        vmovdqu   %xmm1, 96(%rdi)
+        vmovdqu   %xmm2, 112(%rdi)
+        vmovdqu   %xmm3, 128(%rdi)
+        vmovdqu   %xmm4, 144(%rdi)
+        vmovdqu   %xmm5, 160(%rdi)
+        lea       32(%rsp), %rsi
+        vmovdqu   %xmm6, 144(%rsi)
+        vmovdqu   %xmm7, 160(%rsi)
+        /* %xmm0 still holds the low half of the input for the first call.  */
+        vzeroupper
+        call      HIDDEN_JUMPTARGET(\callee)
+        /* Second call: high half of the input, buffers advanced by 16.  */
+        vmovdqu   80(%rsp), %xmm0
+        lea       16(%rsp), %rdi
+        lea       48(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(\callee)
+        /* Scatter: buffer at 0..31 goes through the pointers saved at
+           96..159, buffer at 32..63 through those at 160..207 plus the
+           two stack-passed pointers.  Loads and stores are interleaved.  */
+        movq      96(%rsp), %rdx
+        movq      104(%rsp), %rsi
+        movq      112(%rsp), %r8
+        movq      120(%rsp), %r10
+        movl      (%rsp), %eax
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      128(%rsp), %rax
+        movq      136(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      144(%rsp), %rdi
+        movq      152(%rsp), %r9
+        movl      16(%rsp), %r11d
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      160(%rsp), %r11
+        movq      168(%rsp), %rdx
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      176(%rsp), %rsi
+        movq      184(%rsp), %r8
+        movl      32(%rsp), %r10d
+        movl      36(%rsp), %eax
+        movl      40(%rsp), %ecx
+        movl      44(%rsp), %edi
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      192(%rsp), %r10
+        movq      200(%rsp), %rax
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movq      16(%rbp), %rcx
+        movq      24(%rbp), %rdi
+        movl      48(%rsp), %r9d
+        movl      52(%rsp), %r11d
+        movl      56(%rsp), %edx
+        movl      60(%rsp), %esi
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+/* ILP32 path: destinations are addressed through 32-bit registers, and
+   each group of eight pointers arrives in two XMM registers
+   (%xmm1/%xmm2, then %xmm3/%xmm4).  Frame, relative to %ebp: input at
+   -208, pointer vectors at -128, -144, -160 and -176, result buffers at
+   -112 (callee's %rdi) and -80 (callee's %rsi).  %r12 and %rbx keep the
+   two buffer addresses across the first call.  */
+        leal    8(%rsp), %r10d
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp
+        pushq   -8(%r10d)
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -80(%rbp), %esi
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -112(%rbp), %edi
+        movq    %rsi, %r12
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $184, %esp
+        vmovaps %xmm1, -128(%ebp)
+        vmovaps %xmm2, -144(%ebp)
+        vmovaps %xmm3, -160(%ebp)
+        vmovaps %xmm4, -176(%ebp)
+        vmovaps %ymm0, -208(%ebp)
+        vzeroupper
+        call    HIDDEN_JUMPTARGET(\callee)
+        /* Second call: high half of the input, buffers advanced by 16.  */
+        leal    16(%r12), %esi
+        vmovups -192(%ebp), %xmm0
+        leal    16(%rbx), %edi
+        call    HIDDEN_JUMPTARGET(\callee)
+        /* Scatter.  Each movq fetches two adjacent 32-bit pointers, of
+           which only the low one is used via %eax; vpextrd picks the
+           odd-numbered pointers out of the reloaded vector.  */
+        movq    -128(%ebp), %rax
+        vmovss  -112(%ebp), %xmm0
+        vmovdqa -128(%ebp), %xmm7
+        vmovdqa -144(%ebp), %xmm3
+        vmovss  %xmm0, (%eax)
+        vmovss  -108(%ebp), %xmm0
+        vpextrd $1, %xmm7, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -120(%ebp), %rax
+        vmovss  -104(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -100(%ebp), %xmm0
+        vpextrd $3, %xmm7, %eax
+        vmovdqa -160(%ebp), %xmm7
+        vmovss  %xmm0, (%eax)
+        movq    -144(%ebp), %rax
+        vmovss  -96(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -92(%ebp), %xmm0
+        vpextrd $1, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -136(%ebp), %rax
+        vmovss  -88(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -84(%ebp), %xmm0
+        vpextrd $3, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -160(%ebp), %rax
+        vmovss  -80(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -76(%ebp), %xmm0
+        vpextrd $1, %xmm7, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -152(%ebp), %rax
+        vmovss  -72(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -68(%ebp), %xmm0
+        vpextrd $3, %xmm7, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -176(%ebp), %rax
+        vmovss  -64(%ebp), %xmm0
+        vmovdqa -176(%ebp), %xmm3
+        vmovss  %xmm0, (%eax)
+        vmovss  -60(%ebp), %xmm0
+        vpextrd $1, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -168(%ebp), %rax
+        vmovss  -56(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -52(%ebp), %xmm0
+        vpextrd $3, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        addl    $184, %esp
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
+ENTRY (_ZGVcN8vvv_sincosf)
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN4vl4l4_sincosf
+END (_ZGVcN8vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S index d7a31e1ea6..8c5547e26f 100644 --- 
a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX-512. Wrapper to AVX2 version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S index 6f10137134..d56137b32a 100644 --- a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with SSE2. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S index c459658688..e39392243e 100644 --- a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S @@ -1,5 +1,5 @@ /* Function sinf vectorized with AVX2, wrapper version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S index 5e95aa2e02..9984e6f9f7 100644 --- a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S +++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S @@ -1,5 +1,5 @@ /* Function sinf vectorized in AVX ISA as wrapper to SSE4 ISA version. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.S b/sysdeps/x86_64/fpu/svml_s_trig_data.S index b61aa6abb9..8f1e1f60b8 100644 --- a/sysdeps/x86_64/fpu/svml_s_trig_data.S +++ b/sysdeps/x86_64/fpu/svml_s_trig_data.S @@ -1,5 +1,5 @@ /* Data for function cosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.h b/sysdeps/x86_64/fpu/svml_s_trig_data.h index 2e469a918a..0faf161c08 100644 --- a/sysdeps/x86_64/fpu/svml_s_trig_data.h +++ b/sysdeps/x86_64/fpu/svml_s_trig_data.h @@ -1,5 +1,5 @@ /* Offsets for data table for vectorized sinf, cosf, sincosf. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h index b1a03be3d9..937afb5cbc 100644 --- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h +++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h @@ -1,5 +1,5 @@ /* Wrapper implementations of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -21,16 +21,16 @@ subq $40, %rsp cfi_adjust_cfa_offset(40) movaps %xmm0, (%rsp) - call \callee@PLT + call JUMPTARGET(\callee) movss %xmm0, 16(%rsp) movss 4(%rsp), %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movss %xmm0, 20(%rsp) movss 8(%rsp), %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movss %xmm0, 24(%rsp) movss 12(%rsp), %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movss 16(%rsp), %xmm3 movss 20(%rsp), %xmm2 movss 24(%rsp), %xmm1 @@ -50,19 +50,19 @@ cfi_adjust_cfa_offset(56) movaps %xmm0, (%rsp) movaps %xmm1, 16(%rsp) - call \callee@PLT + call JUMPTARGET(\callee) movss %xmm0, 32(%rsp) movss 4(%rsp), %xmm0 movss 20(%rsp), %xmm1 - call \callee@PLT + call JUMPTARGET(\callee) movss %xmm0, 36(%rsp) movss 8(%rsp), %xmm0 movss 24(%rsp), %xmm1 - call \callee@PLT + call JUMPTARGET(\callee) movss %xmm0, 40(%rsp) movss 12(%rsp), %xmm0 movss 28(%rsp), %xmm1 - call \callee@PLT + call JUMPTARGET(\callee) movss 32(%rsp), %xmm3 movss 36(%rsp), %xmm2 movss 40(%rsp), %xmm1 @@ -91,7 +91,7 @@ leaq 24(%rsp), %rsi leaq 28(%rsp), %rdi movaps %xmm0, (%rsp) - call \callee@PLT + call JUMPTARGET(\callee) leaq 24(%rsp), %rsi leaq 28(%rsp), %rdi movss 28(%rsp), %xmm0 @@ -101,7 +101,7 @@ movss %xmm0, (%rbx) movaps %xmm1, %xmm0 shufps $85, %xmm1, %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movss 28(%rsp), %xmm0 leaq 24(%rsp), %rsi movss %xmm0, 4(%rbp) @@ -111,7 +111,7 @@ movss %xmm0, 4(%rbx) movaps %xmm1, %xmm0 unpckhps %xmm1, %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movaps (%rsp), %xmm1 leaq 24(%rsp), %rsi leaq 28(%rsp), %rdi @@ -121,7 +121,7 @@ movss 24(%rsp), %xmm0 movss %xmm0, 8(%rbx) movaps %xmm1, %xmm0 - call \callee@PLT + call JUMPTARGET(\callee) movss 28(%rsp), %xmm0 movss %xmm0, 12(%rbp) movss 24(%rsp), %xmm0 @@ -246,29 +246,14 @@ cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $128, %rsp -/* Below is encoding for vmovups %zmm0, (%rsp). 
*/ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x04 - .byte 0x24 + vmovups %zmm0, (%rsp) vmovupd (%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) vmovupd %ymm0, 64(%rsp) vmovupd 32(%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) vmovupd %ymm0, 96(%rsp) -/* Below is encoding for vmovups 64(%rsp), %zmm0. */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x10 - .byte 0x44 - .byte 0x24 - .byte 0x01 + vmovups 64(%rsp), %zmm0 movq %rbp, %rsp cfi_def_cfa_register (%rsp) popq %rbp @@ -286,23 +271,8 @@ cfi_def_cfa_register (%rbp) andq $-64, %rsp subq $192, %rsp -/* Below is encoding for vmovups %zmm0, (%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x04 - .byte 0x24 -/* Below is encoding for vmovups %zmm1, 64(%rsp). */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x11 - .byte 0x4c - .byte 0x24 - .byte 0x01 + vmovups %zmm0, (%rsp) + vmovups %zmm1, 64(%rsp) vmovups (%rsp), %ymm0 vmovups 64(%rsp), %ymm1 call HIDDEN_JUMPTARGET(\callee) @@ -311,15 +281,7 @@ vmovups 96(%rsp), %ymm1 call HIDDEN_JUMPTARGET(\callee) vmovups %ymm0, 160(%rsp) -/* Below is encoding for vmovups 128(%rsp), %zmm0. */ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x10 - .byte 0x44 - .byte 0x24 - .byte 0x02 + vmovups 128(%rsp), %zmm0 movq %rbp, %rsp cfi_def_cfa_register (%rsp) popq %rbp @@ -340,14 +302,7 @@ pushq %r13 subq $176, %rsp movq %rsi, %r13 -/* Below is encoding for vmovaps %zmm0, (%rsp). 
*/ - .byte 0x62 - .byte 0xf1 - .byte 0x7c - .byte 0x48 - .byte 0x29 - .byte 0x04 - .byte 0x24 + vmovaps %zmm0, (%rsp) movq %rdi, %r12 vmovaps (%rsp), %ymm0 call HIDDEN_JUMPTARGET(\callee) diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c new file mode 100644 index 0000000000..43914ef0e7 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c new file mode 100644 index 0000000000..514883dcf9 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias-mod.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c new file mode 100644 index 0000000000..43914ef0e7 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c new file mode 100644 index 0000000000..43914ef0e7 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c new file mode 100644 index 0000000000..514883dcf9 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias-mod.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c new file mode 100644 index 0000000000..43914ef0e7 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c @@ -0,0 +1 @@ +#include 
"test-double-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c new file mode 100644 index 0000000000..43914ef0e7 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c new file mode 100644 index 0000000000..514883dcf9 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias-mod.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c new file mode 100644 index 0000000000..43914ef0e7 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c new file mode 100644 index 0000000000..43914ef0e7 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c @@ -0,0 +1 @@ +#include "test-double-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c new file mode 100644 index 0000000000..6f2e588021 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c @@ -0,0 +1,25 @@ +/* Part of test to build shared library to ensure link against + *_finite aliases from libmvec. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <stdlib.h> +#include <math-tests-arch.h> + +#include "test-double.h" +#include "test-libmvec-alias-mod.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias.c new file mode 100644 index 0000000000..d38b49d6c8 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias.c @@ -0,0 +1,29 @@ +/* Part of test to ensure link against *_finite aliases from libmvec. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern int +test_finite_alias (void); + +static int +do_test (void) +{ + return test_finite_alias (); +} + +#define TEST_FUNCTION do_test () +#include "../../../test-skeleton.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c new file mode 100644 index 0000000000..fc2ffea314 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c @@ -0,0 +1 @@ +#include "test-double-libmvec-sincos-main.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c new file mode 100644 index 0000000000..896f1bcbaf --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c @@ -0,0 +1 @@ +#include "test-double-libmvec-sincos.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c new file mode 100644 index 0000000000..fc2ffea314 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c @@ -0,0 +1 @@ +#include "test-double-libmvec-sincos-main.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c new file mode 100644 index 0000000000..896f1bcbaf --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c @@ -0,0 +1 @@ +#include "test-double-libmvec-sincos.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c new file mode 100644 index 0000000000..fc2ffea314 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c @@ -0,0 +1 @@ +#include "test-double-libmvec-sincos-main.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c new file mode 100644 index 0000000000..896f1bcbaf --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c @@ -0,0 +1 
@@ +#include "test-double-libmvec-sincos.c" diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c new file mode 100644 index 0000000000..2e52fddf5d --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c @@ -0,0 +1,43 @@ +/* Test for vector sincos ABI. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+
+#define N 1000
+double x[N], s[N], c[N];
+double* s_ptrs[N];
+double* c_ptrs[N];
+
+/* Call sincos from an omp simd loop via per-element pointers; returns 0.  */
+int
+test_sincos_abi (void)
+{
+  int k;
+
+  for (k = 0; k < N; ++k)
+    {
+      c_ptrs[k] = c + k;
+      s_ptrs[k] = s + k;
+      x[k] = k / 3;  /* Integer division: inputs are 0, 0, 0, 1, ...  */
+    }
+#pragma omp simd
+  for (k = 0; k < N; ++k)
+    sincos (x[k], s_ptrs[k], c_ptrs[k]);
+
+  return 0;
+}
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c new file mode 100644 index 0000000000..cffaa73135 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c @@ -0,0 +1,44 @@ +/* Test for vector sincos ABI. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math-tests-arch.h> + +extern int test_sincos_abi (void); + +int arch_check = 1; + +static void +check_arch (void) +{ + CHECK_ARCH_EXT; + arch_check = 0; +} + +static int +do_test (void) +{ + check_arch (); + + if (arch_check) + return 77; + + return test_sincos_abi (); +} + +#define TEST_FUNCTION do_test () +#include "../../../test-skeleton.c" diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c index a9d15979aa..4ff1439f9c 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for SSE ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,13 +17,17 @@ <http://www.gnu.org/licenses/>. 
*/ #include "test-double-vlen2.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #define VEC_TYPE __m128d VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVbN2v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVbN2vv_pow) + +#define VEC_INT_TYPE __m128i + +VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos) diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c index eb6a531502..c7bdad517b 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,6 +17,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include "test-double-vlen4.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #undef VEC_SUFF @@ -26,7 +27,14 @@ VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVdN4v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVdN4v_sin) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVdN4v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVdN4vv_pow) + +#ifndef __ILP32__ +# define VEC_INT_TYPE __m256i +#else +# define VEC_INT_TYPE __m128i +#endif + +VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos) diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.h index 0cadef03d6..4b196e66fc 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.h @@ -1,5 +1,5 @@ /* Tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,18 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include "test-double-vlen4.h" +#include <test-double-vlen4.h> #undef VEC_SUFF #define VEC_SUFF _vlen4_avx2 -#define TEST_VECTOR_cos 1 -#define TEST_VECTOR_sin 1 -#define TEST_VECTOR_sincos 1 -#define TEST_VECTOR_log 1 -#define TEST_VECTOR_exp 1 -#define TEST_VECTOR_pow 1 - +#undef REQUIRE_AVX #define REQUIRE_AVX2 - -#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c index 52b81da3ee..2bb0085700 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. 
+ Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,13 +17,21 @@ <http://www.gnu.org/licenses/>. */ #include "test-double-vlen4.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #define VEC_TYPE __m256d VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVcN4v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVcN4v_sin) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVcN4v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVcN4vv_pow) + +#define VEC_INT_TYPE __m128i + +#ifndef __ILP32__ +VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos) +#else +VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos) +#endif diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.h index 9ae97f1388..316340cb59 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4.h @@ -1,5 +1,5 @@ /* Tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,15 +16,6 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "test-double-vlen4.h" - -#define TEST_VECTOR_cos 1 -#define TEST_VECTOR_sin 1 -#define TEST_VECTOR_sincos 1 -#define TEST_VECTOR_log 1 -#define TEST_VECTOR_exp 1 -#define TEST_VECTOR_pow 1 +#include_next <test-double-vlen4.h> #define REQUIRE_AVX - -#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c index c10bb9cb4a..ea179284ed 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX-512 versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,13 +17,21 @@ <http://www.gnu.org/licenses/>. */ #include "test-double-vlen8.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #define VEC_TYPE __m512d VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVeN8v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVeN8v_sin) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos) VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log) VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVeN8v_exp) VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVeN8vv_pow) + +#ifndef __ILP32__ +# define VEC_INT_TYPE __m512i +#else +# define VEC_INT_TYPE __m256i +#endif + +VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos) diff --git a/sysdeps/x86_64/fpu/test-double-vlen8.c b/sysdeps/x86_64/fpu/test-double-vlen8.h index 4fb6c8d196..41d188081e 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8.c +++ b/sysdeps/x86_64/fpu/test-double-vlen8.h @@ -1,5 +1,5 @@ /* Tests for AVX-512 versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -16,15 +16,6 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include "test-double-vlen8.h" - -#define TEST_VECTOR_cos 1 -#define TEST_VECTOR_sin 1 -#define TEST_VECTOR_sincos 1 -#define TEST_VECTOR_log 1 -#define TEST_VECTOR_exp 1 -#define TEST_VECTOR_pow 1 +#include_next <test-double-vlen8.h> #define REQUIRE_AVX512F - -#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c new file mode 100644 index 0000000000..f3691cc8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c new file mode 100644 index 0000000000..7fc3d8aedd --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias-mod.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c new file mode 100644 index 0000000000..f3691cc8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c new file mode 100644 index 0000000000..f3691cc8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c new file mode 100644 index 0000000000..7fc3d8aedd --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias-mod.c" diff --git 
a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c new file mode 100644 index 0000000000..f3691cc8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c new file mode 100644 index 0000000000..f3691cc8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c new file mode 100644 index 0000000000..7fc3d8aedd --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias-mod.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c new file mode 100644 index 0000000000..f3691cc8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c new file mode 100644 index 0000000000..f3691cc8e6 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c @@ -0,0 +1 @@ +#include "test-float-libmvec-alias.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c new file mode 100644 index 0000000000..5e6a587a94 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c @@ -0,0 +1,25 @@ +/* Part of test to build shared library to ensure link against + *_finite aliases from libmvec. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <stdlib.h> +#include <math-tests-arch.h> + +#include "test-float.h" +#include "test-libmvec-alias-mod.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias.c new file mode 100644 index 0000000000..d38b49d6c8 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias.c @@ -0,0 +1,29 @@ +/* Part of test to ensure link against *_finite aliases from libmvec. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +extern int +test_finite_alias (void); + +static int +do_test (void) +{ + return test_finite_alias (); +} + +#define TEST_FUNCTION do_test () +#include "../../../test-skeleton.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c new file mode 100644 index 0000000000..558e2ac649 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c @@ -0,0 +1 @@ +#include "test-float-libmvec-sincosf-main.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c new file mode 100644 index 0000000000..5b45f0a055 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c @@ -0,0 +1 @@ +#include "test-float-libmvec-sincosf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c new file mode 100644 index 0000000000..558e2ac649 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c @@ -0,0 +1 @@ +#include "test-float-libmvec-sincosf-main.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c new file mode 100644 index 0000000000..5b45f0a055 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c @@ -0,0 +1 @@ +#include "test-float-libmvec-sincosf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c new file mode 100644 index 0000000000..558e2ac649 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c @@ -0,0 +1 @@ +#include "test-float-libmvec-sincosf-main.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c new file mode 100644 index 0000000000..5b45f0a055 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c @@ -0,0 +1 
@@ +#include "test-float-libmvec-sincosf.c" diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c new file mode 100644 index 0000000000..ce1dd1a8a4 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c @@ -0,0 +1,42 @@ +/* Test for vector sincosf ABI. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#define N 1000 +float x[N], s[N], c[N]; +float *s_ptrs[N]; +float *c_ptrs[N]; + +int +test_sincosf_abi (void) +{ + int i; + for(i = 0; i < N; i++) + { + x[i] = i / 3; + s_ptrs[i] = &s[i]; + c_ptrs[i] = &c[i]; + } + +#pragma omp simd + for(i = 0; i < N; i++) + sincosf (x[i], s_ptrs[i], c_ptrs[i]); + + return 0; +} diff --git a/sysdeps/x86_64/fpu/s_fdiml.S b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c index f9f1e20259..a56d9680a0 100644 --- a/sysdeps/x86_64/fpu/s_fdiml.S +++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c @@ -1,7 +1,6 @@ -/* Compute positive difference. - Copyright (C) 1997-2016 Free Software Foundation, Inc. +/* Test for vector sincosf ABI. + Copyright (C) 2016-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. 
The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -17,27 +16,29 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> +#include <math-tests-arch.h> - .text -ENTRY(__fdiml) - fldt 8(%rsp) // x - fldt 24(%rsp) // x : y +extern int test_sincosf_abi (void); - fucomi %st(1), %st - jp 1f +int arch_check = 1; - jc 3f - fstp %st(1) - fldz - jmp 2f +static void +check_arch (void) +{ + CHECK_ARCH_EXT; + arch_check = 0; +} -3: fsubrp %st, %st(1) - ret +static int +do_test (void) +{ + check_arch (); -1: fucomi %st(0), %st - fcmovnu %st(1), %st -2: fstp %st(1) - ret -END(__fdiml) -weak_alias (__fdiml, fdiml) + if (arch_check) + return 77; + + return test_sincosf_abi (); +} + +#define TEST_FUNCTION do_test () +#include "../../../test-skeleton.c" diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c index dc09e4a338..d2a81ecf53 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX-512 ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,13 +17,21 @@ <http://www.gnu.org/licenses/>. 
*/ #include "test-float-vlen16.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #define VEC_TYPE __m512 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVeN16v_cosf) VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVeN16v_sinf) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVeN16v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVeN16v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVeN16vv_powf) + +#define VEC_INT_TYPE __m512i + +#ifndef __ILP32__ +VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf) +#else +VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf) +#endif diff --git a/sysdeps/x86_64/fpu/test-float-vlen16.c b/sysdeps/x86_64/fpu/test-float-vlen16.h index 882bfc840d..ffe27866b5 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen16.c +++ b/sysdeps/x86_64/fpu/test-float-vlen16.h @@ -1,5 +1,5 @@ /* Tests for AVX-512 ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,15 +16,6 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include "test-float-vlen16.h" - -#define TEST_VECTOR_cosf 1 -#define TEST_VECTOR_sinf 1 -#define TEST_VECTOR_sincosf 1 -#define TEST_VECTOR_logf 1 -#define TEST_VECTOR_expf 1 -#define TEST_VECTOR_powf 1 +#include_next <test-float-vlen16.h> #define REQUIRE_AVX512F - -#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c index 0bb9818146..afa7da26f6 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for SSE ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. 
+ Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,13 +17,21 @@ <http://www.gnu.org/licenses/>. */ #include "test-float-vlen4.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #define VEC_TYPE __m128 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf) VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVbN4v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVbN4vv_powf) + +#define VEC_INT_TYPE __m128i + +#ifndef __ILP32__ +VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf) +#else +VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf) +#endif diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c index 4985ac2379..d7e79a3f37 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,6 +17,7 @@ <http://www.gnu.org/licenses/>. 
*/ #include "test-float-vlen8.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #undef VEC_SUFF @@ -26,7 +27,17 @@ VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVdN8v_cosf) VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVdN8v_sinf) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVdN8v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVdN8v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVdN8vv_powf) + +/* Redefinition of wrapper to be compatible with _ZGVdN8vvv_sincosf. */ +#undef VECTOR_WRAPPER_fFF + +#define VEC_INT_TYPE __m256i + +#ifndef __ILP32__ +VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf) +#else +VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf) +#endif diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.h index 7a416385b6..c468dd6e69 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.h @@ -1,5 +1,5 @@ /* Tests for AVX2 ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,18 +16,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "test-float-vlen8.h" +#include <test-float-vlen8.h> #undef VEC_SUFF #define VEC_SUFF _vlen8_avx2 -#define TEST_VECTOR_cosf 1 -#define TEST_VECTOR_sinf 1 -#define TEST_VECTOR_sincosf 1 -#define TEST_VECTOR_logf 1 -#define TEST_VECTOR_expf 1 -#define TEST_VECTOR_powf 1 - +#undef REQUIRE_AVX #define REQUIRE_AVX2 - -#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c index 9cc2883399..6f7869ba3d 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c @@ -1,5 +1,5 @@ /* Wrapper part of tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,13 +17,21 @@ <http://www.gnu.org/licenses/>. */ #include "test-float-vlen8.h" +#include "test-math-vector-sincos.h" #include <immintrin.h> #define VEC_TYPE __m256 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVcN8v_cosf) VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVcN8v_sinf) -VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf) VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVcN8v_logf) VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVcN8v_expf) VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVcN8vv_powf) + +#define VEC_INT_TYPE __m128i + +#ifndef __ILP32__ +VECTOR_WRAPPER_fFF_4 (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf) +#else +VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf) +#endif diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.h index c92a50ae7e..153820ecc2 100644 --- a/sysdeps/x86_64/fpu/test-float-vlen8.c +++ b/sysdeps/x86_64/fpu/test-float-vlen8.h @@ -1,5 +1,5 @@ /* Tests for AVX ISA versions of vector math functions. - Copyright (C) 2014-2016 Free Software Foundation, Inc. 
+ Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,15 +16,6 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include "test-float-vlen8.h" - -#define TEST_VECTOR_cosf 1 -#define TEST_VECTOR_sinf 1 -#define TEST_VECTOR_sincosf 1 -#define TEST_VECTOR_logf 1 -#define TEST_VECTOR_expf 1 -#define TEST_VECTOR_powf 1 +#include_next <test-float-vlen8.h> #define REQUIRE_AVX - -#include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c new file mode 100644 index 0000000000..6d70844147 --- /dev/null +++ b/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c @@ -0,0 +1,66 @@ +/* Part of test to build shared library to ensure link against + *_finite aliases from libmvec. + Copyright (C) 2016-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define N 4000 +FLOAT log_arg[N]; +FLOAT exp_arg[N]; +FLOAT log_res[N]; +FLOAT exp_res[N]; +FLOAT pow_res[N]; +int arch_check = 1; + +static void +init_arg (void) +{ + int i; + + CHECK_ARCH_EXT; + + arch_check = 0; + + for (i = 0; i < N; i += 1) + { + log_arg[i] = 1.0; + exp_arg[i] = 0.0; + } +} + +int +test_finite_alias (void) +{ + int i; + + init_arg (); + + if (arch_check) return 77; + +#pragma omp simd + for (i = 0; i < N; i += 1) + { + log_res[i] = FUNC (log) (log_arg[i]); + exp_res[i] = FUNC (exp) (exp_arg[i]); + pow_res[i] = FUNC (pow) (log_arg[i], log_arg[i]); + } + + if (log_res[0] != 0.0) return 1; + if (exp_res[0] != 1.0) return 1; + if (pow_res[0] != 1.0) return 1; + + return 0; +} diff --git a/sysdeps/x86_64/fpu/x86_64-math-asm.h b/sysdeps/x86_64/fpu/x86_64-math-asm.h index db3f9f78b0..597b967b7b 100644 --- a/sysdeps/x86_64/fpu/x86_64-math-asm.h +++ b/sysdeps/x86_64/fpu/x86_64-math-asm.h @@ -1,5 +1,5 @@ /* Helper macros for x86_64 libm functions. - Copyright (C) 2015-2016 Free Software Foundation, Inc. + Copyright (C) 2015-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or |